+++ /dev/null
-BasedOnStyle: Google
-IndentWidth: 4
-UseTab: Never
----
-Language: Cpp
-Standard: Cpp11
-
-AccessModifierOffset: -4
-AllowAllArgumentsOnNextLine: false
-AllowShortFunctionsOnASingleLine: Empty
-AllowShortLambdasOnASingleLine: Empty
-AlwaysBreakBeforeMultilineStrings: false
-ColumnLimit: 120
-DerivePointerAlignment: false
-FixNamespaceComments: true
-IndentCaseLabels: false
-SpaceBeforeCpp11BracedList: true
-SpaceBeforeCtorInitializerColon: false
----
+++ /dev/null
-# .coveragerc to control coverage.py
-[run]
-branch = True
-
-source =
- mo/
- mo.py
-
-omit =
- # omit anything in a .local directory anywhere
- */.local/*
- # omit everything in /usr
- /usr/*
- # omit tests
- */test_*.py
- # init scripts
- */__init__.py
-
-[report]
-# Regexes for lines to exclude from consideration
-exclude_lines =
- # Have to re-enable the standard pragma
- pragma: no cover
-
- # Don't complain about missing debug-only code:
- def __repr__
-
- # Don't complain if tests don't hit defensive assertion code:
- raise AssertionError
- raise NotImplementedError
-
- # Don't complain if non-runnable code isn't run:
- if 0:
- if __name__ == .__main__.:
-
-ignore_errors = True
-
-[html]
-directory = htmlcov
\ No newline at end of file
# but ensure we don't skip __init__.py
!__init__.py
# developer tools
-.idea
+*.idea
.vscode
cmake-build-debug
cmake-build-release
doc/
docs/build_documentation/work_dir/
inference-engine/plugins/
+inference-engine/temp
+inference-engine/report
.repo/
docs/template_plugin/html/
CMakeLists.txt.user
docs/IE_PLUGIN_DG/html/
+
+*.project
+*.cproject
+*.pydevproject
+*.settings
+*/gen/
+__pycache__
+*.swp
+/config.xml
+
+# Python-specific
+*.env3
+*.pyc
+
+# Tests-specific
+*.coverage
+*htmlcov
+*pylint_report.txt
+*pylint_report_comments.txt
+
+# Artifacts
+/model-optimizer/*.bin
+/model-optimizer/*.xml
+/model-optimizer/*.json
+/model-optimizer/*.so
+/model-optimizer/*.txt
+/model-optimizer/*.pb
+/model-optimizer/*.pbtxt
+/model-optimizer/!CMakeLists.txt
+/model-optimizer/*.mapping
+/model-optimizer/*.dat
+/model-optimizer/*.svg
\ No newline at end of file
cmake_minimum_required(VERSION 3.7.2 FATAL_ERROR)
endif()
-
project(OpenVINO)
set(OpenVINO_MAIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
include(features)
# include developer package
-include(developer_package NO_POLICY_SCOPE)
+include(developer_package)
# These options are shared with 3rdparty plugins
# by means of developer package
message (STATUS "PROJECT ............................... " ${PROJECT_NAME})
message (STATUS "CMAKE_BINARY_DIR ...................... " ${CMAKE_BINARY_DIR})
message (STATUS "OpenVINO_MAIN_SOURCE_DIR .............. " ${OpenVINO_MAIN_SOURCE_DIR})
-message (STATUS "IE_MAIN_SOURCE_DIR .............. " ${IE_MAIN_SOURCE_DIR})
+message (STATUS "IE_MAIN_SOURCE_DIR .................... " ${IE_MAIN_SOURCE_DIR})
message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR})
message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID})
message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
if (NOT ANDROID)
ngraph_set(NGRAPH_UNIT_TEST_ENABLE TRUE)
- ngraph_set(NGRAPH_UNIT_TEST_OPENVINO_ENABLE TRUE)
+ ngraph_set(NGRAPH_IE_ENABLE TRUE)
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE)
else()
ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE)
endif()
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
ie_add_compiler_flags(-Wno-error=uninitialized -Wno-error=literal-conversion)
elseif(UNIX)
ie_add_compiler_flags(-Wno-error=maybe-uninitialized -Wno-error=return-type -fPIC)
endif()
if(ENABLE_AVX512F)
- if ((CMAKE_CXX_COMPILER_ID MATCHES MSVC) AND (MSVC_VERSION VERSION_LESS 1920))
+ if ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_LESS 1920))
# 1920 version of MSVC 2019. In MSVC 2017 AVX512F not work
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
- if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
+ if (CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
- if ((CMAKE_CXX_COMPILER_ID STREQUAL GNU) AND (NOT (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9)))
+ if ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND (NOT (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9)))
set(ENABLE_AVX512F OFF CACHE BOOL "" FORCE)
endif()
endif()
set(CMAKE_BUILD_TYPE "Release")
endif()
-set(OUTPUT_ROOT ${OpenVINO_MAIN_SOURCE_DIR})
+# allow overriding the default OUTPUT_ROOT
+if(NOT DEFINED OUTPUT_ROOT)
+ set(OUTPUT_ROOT ${OpenVINO_MAIN_SOURCE_DIR})
+endif()
# Enable postfixes for Debug/Release builds
set(IE_DEBUG_POSTFIX_WIN "d")
# Use solution folders
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+set(CMAKE_POLICY_DEFAULT_CMP0054 NEW)
+
include(sdl)
-include(os_flags NO_POLICY_SCOPE)
+include(os_flags)
include(sanitizer)
function(set_ci_build_number)
function(enable_fuzzing)
# Enable (libFuzzer)[https://llvm.org/docs/LibFuzzer.html] if supported.
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT WIN32)
+ if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
# Communicate libfuzzer is enabled
set(WITH_LIBFUZZER ON PARENT_SCOPE)
add_compile_definitions(WITH_LIBFUZZER)
#
macro(disable_deprecated_warnings)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "/Qdiag-disable:1478,1786")
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(ie_c_cxx_deprecated "/wd4996")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "-diag-disable=1478,1786")
else()
set(ie_c_cxx_deprecated "-Wno-deprecated-declarations")
#
macro(ie_deprecated_no_errors)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated "/Qdiag-warning:1478,1786")
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(ie_c_cxx_deprecated "/wd4996")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(ie_c_cxx_deprecated_no_errors "-diag-warning=1478,1786")
else()
set(ie_c_cxx_deprecated_no_errors "-Wno-error=deprecated-declarations")
#
function(ie_sse42_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# No such option for MSVC 2019
- elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/arch:SSE4.2 /QxSSE4.2" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-msse4.2 -xSSE4.2" PARENT_SCOPE)
else()
set(${flags} "-msse4.2" PARENT_SCOPE)
#
function(ie_avx2_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/QxCORE-AVX2" PARENT_SCOPE)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(${flags} "/arch:AVX2" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-march=core-avx2 -xCORE-AVX2 -mtune=core-avx2" PARENT_SCOPE)
else()
set(${flags} "-mavx2 -mfma" PARENT_SCOPE)
#
function(ie_avx512_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/QxCOMMON-AVX512" PARENT_SCOPE)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(${flags} "/arch:AVX512" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-xCOMMON-AVX512" PARENT_SCOPE)
endif()
- if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(${flags} "-mavx512f -mfma" PARENT_SCOPE)
endif()
endif()
set(CMAKE_RANLIB "gcc-ranlib")
endif()
elseif(WIN32)
- if(CMAKE_BUILD_TYPE STREQUAL Release)
- # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GL")
- # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /GL")
- # set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LTCG:STATUS")
- # set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LTCG:STATUS")
- # set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /LTCG:STATUS")
+ if(CMAKE_BUILD_TYPE STREQUAL "Release")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GL")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /GL")
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LTCG:STATUS")
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LTCG:STATUS")
+ set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /LTCG:STATUS")
endif()
endif()
endmacro()
# to allows to override CMAKE_CXX_STANDARD from command line
if(NOT DEFINED CMAKE_CXX_STANDARD)
- if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_STANDARD 14)
else()
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
endif()
-if(COVERAGE)
+if(ENABLE_COVERAGE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --coverage")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
if (TREAT_WARNING_AS_ERROR)
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
ie_add_compiler_flags(/WX)
ie_add_compiler_flags(/Qdiag-warning:47,1740,1786)
- elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# ie_add_compiler_flags(/WX) # Too many warnings
endif()
endif()
# Disable noisy warnings
- if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# C4251 needs to have dll-interface to be used by clients of class
ie_add_compiler_flags(/wd4251)
# C4275 non dll-interface class used as base for dll-interface class
ie_add_compiler_flags(/wd4275)
endif()
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# 161 unrecognized pragma
# 177 variable was declared but never referenced
# 556 not matched type of assigned function pointer
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Z7")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
-
- if(ENABLE_DEBUG_SYMBOLS)
- ie_add_compiler_flags(/Z7)
-
- set(DEBUG_SYMBOLS_LINKER_FLAGS "/DEBUG")
- if (CMAKE_BUILD_TYPE STREQUAL "Release")
- # Keep default /OPT values. See /DEBUG reference for details.
- set(DEBUG_SYMBOLS_LINKER_FLAGS "${DEBUG_SYMBOLS_LINKER_FLAGS} /OPT:REF /OPT:ICF")
- endif()
-
- set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
- set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}")
- endif()
else()
# TODO: enable for C sources as well
# ie_add_compiler_flags(-Werror)
set(SANITIZER_LINKER_FLAGS "-fsanitize=address")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold")
- elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT WIN32)
+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32)
set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld")
endif()
endif()
if (ENABLE_THREAD_SANITIZER)
- set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread")
-
- set(SANITIZER_LINKER_FLAGS "-fsanitize=thread")
+ set(SANITIZER_COMPILER_FLAGS "-g -fsanitize=thread -fno-omit-frame-pointer")
+ set(SANITIZER_LINKER_FLAGS "-fsanitize=thread -static-libsan")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
if (NOT ENABLE_SANITIZER)
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -s")
endif()
- elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if (NOT ENABLE_SANITIZER)
set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -z noexecstack -z relro -z now")
endif()
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /sdl")
endif()
set(MSVC64 ON)
endif()
-if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine
OUTPUT_VARIABLE OPENVINO_GCC_TARGET_MACHINE
OUTPUT_STRIP_TRAILING_WHITESPACE)
# Coverage
#
-if(COVERAGE)
+if(ENABLE_COVERAGE)
include(coverage_ie)
endif()
# Generate reports
-ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine_with_builders"
+ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine"
PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/inference_engine/*"
"${DLDT_COVERAGE_BASE_DIRECTORY}/plugin_api/*")
-ie_coverage_remove(INPUT "inference_engine_with_builders" OUTPUT "inference_engine"
- PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/inference_engine/builders/*")
ie_coverage_genhtml(INFO_FILE "inference_engine"
PREFIX "${DLDT_COVERAGE_BASE_DIRECTORY}")
+ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine_ir_reader"
+ PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/ir_readers/*")
+ie_coverage_genhtml(INFO_FILE "inference_engine_ir_reader"
+ PREFIX "${DLDT_COVERAGE_BASE_DIRECTORY}")
+
ie_coverage_extract(INPUT "dldt" OUTPUT "inference_engine_legacy"
PATTERNS "${DLDT_COVERAGE_BASE_DIRECTORY}/legacy_api/*")
ie_coverage_genhtml(INFO_FILE "inference_engine_legacy"
#
if(ENABLE_CPPLINT)
- find_host_package(PythonInterp)
+ find_package(Python3 COMPONENTS Interpreter)
- if(NOT PYTHONINTERP_FOUND)
- message(WARNING "Python interpreter was not found (required for cpplint check)")
+ if(NOT Python3_Interpreter_FOUND)
+ message(WARNING "Python3 interpreter was not found (required for cpplint check)")
set(ENABLE_CPPLINT OFF)
endif()
endif()
message("")
set(gflags_DIR "@gflags_BINARY_DIR@")
+# GNA lib dir
+set(GNA "@GNA@")
# Targets
list(APPEND CMAKE_MODULE_PATH "${IE_MAIN_SOURCE_DIR}/cmake")
# generic stuff from developer package
-include(developer_package NO_POLICY_SCOPE)
+include(developer_package)
include(developer_package_ie)
# Don't threat deprecated API warnings as errors in 3rd party apps
endif()
endif()
-ie_option (ENABLE_IR_READER "Compile with IR readers / parsers" ON)
-
ie_option (ENABLE_VPU "vpu targeted plugins for inference engine" ON)
ie_dependent_option (ENABLE_MYRIAD "myriad targeted plugin for inference engine" ON "ENABLE_VPU" OFF)
ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
-ie_dependent_option (ENABLE_GAPI_TESTS "tests for GAPI kernels" OFF "ENABLE_TESTS" OFF)
+ie_dependent_option (ENABLE_GAPI_TESTS "tests for GAPI kernels" ON "ENABLE_TESTS" OFF)
ie_dependent_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF "ENABLE_GAPI_TESTS" OFF)
ie_dependent_option (ENABLE_BEH_TESTS "tests oriented to check inference engine API corecteness" ON "ENABLE_TESTS" OFF)
-ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS;ENABLE_IR_READER" OFF)
+ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS" OFF)
ie_dependent_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON "NOT MINGW" OFF)
ie_option (ENABLE_OPENCV "enables OpenCV" ON)
-ie_option (ENABLE_DEBUG_SYMBOLS "generates symbols for debugging" OFF)
-
ie_option (ENABLE_PYTHON "enables ie python bridge build" OFF)
-ie_option (ENABLE_CPP_CCT "enables C++ version of Cross Check Tool" OFF)
-
ie_option (ENABLE_C "enables ie c bridge build" ON)
-ie_dependent_option(ENABLE_CPPLINT "Enable cpplint checks during the build" OFF "UNIX;NOT APPLE;NOT ANDROID" OFF)
+ie_dependent_option(ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT APPLE;NOT ANDROID" OFF)
+
ie_dependent_option(ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF)
-ie_option(ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" OFF)
+ie_option(ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" ON)
set(IE_EXTRA_PLUGINS "" CACHE STRING "Extra paths for plugins to include into DLDT build tree")
endfunction()
function(add_lfs_repo name prefix url tag)
+ if(TARGET ${name})
+ return()
+ endif()
+
ExternalProject_Add(${name}
PREFIX ${prefix}
GIT_REPOSITORY ${url}
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
IMPORTED_LOCATION "${IE${ie_library_usuffix}_RELEASE_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}")
- if(CMAKE_CXX_COMPILER_ID MATCHES Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set_target_properties(IE::inference_engine${ie_library_suffix} PROPERTIES
INTERFACE_COMPILE_OPTIONS "-diag-warning=1786")
else()
const char *fileName = ep->d_name;
if (strcmp(fileName, ".") == 0 || strcmp(fileName, "..") == 0) continue;
char *file_path = (char *)calloc(strlen(arg) + strlen(ep->d_name) + 2, sizeof(char));
- strcpy(file_path, arg);
- strcat(file_path, "/");
- strcat(file_path, ep->d_name);
+ memcpy(file_path, arg, strlen(arg));
+ memcpy(file_path + strlen(arg), "/", strlen("/"));
+ memcpy(file_path + strlen(arg) + strlen("/"), ep->d_name, strlen(ep->d_name) + 1);
if (file_num == 0) {
file_paths = (char **)calloc(1, sizeof(char *));
dp = NULL;
} else {
char *file_path = (char *)calloc(strlen(arg) + 1, sizeof(char));
- strcpy(file_path, arg);
+ memcpy(file_path, arg, strlen(arg) + 1);
if (file_num == 0) {
file_paths = (char **)calloc(1, sizeof(char *));
}
if (fscanf(file, "%s", key)!= EOF && fscanf(file, "%s", value) != EOF) {
char *cfg_name = (char *)calloc(strlen(key) + 1, sizeof(char));
char *cfg_value = (char *)calloc(strlen(value) + 1, sizeof(char));
- strcpy(cfg_name, key);
- strcpy(cfg_value, value);
+ memcpy(cfg_name, key, strlen(key) + 1);
+ memcpy(cfg_value, value, strlen(value) + 1);
ie_config_t *cfg_t = (ie_config_t *)calloc(1, sizeof(ie_config_t));
cfg_t->name = cfg_name;
cfg_t->value = cfg_value;
}
char *cfg_name = (char *)calloc(strlen(key) + 1, sizeof(char));
char *cfg_value = (char *)calloc(strlen(value) + 1, sizeof(char));
- strcpy(cfg_name, key);
- strcpy(cfg_value, value);
+ memcpy(cfg_name, key, strlen(key) + 1);
+ memcpy(cfg_value, value, strlen(value) + 1);
ie_config_t *cfg_t = (ie_config_t *)calloc(1, sizeof(ie_config_t));
cfg_t->name = cfg_name;
cfg_t->value = cfg_value;
// --------------------------- 4. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
input_weight = (char *)calloc(strlen(input_model) + 1, sizeof(char));
- strncpy(input_weight, input_model, strlen(input_model)-4);
- strcat(input_weight, ".bin");
+ memcpy(input_weight, input_model, strlen(input_model) - 4);
+ memcpy(input_weight + strlen(input_model) - 4, ".bin", strlen(".bin") + 1);
printf("%sLoading network files:\n", info);
printf("\t%s\n", input_model);
printf("\t%s\n", input_weight);
char str_num[16] = {0};
int2str(str_num, batch_id);
char *img_path = (char *)calloc(strlen(out) + strlen(str_num) + strlen(".bmp") + 1, sizeof(char));
- strcpy(img_path, out);
- strcat(img_path, str_num);
- strcat(img_path, ".bmp");
+ memcpy(img_path, out, strlen(out));
+ memcpy(img_path + strlen(out), str_num, strlen(str_num));
+ memcpy(img_path + strlen(out) + strlen(str_num), ".bmp", strlen(".bmp") + 1);
image_save(img_path, &originalImages[batch_id]);
printf("%sImage %s created!\n", info, img_path);
free(img_path);
DATA_PATH=\"${DATA_PATH}\"
MODELS_PATH=\"${MODELS_PATH}\" )
+add_dependencies(${TARGET_NAME} MultiDevicePlugin)
+
if(ENABLE_MKL_DNN)
add_dependencies(${TARGET_NAME} MKLDNNPlugin)
endif()
IE_ASSERT_OK(ie_core_create("", &core));
ASSERT_NE(nullptr, core);
- ie_core_versions_t ie_core_versions_multi;
ie_param_t param;
- if (ie_core_get_versions(core, "MULTI", &ie_core_versions_multi) != IEStatusCode::OK ||
-    ie_core_get_metric(core, "GPU", "AVAILABLE_DEVICES", &param) != IEStatusCode::OK) {
+    if (ie_core_get_metric(core, "GPU", "AVAILABLE_DEVICES", &param) != IEStatusCode::OK) {
ie_core_free(&core);
GTEST_SKIP();
}
ie_config_t config_param = {"MULTI_DEVICE_PRIORITIES", "GPU,CPU", nullptr};
IE_EXPECT_OK(ie_exec_network_set_config(exe_network, &config_param));
- ie_core_versions_free(&ie_core_versions_multi);
-    ie_param_free(&param);
ie_exec_network_free(&exe_network);
ie_network_free(&network);
ie_core_free(&core);
+    ie_param_free(&param);
}
TEST(ie_exec_network_get_metric, getMetric) {
set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
add_subdirectory (src/openvino/inference_engine)
+# Check Cython version
+if("${CYTHON_VERSION}" VERSION_LESS "0.29")
+  message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
+else()
+ message(STATUS "Found Cython version ${CYTHON_VERSION}")
+endif()
+
# install
ie_cpack_add_component(${PYTHON_VERSION} REQUIRED)
include( FindPackageHandleStandardArgs )
FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )
-mark_as_advanced( CYTHON_EXECUTABLE )
+# Find Cython version
+execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
+string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
+
+mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
# net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
# input_layer = next(iter(net.inputs))
# n, c, h, w = net.inputs[input_layer]
- # net.reshape({input_layer: (n, c, h*2, w*2)}]
+ # net.reshape({input_layer: (n, c, h*2, w*2)})
# ```
def reshape(self, input_shapes: dict):
cdef map[string, vector[size_t]] c_input_shapes;
import os
import pytest
import warnings
+import threading
from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
class InferReqWrap:
def __init__(self, request):
self.request = request
+ self.cv = threading.Condition()
self.request.set_completion_callback(self.callback)
self.status_code = self.request.wait(ie.WaitMode.STATUS_ONLY)
assert self.status_code == ie.StatusCode.INFER_NOT_STARTED
def callback(self, statusCode, userdata):
self.status_code = self.request.wait(ie.WaitMode.STATUS_ONLY)
+ self.cv.acquire()
+ self.cv.notify()
+ self.cv.release()
def execute(self, input_data):
self.request.async_infer(input_data)
+ self.cv.acquire()
+ self.cv.wait()
+ self.cv.release()
status = self.request.wait(ie.WaitMode.RESULT_READY)
assert status == ie.StatusCode.OK
assert self.status_code == ie.StatusCode.OK
*/
explicit CNNNetwork(std::shared_ptr<ICNNNetwork> network): network(network) {
actual = network.get();
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
}
/**
* @return outputs Reference to the OutputsDataMap object
*/
virtual OutputsDataMap getOutputsInfo() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
OutputsDataMap outputs;
actual->getOutputsInfo(outputs);
return outputs;
* @return inputs Reference to InputsDataMap object
*/
virtual InputsDataMap getInputsInfo() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
InputsDataMap inputs;
actual->getInputsInfo(inputs);
return inputs;
* @return The number of layers as an integer value
*/
size_t layerCount() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->layerCount();
}
*
* @return Network name
*/
- const std::string& getName() const noexcept {
+ const std::string& getName() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getName();
}
* @return The size of batch as a size_t value
*/
virtual size_t getBatchSize() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getBatchSize();
}
*
* @return A shared pointer of the current network
*/
- operator std::shared_ptr<ICNNNetwork>() {
+ operator ICNNNetwork::Ptr() {
return network;
}
* @return An instance of the current network
*/
operator ICNNNetwork&() {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return *actual;
}
* @return A const reference of the current network
*/
operator const ICNNNetwork&() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return *actual;
}
*
* @return constant nGraph function
*/
- std::shared_ptr<ngraph::Function> getFunction() noexcept {
+ std::shared_ptr<ngraph::Function> getFunction() {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getFunction();
}
*
* @return constant nGraph function
*/
- std::shared_ptr<const ngraph::Function> getFunction() const noexcept {
+ std::shared_ptr<const ngraph::Function> getFunction() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getFunction();
}
* @return Map of pairs: input name and its dimension.
*/
virtual ICNNNetwork::InputShapes getInputShapes() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
ICNNNetwork::InputShapes shapes;
InputsDataMap inputs;
actual->getInputsInfo(inputs);
* @return A vector of Memory State objects
*/
std::vector<MemoryState> QueryState() {
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "ExecutableNetwork wrapper was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "ExecutableNetwork was not initialized.";
IMemoryState::Ptr pState = nullptr;
auto res = OK;
std::vector<MemoryState> controller;
InferenceEngine::details::SharedObjectLoader::Ptr splg = {}):
actual(request), plg(splg) {
// plg can be null, but not the actual
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "InferRequest wrapper was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferRequest was not initialized.";
}
/**
*/
StatusCode Wait(int64_t millis_timeout) {
ResponseDesc resp;
- if (actual == nullptr) {
- THROW_IE_EXCEPTION << "InferRequest wrapper was not initialized.";
- }
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferRequest was not initialized.";
auto res = actual->Wait(millis_timeout, &resp);
if (res != OK && res != RESULT_NOT_READY && res != INFER_NOT_STARTED) {
InferenceEngine::details::extract_exception(res, resp.msg);
* @return A shared pointer to underlying IInferRequest interface
*/
operator IInferRequest::Ptr&() {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferRequest was not initialized.";
return actual;
}
const Version* GetVersion() {
const Version* versionInfo = nullptr;
IE_SUPPRESS_DEPRECATED_START
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferencePlugin wrapper was not initialized";
actual->GetVersion(versionInfo);
IE_SUPPRESS_DEPRECATED_END
if (versionInfo == nullptr) {
void QueryNetwork(const ICNNNetwork& network, const std::map<std::string, std::string>& config,
QueryNetworkResult& res) const {
IE_SUPPRESS_DEPRECATED_START
+ if (actual == nullptr) THROW_IE_EXCEPTION << "InferencePlugin wrapper was not initialized";
actual->QueryNetwork(network, config, res);
IE_SUPPRESS_DEPRECATED_END
if (res.rc != OK) THROW_IE_EXCEPTION << res.resp.msg;
* scope.
*/
explicit CNNNetworkIterator(const ICNNNetwork* network) {
+ if (network == nullptr) THROW_IE_EXCEPTION << "ICNNNetwork object is nullptr";
InputsDataMap inputs;
network->getInputsInfo(inputs);
if (!inputs.empty()) {
* @brief The main constructor
* @param loader Library to load from
*/
- explicit SymbolLoader(std::shared_ptr<Loader> loader): _so_loader(loader) {}
+ explicit SymbolLoader(std::shared_ptr<Loader> loader): _so_loader(loader) {
+ if (_so_loader == nullptr) {
+ THROW_IE_EXCEPTION << "SymbolLoader cannot be created with nullptr";
+ }
+ }
/**
* @brief Calls a function from the library that creates an object and returns StatusCode
StatusCode rc = OK;
/**
- * @brief Response mssage
+ * @brief Response message
*/
ResponseDesc resp;
};
* @param xmlConfigFile A path to .xml file with plugins to load from. If XML configuration file is not specified,
* then default Inference Engine plugins are loaded from the default plugin.xml file.
*/
- explicit Core(const std::string& xmlConfigFile = std::string());
+ explicit Core(const std::string& xmlConfigFile = {});
/**
* @brief Returns plugins version information
* if bin file with the same name was not found, will load IR without weights.
* @return CNNNetwork
*/
- CNNNetwork ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath = std::wstring()) const {
+ CNNNetwork ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath = {}) const {
return ReadNetwork(details::wStringtoMBCSstringChar(modelPath), details::wStringtoMBCSstringChar(binPath));
}
#endif
* if bin file with the same name was not found, will load IR without weights.
* @return CNNNetwork
*/
- CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath = "") const;
+ CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath = {}) const;
/**
* @brief Reads IR xml and bin (with the same name) files
* @param model string with IR
* @return An executable network reference
*/
ExecutableNetwork LoadNetwork(
- const CNNNetwork network, const std::string& deviceName,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>());
+ const CNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config = {});
/**
* @brief Registers extension
* @param context Pointer to RemoteContext object
* @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
* operation
- * @return An executable network reference
+ * @return An executable network object
*/
ExecutableNetwork LoadNetwork(
- const CNNNetwork network, RemoteContext::Ptr context,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>());
+ const CNNNetwork& network, RemoteContext::Ptr context,
+ const std::map<std::string, std::string>& config = {});
/**
* @brief Registers extension for the specified plugin
*/
ExecutableNetwork ImportNetwork(
const std::string& modelFileName, const std::string& deviceName,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>());
+ const std::map<std::string, std::string>& config = {});
/**
* @brief Creates an executable network from a previously exported network
* @param deviceName A name of a device to query
* @param network Network object to query
* @param config Optional map of pairs: (config parameter name, config parameter value)
- * @return Pointer to the response message that holds a description of an error if any occurred
+ * @return An object containing a map of pairs a layer name -> a device name supporting this layer.
*/
QueryNetworkResult QueryNetwork(
const ICNNNetwork& network, const std::string& deviceName,
- const std::map<std::string, std::string>& config = std::map<std::string, std::string>()) const;
+ const std::map<std::string, std::string>& config = {}) const;
/**
* @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp
*
* @param config Map of pairs: (config parameter name, config parameter value)
*/
- void SetConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = std::string());
+ void SetConfig(const std::map<std::string, std::string>& config, const std::string& deviceName = {});
/**
* @brief Gets configuration dedicated to device behaviour.
project(Samples)
+if(POLICY CMP0054)
+ cmake_policy(SET CMP0054 NEW)
+endif()
+
if(POLICY CMP0063)
cmake_policy(SET CMP0063 NEW)
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") #treating warnings as errors
endif ()
- if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267") #disable some warnings
endif()
else()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-command-line-argument")
elseif(UNIX)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self")
- if(NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang)
+ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized")
endif()
endif()
set (CMAKE_CXX_STANDARD 11)
set (CMAKE_CXX_EXTENSIONS OFF)
set (CMAKE_CXX_STANDARD_REQUIRED ON)
- if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
endif()
endif()
set_target_properties(gflags_nothreads_static PROPERTIES FOLDER thirdparty)
endif()
-if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
endif()
if(COMMAND add_cpplint_target AND NOT IE_SAMPLE_EXCLUDE_CPPLINT)
if(folder_name STREQUAL "c_samples")
- set(custom_filters "-readability/casting,-runtime/printf")
+ set(custom_filters "-readability/casting")
endif()
add_cpplint_target(${IE_SAMPLE_NAME}_cpplint FOR_TARGETS ${IE_SAMPLE_NAME}
CUSTOM_FILTERS ${custom_filters})
-stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output.
-t Optional. Time in seconds to execute topology.
  -progress Optional. Show progress bar (can affect performance measurement). Default value is "false".
+ -shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode
static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application.";
#endif
+static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
+ " in case of one input size.";
+
/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);
DEFINE_string(dump_config, "", dump_config_message);
#endif
+/// @brief Define flag for input shape <br>
+DEFINE_string(shape, "", shape_message);
+
/**
* @brief This function show a help message
*/
std::cout << " -stream_output " << stream_output_message << std::endl;
std::cout << " -t " << execution_time_message << std::endl;
std::cout << " -progress " << progress_message << std::endl;
+ std::cout << " -shape " << shape_message << std::endl;
std::cout << std::endl << " device-specific performance options:" << std::endl;
std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl;
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;
// ----------------- 5. Resizing network to match image sizes and given batch ----------------------------------
next_step();
-
batchSize = cnnNetwork.getBatchSize();
+ // Parse input shapes if specified
+ InferenceEngine::ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+ bool reshape = false;
+ if (!FLAGS_shape.empty()) {
+ reshape |= updateShapes(shapes, FLAGS_shape, inputInfo);
+ }
if ((FLAGS_b != 0) && (batchSize != FLAGS_b)) {
- ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
- bool reshape = false;
- for (const InputsDataMap::value_type& item : inputInfo) {
- auto layout = item.second->getTensorDesc().getLayout();
-
- int batchIndex = -1;
- if ((layout == Layout::NCHW) || (layout == Layout::NCDHW) ||
- (layout == Layout::NHWC) || (layout == Layout::NDHWC) ||
- (layout == Layout::NC)) {
- batchIndex = 0;
- } else if (layout == CN) {
- batchIndex = 1;
- }
- if ((batchIndex != -1) && (shapes[item.first][batchIndex] != FLAGS_b)) {
- shapes[item.first][batchIndex] = FLAGS_b;
- reshape = true;
- }
- }
- if (reshape) {
- slog::info << "Resizing network to batch = " << FLAGS_b << slog::endl;
- cnnNetwork.reshape(shapes);
- }
+ reshape |= adjustShapesBatch(shapes, FLAGS_b, inputInfo);
+ }
+ if (reshape) {
+ slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl;
+ startTime = Time::now();
+ cnnNetwork.reshape(shapes);
+ auto duration_ms = double_to_string(get_total_ms_time(startTime));
+ slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
+ if (statistics)
+ statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
+ {
+ {"reshape network time (ms)", duration_ms}
+ });
}
-
batchSize = cnnNetwork.getBatchSize();
topology_name = cnnNetwork.getName();
slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << slog::endl;
#include <utility>
#include <vector>
#include <map>
+#include <regex>
#include <samples/common.hpp>
#include <samples/slog.hpp>
return result;
}
+// Sets the batch dimension of every input shape in `shapes` to `batch_size`.
+// The batch axis is located from each input's layout: index 0 for the
+// N-first layouts (NCHW/NCDHW/NHWC/NDHWC/NC), index 1 for CN; inputs with any
+// other layout are left untouched.
+// Returns true if at least one shape entry was actually modified.
+bool adjustShapesBatch(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+                       const size_t batch_size, const InferenceEngine::InputsDataMap& input_info) {
+    bool updated = false;
+    for (auto& item : input_info) {
+        auto layout = item.second->getTensorDesc().getLayout();
+
+        // -1 means "no batch dimension for this layout" — skip the input below.
+        int batch_index = -1;
+        if ((layout == InferenceEngine::Layout::NCHW) || (layout == InferenceEngine::Layout::NCDHW) ||
+            (layout == InferenceEngine::Layout::NHWC) || (layout == InferenceEngine::Layout::NDHWC) ||
+            (layout == InferenceEngine::Layout::NC)) {
+            batch_index = 0;
+        } else if (layout == InferenceEngine::Layout::CN) {
+            batch_index = 1;
+        }
+        // Only touch the map (and report an update) when the value really changes;
+        // .at() also validates that the input name/axis exists in `shapes`.
+        if ((batch_index != -1) && (shapes.at(item.first).at(batch_index) != batch_size)) {
+            shapes[item.first][batch_index] = batch_size;
+            updated = true;
+        }
+    }
+    return updated;
+}
+
+// Parses a command-line shape specification of the form
+// "input1[1,3,224,224],input2[1,4]" and applies it to `shapes`.
+// A bracketed shape with no preceding name ("[1,3,224,224]") is applied to
+// every input listed in `input_info`.
+// Returns true if any shape was written; throws std::logic_error when the
+// string is not fully consumed (i.e. malformed syntax).
+bool updateShapes(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+                  const std::string shapes_string, const InferenceEngine::InputsDataMap& input_info) {
+    bool updated = false;
+    // Work on a copy that is progressively trimmed as "name[dims]" groups are consumed.
+    std::string search_string = shapes_string;
+    auto start_pos = search_string.find_first_of('[');
+    while (start_pos != std::string::npos) {
+        auto end_pos = search_string.find_first_of(']');
+        if (end_pos == std::string::npos)
+            break;
+        // Everything before '[' is the (possibly empty) input name;
+        // everything between the brackets is a comma-separated dim list.
+        auto input_name = search_string.substr(0, start_pos);
+        auto input_shape = search_string.substr(start_pos + 1, end_pos - start_pos - 1);
+        std::vector<size_t> parsed_shape;
+        for (auto& dim : split(input_shape, ',')) {
+            parsed_shape.push_back(std::stoi(dim));
+        }
+        if (!input_name.empty()) {
+            shapes[input_name] = parsed_shape;
+            updated = true;
+        } else {
+            // Anonymous shape: broadcast it to all known inputs.
+            for (auto& item : input_info) {
+                shapes[item.first] = parsed_shape;
+            }
+            updated = true;
+        }
+        // Drop the consumed group; groups must be separated by a single ','.
+        search_string = search_string.substr(end_pos + 1);
+        if (search_string.empty() || search_string.front() != ',')
+            break;
+        search_string = search_string.substr(1);
+        start_pos = search_string.find_first_of('[');
+    }
+    // Leftover text means the loop bailed out mid-parse — reject the whole spec.
+    if (!search_string.empty())
+        throw std::logic_error("Can't parse `shape` parameter: " + shapes_string);
+    return updated;
+}
+
+// Formats an input-shapes map as a human-readable string for logging, e.g.
+//   'input1': [1, 3, 224, 224], 'input2': [1, 4]
+std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes) {
+    std::stringstream ss;
+    for (auto& shape : shapes) {
+        // Separate map entries with ", " (skipped before the first entry).
+        if (!ss.str().empty()) ss << ", ";
+        ss << "\'" << shape.first << "': [";
+        for (size_t i = 0; i < shape.second.size(); i++) {
+            if (i > 0) ss << ", ";
+            ss << shape.second.at(i);
+        }
+        ss << "]";
+    }
+    return ss.str();
+}
+
#ifdef USE_OPENCV
void dump_config(const std::string& filename,
const std::map<std::string, std::map<std::string, std::string>>& config) {
uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
const std::string& values_string);
+bool updateShapes(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+ const std::string shapes_string, const InferenceEngine::InputsDataMap& input_info);
+bool adjustShapesBatch(InferenceEngine::ICNNNetwork::InputShapes& shapes,
+ const size_t batch_size, const InferenceEngine::InputsDataMap& input_info);
+std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes);
+
#ifdef USE_OPENCV
void dump_config(const std::string& filename,
const std::map<std::string, std::map<std::string, std::string>>& config);
--- /dev/null
+@echo off
+:: Copyright (C) 2018-2020 Intel Corporation
+:: SPDX-License-Identifier: Apache-2.0
+
+:: Configures an out-of-source "vs2017x64" build two levels above this script
+:: using the VS 2017 generator with the Intel C++ Compiler 18.0 toolset, then
+:: converts selected mkldnn/plugin/test .vcxproj files to Intel C++ projects
+:: with ICProjConvert180.exe.
+:: NOTE(review): assumes a fixed 32-bit ICProjConvert install path — verify on
+:: the target machine.
+
+pushd ..\..
+:: Create the build directory if it does not exist yet.
+if not exist "vs2017x64" (
+    mkdir "vs2017x64"
+)
+
+cmake -E chdir "vs2017x64" cmake -G "Visual Studio 15 2017 Win64" -T "Intel C++ Compiler 18.0" -DOS_FOLDER=ON ^
+    -DENABLE_MYRIAD=OFF -DENABLE_VPU=OFF -DENABLE_GNA=ON -DENABLE_CLDNN=OFF ^
+    -DENABLE_OPENCV=ON -DENABLE_MKL_DNN=ON ^
+    -DVERBOSE_BUILD=ON -DENABLE_TESTS=ON -DTHREADING=TBB ..
+
+
+:: Bare "chdir" prints the current directory — used as a progress breadcrumb.
+chdir
+cd "vs2017x64\thirdparty\"
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" mkldnn.vcxproj /IC
+
+chdir
+cd "..\src\mkldnn_plugin"
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" MKLDNNPlugin.vcxproj /IC
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" test_MKLDNNPlugin.vcxproj /IC
+
+chdir
+cd "..\..\tests\unit"
+"C:\Program Files (x86)\Common Files\Intel\shared files\ia32\Bin\ICProjConvert180.exe" InferenceEngineUnitTests.vcxproj /IC
+
+
+popd
+pause
#!/bin/bash
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
CURRENT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
command -v realpath >/dev/null 2>&1 || { echo >&2 "cpplint require realpath executable but it's not installed. Aborting."; exit 1; }
--- /dev/null
+#!/bin/bash
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs several MyriadFunctionalTests binaries in parallel and summarizes results.
+#   $1 - number of applications to run in parallel (default: 4)
+#   $2 - optional directory containing the test binaries
+# Exits 1 if any parallel task failed, 0 otherwise.
+
+APP_NAME="MyriadFunctionalTests"
+APPS_TO_RUN=$1
+# Default to 4 parallel applications when $1 is not supplied.
+APPS_TO_RUN=${APPS_TO_RUN:=4}
+
+echo "Run in parallel ${APPS_TO_RUN} applications"
+
+TEST_DIR=../../bin/intel64
+
+# Path to test dir is provided
+if [[ -n "$2" ]]; then
+    TEST_DIR=$2
+# Search for test dir with binaries
+else
+    # Windows default
+    if [[ -f "${TEST_DIR}/${APP_NAME}" ]]; then
+        TEST_DIR=${TEST_DIR}
+    # Search for Release or Debug config
+    elif [[ -f "${TEST_DIR}/Release/${APP_NAME}" ]]; then
+        TEST_DIR="$TEST_DIR/Release/"
+    elif [[ -f "${TEST_DIR}/Debug/${APP_NAME}" ]]; then
+        TEST_DIR="$TEST_DIR/Debug/"
+    else
+        echo "Directory with binaries not found!"
+        # NOTE(review): "exit -1" is non-portable (wraps to status 255); consider "exit 1".
+        exit -1
+    fi
+
+fi
+
+echo "Test directory: ${TEST_DIR}"
+cd ${TEST_DIR}
+
+# Presumably switches the VPU plugin to MyriadX devices — TODO confirm.
+export IE_VPU_MYRIADX=1
+
+# PIDs of the background test processes, collected for the wait loop below.
+pids=""
+
+if [[ "${APPS_TO_RUN}" -ge 1 ]] ; then
+    ./${APP_NAME} --gtest_filter=*VPURegTest*SSD*myriad* &
+    pids+=" $!"
+fi
+
+if [[ "${APPS_TO_RUN}" -ge 2 ]] ; then
+    ./${APP_NAME} --gtest_filter=*VPURegTest*VGG*myriad* &
+    pids+=" $!"
+fi
+
+if [[ "${APPS_TO_RUN}" -ge 3 ]] ; then
+    ./${APP_NAME} --gtest_filter=*VPURegTest*VGG*myriad* &
+    pids+=" $!"
+fi
+
+if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then
+    # For more than 4: launch additional YOLO tasks (tasks 4..APPS_TO_RUN) for multi-device testing
+    for (( VAR = 4; VAR <= ${APPS_TO_RUN}; ++VAR )); do
+        ./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* &
+        pids+=" $!"
+    done
+fi
+
+
+# Wait for all processes to finish
+# sts accumulates one flag per task: 1 = exited successfully, 0 = failed.
+sts=""
+for p in ${pids}; do
+    if wait ${p}; then
+        sts+=" 1"
+    else
+        sts+=" 0"
+    fi
+    echo "--- Process $p finished"
+done
+
+# Print one PASSED/FAILED line per task and derive the overall exit code.
+idx=0
+exit_code=0
+for s in ${sts}; do
+    if [[ ${s} -eq 1 ]]; then
+        echo "Task $idx PASSED"
+    else
+        echo "Task $idx FAILED"
+        exit_code=1
+    fi
+    ((idx+=1))
+done
+
+exit ${exit_code}
add_subdirectory(hetero_plugin)
+add_subdirectory(multi_device)
+
add_subdirectory(transformations)
add_subdirectory(inference_engine)
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <generic_ie.hpp>
+#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include "convert_function_to_cnn_network.hpp"
#undef min
::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
+ ngraph::pass::CommonOptimizations().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet3ToOpSet2(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphFunc);
clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, network);
}
};
-ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICore * /*core*/, const InferenceEngine::ICNNNetwork &network,
+ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) {
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs;
return std::make_shared<CLDNNExecNetwork>(*CloneNetwork(network), context, conf);
}
-ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICore * /*core*/, const InferenceEngine::ICNNNetwork &network,
- RemoteContext::Ptr context,
- const std::map<std::string, std::string> &config) {
+ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
+ RemoteContext::Ptr context,
+ const std::map<std::string, std::string> &config) {
InferenceEngine::InputsDataMap _networkInputs;
network.getInputsInfo(_networkInputs);
check_inputs(_networkInputs);
public:
clDNNEngine();
- InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network,
+ InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) override;
- InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network,
- InferenceEngine::RemoteContext::Ptr context,
- const std::map<std::string, std::string> &config) override;
+ InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
+ InferenceEngine::RemoteContext::Ptr context,
+ const std::map<std::string, std::string> &config) override;
void SetConfig(const std::map<std::string, std::string> &config) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
void GNAPluginNS::backend::AMIntelDNN::Init(void *ptr_memory,
uint32_t num_memory_bytes,
- intel_dnn_number_type_t number_type,
+ intel_dnn_number_type_t compute_precision,
float scale_factor) {
ptr_dnn_memory_ = ptr_memory;
num_bytes_dnn_memory_ = num_memory_bytes;
- number_type_ = number_type;
+ compute_precision_ = compute_precision;
input_scale_factor_ = scale_factor;
ptr_active_outputs_ = nullptr;
reinterpret_cast<void *>(reinterpret_cast<int32_t *>(comp->op.recurrent.ptr_feedbacks) + j * comp_pwl->num_columns_out);
ApplyRecurrentTransform(comp, j, ptr_feedbacks);
// PrintOutputs(i);
- ApplyPiecewiseLinearTransform(comp_pwl, number_type_, num_active_outputs, j);
+ ApplyPiecewiseLinearTransform(comp_pwl, compute_precision_, num_active_outputs, j);
}
i++; // skip next component
} else {
break;
case kDnnConvolutional1dOp:ApplyConvolutional1DTransform(comp);
break;
- case kDnnPiecewiselinearOp:ApplyPiecewiseLinearTransform(comp, number_type_, num_active_outputs);
+ case kDnnPiecewiselinearOp:ApplyPiecewiseLinearTransform(comp, compute_precision_, num_active_outputs);
break;
- case kDnnMaxPoolOp:ApplyMaxPoolTransform(comp, number_type_);
+ case kDnnMaxPoolOp:ApplyMaxPoolTransform(comp, compute_precision_);
break;
case kDnnInterleaveOp:ApplyTranspose(comp);
break;
graph << "}";
}
-void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t number_type) {
- if ((number_type_ == kDnnFloat) && (number_type == kDnnInt)) {
+void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
+ if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) {
fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n");
fprintf(stderr, " Please convert to integer first.\n");
throw -1;
uint32_t layer = 0;
out_file << "<intel_dnn_file>\n";
- out_file << "<number_type> " << intel_dnn_number_type_name[number_type] << "\n";
+ out_file << "<number_type> " << intel_dnn_number_type_name[logging_precision] << "\n";
out_file << "<softmax_type> " << intel_dnn_softmax_name[softmax_type] << "\n";
out_file << "<num_memory_bytes> " << std::dec << num_bytes_dnn_memory_ << "\n";
out_file << "<num_group> " << std::dec << num_group << "\n";
out_file << "<orientation_out> " << std::dec << (component[i].orientation_out == kDnnInterleavedOrientation ?
"interleaved" : "deinterleaved") << "\n";
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_input> " << std::dec << sizeof(float) << "\n";
out_file << "<num_bytes_per_output> " << std::dec << sizeof(float) << "\n";
} else {
float output_scale_factor = component[i].output_scale_factor;
uint32_t num_weight_rows = (component[i].operation == kDnnDiagonalOp) ? 1 : num_rows_out;
uint32_t num_weight_columns = num_rows_in;
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
} else {
out_file << "<num_bytes_per_weight> " << std::dec << num_bytes_per_weight << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << num_bytes_per_bias << "\n";
}
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << std::setprecision(12) << std::scientific << "<weight_scale_factor> " << 1.0 << "\n";
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
} else {
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
float val =
static_cast<float>(ptr_weight[row * num_weight_columns + col]) * ptr_bias[row].multiplier
/ weight_scale_factor;
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_wfile << std::setprecision(12)
<< ptr_weight[row * num_weight_columns + col] / weight_scale_factor << " ";
} else {
}
}
#endif
- } else if (number_type_ == kDnnFloat) {
+ } else if (compute_precision_ == kDnnFloat) {
float *ptr_weight = reinterpret_cast<float *>(component[i].op.affine.ptr_weights);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
fprintf(stderr, "Unsupported weight type in WriteDnnText!\n");
throw -1;
}
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
if (num_bytes_per_weight == 1) {
intel_compound_bias_t
*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.affine.ptr_biases);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_rows_out; row++) {
- out_bfile << std::setw(8) << ptr_biases[row].bias << ", ";
- out_bfile << std::setw(8) << int(ptr_biases[row].multiplier) << "\n";
+ if (logging_precision == kDnnInt) {
+ out_bfile << std::setw(8) << ptr_biases[row].bias << ", ";
+ out_bfile << std::setw(8) << int(ptr_biases[row].multiplier) << "\n";
+ } else {
+ out_bfile << std::setw(8) << ptr_biases[row].bias / output_scale_factor << "\n";
+ }
}
#endif
} else {
int32_t *ptr_biases = reinterpret_cast<int32_t *>(component[i].op.affine.ptr_biases);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_rows_out; row++) {
- if (number_type == kDnnInt) {
+ if (logging_precision == kDnnInt) {
out_bfile << std::setw(8) << ptr_biases[row] << "\n";
} else {
out_bfile << std::setw(8) << ptr_biases[row] / output_scale_factor << "\n";
out_file << "<num_feature_maps> " << std::dec << num_feature_maps << "\n";
out_file << "<num_feature_map_rows> " << std::dec << num_feature_map_rows << "\n";
out_file << "<num_feature_map_columns> " << std::dec << num_feature_map_columns << "\n";
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
} else {
out_file << "<num_bytes_per_weight> " << std::dec << num_bytes_per_weight << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << num_bytes_per_bias << "\n";
}
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << std::setprecision(12) << std::scientific << "<weight_scale_factor> " << 1.0 << "\n";
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
} else {
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_filters; row++) {
for (uint32_t col = 0; col < num_filter_coefficients; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
float val = static_cast<float>(ptr_weight[row * num_filter_coefficients + col])
* ptr_bias[row].multiplier / weight_scale_factor;
out_wfile << std::setprecision(12) <<val << "\n";
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_filters; row++) {
for (uint32_t col = 0; col < num_filter_coefficients; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_wfile << std::setprecision(12)
<< ptr_weight[row * num_filter_coefficients + col] / weight_scale_factor
<< "\n";
}
}
#endif
- } else if (number_type_ == kDnnFloat) {
+ } else if (compute_precision_ == kDnnFloat) {
float *ptr_weight = reinterpret_cast<float *>(component[i].op.conv1D.ptr_filters);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_filters; row++) {
throw -1;
}
- if (number_type_ == kDnnInt) {
- if (number_type == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
+ if (logging_precision == kDnnInt) {
if (num_bytes_per_weight == 1) {
intel_compound_bias_t
*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
uint32_t num_weight_rows = num_columns_out;
uint32_t num_weight_columns = num_columns_in + num_columns_out;
out_file << "<num_vector_delay> " << std::dec << num_vector_delay << "\n";
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
} else {
out_file << "<num_bytes_per_weight> " << std::dec << num_bytes_per_weight << "\n";
out_file << "<num_bytes_per_bias> " << std::dec << num_bytes_per_bias << "\n";
}
- if ((number_type_ == kDnnInt) && (number_type == kDnnFloat)) {
+ if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
out_file << std::setprecision(12) << std::scientific << "<weight_scale_factor> " << 1.0 << "\n";
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
} else {
for (uint32_t row = 0; row < num_weight_rows; row++) {
out_file << "<weight_row> ";
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
float val =
static_cast<float>(ptr_weight[row * num_weight_columns + col]) * ptr_bias[col].multiplier
/ weight_scale_factor;
for (uint32_t row = 0; row < num_weight_rows; row++) {
out_file << "<weight_row> ";
for (uint32_t col = 0; col < num_weight_columns; col++) {
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_file << std::setprecision(12) << std::scientific
<< ptr_weight[row * num_weight_columns + col] / weight_scale_factor << " ";
} else {
out_file << "\n";
}
#endif
- } else if (number_type_ == kDnnFloat) {
+ } else if (compute_precision_ == kDnnFloat) {
float *ptr_weight = reinterpret_cast<float *>(component[i].op.recurrent.ptr_weights);
#ifdef DUMP_WB
for (uint32_t row = 0; row < num_weight_rows; row++) {
fprintf(stderr, "Unsupported weight type in WriteDnnText!\n");
throw -1;
}
- if (number_type_ == kDnnInt) {
- if (number_type == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
+ if (logging_precision == kDnnInt) {
if (num_bytes_per_weight == 1) {
intel_compound_bias_t
*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
out_file << "<num_bytes_per_slope> " << std::dec << sizeof(int16_t) << "\n";
out_file << "<num_bytes_per_intercept> " << std::dec << sizeof(int16_t) << "\n";
out_file << "<num_bytes_per_offset> " << std::dec << sizeof(int32_t) << "\n";
- if (number_type == kDnnFloat) {
+ if (logging_precision == kDnnFloat) {
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
out_file << "<num_segments> " << std::dec << 0 << "\n";
out_file << "<segment_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
out_file << "<num_segments> " << std::dec << num_segments << "\n";
out_file << "<segment_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n";
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
out_file << "<slope> ";
for (int segment = 0; segment < num_segments; segment++) {
out_file << "0x" << std::setfill('0') << std::setw(4) << std::hex
for (int j = 0; j < component[i].num_columns_out; j++) {
float floatValue = 0.f;
if (component[i].num_bytes_per_output == 4) {
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
auto value = reinterpret_cast<int32_t *>(component[i].ptr_outputs)[k * component[i].num_columns_out+ j];
floatValue = static_cast<float>(value);
for (int j = 0; j < component[i].num_columns_in; j++) {
float floatValue = 0.f;
if (component[i].num_bytes_per_input == 4) {
- if (number_type_ == kDnnInt) {
+ if (compute_precision_ == kDnnInt) {
auto value = reinterpret_cast<int32_t *>(component[i].ptr_inputs)[k * component[i].num_columns_in + j];
floatValue = static_cast<float>(value);
} else {
ptr_priors(NULL),
ptr_dnn_memory_(NULL),
num_bytes_dnn_memory_(0),
- number_type_(kDnnNumNumberType) {
+ compute_precision_(kDnnNumNumberType) {
}
~AMIntelDNN();
void Init(void *ptr_memory,
uint32_t num_memory_bytes,
- intel_dnn_number_type_t number_type,
+ intel_dnn_number_type_t compute_precision,
float scale_factor);
void InitActiveList(uint32_t *ptr_active_list);
void WriteGraphWizModel(const char *filename);
- void WriteDnnText(const char *filename, intel_dnn_number_type_t number_type);
+ void WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision);
#if GNA_LIB_VER == 2
uint32_t num_bytes_dnn_memory_;
uint32_t *ptr_active_outputs_;
uint32_t num_active_outputs_;
- intel_dnn_number_type_t number_type_;
+ intel_dnn_number_type_t compute_precision_;
float input_scale_factor_;
uint32_t dump_write_index = 0;
InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; }
void Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
void SetCore(InferenceEngine::ICore*) noexcept override {}
- const InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}
+ InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}
void Reset();
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string>& config,
}
public:
- InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core,
+ InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) override {
Config updated_config(defaultConfig);
const std::unordered_map<std::string, std::function<Parameter()>> queryApiSupported = {
{METRIC_KEY(AVAILABLE_DEVICES), [this]() {return GetAvailableDevices();}},
{METRIC_KEY(SUPPORTED_CONFIG_KEYS), [this]() {return config.GetSupportedKeys();}},
+ {METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), [this]() {
+ uint32_t nireq = 1;
+ return nireq;
+ }},
{METRIC_KEY(FULL_DEVICE_NAME), [&options, this]() {
auto availableDevices = GetAvailableDevices().as<std::vector<std::string>>();
#include <utility>
#include <memory>
#include "hetero_async_infer_request.hpp"
-#include <ie_util_internal.hpp>
#include <ie_profiling.hpp>
using namespace HeteroPlugin;
#include "hetero_async_infer_request.hpp"
#include "ie_util_internal.hpp"
#include "hetero_graph_splitter.hpp"
-#include "file_utils.h"
#include "xml_parse_utils.h"
#include <vector>
#include <array>
#include <cstdint>
-#include "details/caseless.hpp"
#include "ie_plugin_config.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
-#include "cpp_interfaces/base/ie_inference_plugin_api.hpp"
#include "hetero/hetero_plugin_config.hpp"
-#include "precision_utils.h"
#include "hetero_plugin.hpp"
#include "network_serializer.h"
HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::ICNNNetwork& network_,
const Engine::Configs& config,
- Engine* plugin):
+ Engine* heteroPlugin):
InferenceEngine::ExecutableNetworkThreadSafeDefault(
nullptr, std::make_shared<InferenceEngine::ImmediateExecutor>()),
- _plugin{plugin},
+ _heteroPlugin(heteroPlugin),
_name{network_.getName()},
_config{config} {
auto networkPtr = cloneNet(network_);
if (allEmpty) {
auto it = _config.find("TARGET_FALLBACK");
if (it != _config.end()) {
- plugin->SetAffinity(network, _config);
+ _heteroPlugin->SetAffinity(network, _config);
} else {
THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
}
network.getOutputsInfo(externalOutputsData);
auto subgraphs = splitGraph(network, getAffinities(network));
-
sortSubgraphs(subgraphs);
-
- std::vector<NetworkDesc> descs;
- std::vector<CNNLayerPtr> tempLayers;
-
- for (auto &&subgraph : subgraphs) {
- assert(!subgraph.empty());
- auto affinity = (*subgraph.begin())->affinity;
- assert(!affinity.empty());
- _affinities.push_back(affinity);
- if (_plugin->_plugins.end() == _plugin->_plugins.find(affinity)) {
- IE_SUPPRESS_DEPRECATED_START
- _plugin->_plugins[affinity] = _plugin->GetDevicePlugin(affinity);
- IE_SUPPRESS_DEPRECATED_END
- }
- }
-
if (dumpDotFile) {
std::stringstream stream(std::stringstream::out);
stream << "hetero_subgraphs_" << network.getName() << ".dot";
networkStats = nullptr;
}
+ std::vector<NetworkDesc> descs;
+ std::vector<CNNLayerPtr> tempLayers;
for (auto &&subgraph : subgraphs) {
auto affinity = (*subgraph.begin())->affinity;
tempLayers.assign(subgraph.begin(), subgraph.end());
inp->second->getPreProcess() = it.second->getPreProcess();
}
}
+
// go over all inputs/outputs and right now
// set precision for intermediate data (not for external) to FP32
- // later on we have to add Plugin::getPreferableInputPrecision(network) and
- // Plugin::getPreferableOutputPrecision(network) and set precision based on this info
- // TODO(amalyshe) add clever selectino of precision for intermediate blobs
for (auto &&it : clonedInputs) {
if (externalInputsData.find(it.first) == externalInputsData.end()) {
it.second->setPrecision(Precision::FP32);
}));
auto cfg = _config;
- cfg[PluginConfigInternalParams::KEY_SUBNETWORK_WITH_NETWORK_INPUTS] = isInputSubnetwork
- ? CONFIG_VALUE(YES)
- : CONFIG_VALUE(NO);
- IE_SUPPRESS_DEPRECATED_START
- auto plugin = _plugin->_plugins[d._device];
- d._network = plugin._ref.LoadNetwork(d._clonedNetwork, Engine::GetSupportedConfig(plugin._config, cfg, plugin._ref));
- IE_SUPPRESS_DEPRECATED_END
- }
-
- networks = std::move(descs);
-}
+ cfg[PluginConfigInternalParams::KEY_SUBNETWORK_WITH_NETWORK_INPUTS] =
+ isInputSubnetwork ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
-namespace {
+ auto deviceName = d._device;
+ auto metaDevices = _heteroPlugin->GetDevicePlugins(deviceName, cfg);
+ assert(metaDevices.size() == 1);
-IE_SUPPRESS_DEPRECATED_START
-IInferencePluginAPI * getInferencePluginAPIInterface(IInferencePlugin * iplugin) {
- return dynamic_cast<IInferencePluginAPI *>(iplugin);
-}
+ auto loadConfig = metaDevices[deviceName];
+ d._network = _heteroPlugin->GetCore()->LoadNetwork(d._clonedNetwork, deviceName, loadConfig);
+ }
-IInferencePluginAPI * getInferencePluginAPIInterface(InferenceEnginePluginPtr iplugin) {
- return getInferencePluginAPIInterface(static_cast<IInferencePlugin *>(iplugin.operator->()));
+ networks = std::move(descs);
}
-IE_SUPPRESS_DEPRECATED_END
-
-} // namespace
HeteroExecutableNetwork::HeteroExecutableNetwork(std::istream& heteroModel,
const std::map<std::string, std::string>& configs,
- Engine* plugin) :
- _plugin(plugin) {
+ Engine* heteroPlugin) :
+ _heteroPlugin(heteroPlugin) {
std::string heteroXmlStr;
std::getline(heteroModel, heteroXmlStr);
pugi::xml_node subnetworksNode = heteroNode.child("subnetworks");
for (auto subnetworkNode = subnetworksNode.child("subnetwork"); !subnetworkNode.empty();
subnetworkNode = subnetworkNode.next_sibling("subnetwork")) {
- auto device = GetStrAttr(subnetworkNode, "device");
- _affinities.push_back(device);
-
- if (_plugin->_plugins.end() == _plugin->_plugins.find(device)) {
- IE_SUPPRESS_DEPRECATED_START
- _plugin->_plugins[device] = _plugin->GetDevicePlugin(device);
- IE_SUPPRESS_DEPRECATED_END
- }
+ auto deviceName = GetStrAttr(subnetworkNode, "device");
- auto& plugin = _plugin->_plugins[device];
- auto supportedConfig = Engine::GetSupportedConfig(plugin._config, importedConfigs, plugin._ref);
- IE_SUPPRESS_DEPRECATED_START
- auto pluginAPI = getInferencePluginAPIInterface(plugin._ref);
- IE_SUPPRESS_DEPRECATED_END
+ auto metaDevices = _heteroPlugin->GetDevicePlugins(deviceName, importedConfigs);
+ assert(metaDevices.size() == 1);
+ auto& loadConfig = metaDevices[deviceName];
InferenceEngine::ExecutableNetwork executableNetwork;
CNNNetwork cnnnetwork;
bool loaded = false;
try {
- executableNetwork = pluginAPI->ImportNetwork(heteroModel, supportedConfig);
+ executableNetwork = _heteroPlugin->GetCore()->ImportNetwork(heteroModel, deviceName, loadConfig);
} catch(InferenceEngine::details::InferenceEngineException& ie_ex) {
if (std::string::npos != std::string{ie_ex.what()}.find(NOT_IMPLEMENTED_str)) {
// read XML content
heteroModel.read(dataBlob->buffer(), dataSize);
}
- cnnnetwork = _plugin->GetCore()->ReadNetwork(xmlString, std::move(dataBlob));
+ cnnnetwork = _heteroPlugin->GetCore()->ReadNetwork(xmlString, std::move(dataBlob));
auto inputs = cnnnetwork.getInputsInfo();
auto inputsNode = subnetworkNode.child("inputs");
for (auto inputNode = inputsNode.child("input"); !inputNode.empty(); inputNode = inputNode.next_sibling("input")) {
for (auto outputNode = outputsNode.child("output"); !outputNode.empty(); outputNode = outputNode.next_sibling("output")) {
outputs[GetStrAttr(outputNode, "name")]->setPrecision(Precision::FromStr(GetStrAttr(outputNode, "precision")));
}
- IE_SUPPRESS_DEPRECATED_START
- executableNetwork = plugin._ref.LoadNetwork(cnnnetwork, supportedConfig);
- IE_SUPPRESS_DEPRECATED_END
+ executableNetwork = _heteroPlugin->GetCore()->LoadNetwork(cnnnetwork, deviceName, loadConfig);
loaded = true;
} else {
throw;
}
descs.emplace_back(NetworkDesc{
- device,
+ deviceName,
loaded ? CNNNetwork{cloneNet(static_cast<InferenceEngine::ICNNNetwork&>(cnnnetwork))} : CNNNetwork{},
executableNetwork,
});
auto heteroInferRequest = std::dynamic_pointer_cast<HeteroInferRequest>(
CreateInferRequestImpl(_networkInputs, _networkOutputs));
heteroInferRequest->setPointerToExecutableNetworkInternal(shared_from_this());
- auto asyncTreadSafeImpl = std::make_shared<HeteroAsyncInferRequest>(heteroInferRequest, _taskExecutor, _callbackExecutor);
- asyncRequest.reset(new InferRequestBase<HeteroAsyncInferRequest>(asyncTreadSafeImpl),
+ auto asyncThreadSafeImpl = std::make_shared<HeteroAsyncInferRequest>(heteroInferRequest, _taskExecutor, _callbackExecutor);
+ asyncRequest.reset(new InferRequestBase<HeteroAsyncInferRequest>(asyncThreadSafeImpl),
[](IInferRequest *p) { p->Release(); });
- asyncTreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
+ asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
}
void HeteroExecutableNetwork::GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *) const {
/**
* @brief constructor
*/
- HeteroExecutableNetwork(const InferenceEngine::ICNNNetwork& network,
+ HeteroExecutableNetwork(const InferenceEngine::ICNNNetwork& network,
const std::map<std::string, std::string>& config,
Engine* plugin);
};
std::vector<NetworkDesc> networks;
- Engine* _plugin;
+ Engine* _heteroPlugin;
std::string _name;
- std::vector<std::string> _affinities;
std::map<std::string, std::string> _config;
};
//
#include "ie_metric_helpers.hpp"
-#include "ie_plugin_dispatcher.hpp"
#include "hetero_plugin.hpp"
-#include "ie_util_internal.hpp"
#include <memory>
#include <vector>
#include <map>
#include "hetero/hetero_plugin_config.hpp"
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include "hetero_executable_network.hpp"
-#include "cpp_interfaces/base/ie_inference_plugin_api.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::PluginConfigParams;
"heteroPlugin" // plugin description message
};
-void Engine::GetVersion(const Version *&versionInfo)noexcept {
- versionInfo = &heteroPluginDescription;
-}
-
Engine::Engine() {
_pluginName = "HETERO";
- _config[InferenceEngine::PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = "YES";
+ _config[KEY_EXCLUSIVE_ASYNC_REQUESTS] = YES;
_config[HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)] = NO;
}
-InferenceEngine::ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(const ICore* /*core*/,
- const InferenceEngine::ICNNNetwork& network,
+namespace {
+
+Engine::Configs mergeConfigs(Engine::Configs config, const Engine::Configs & local) {
+ for (auto && kvp : local) {
+ config[kvp.first] = kvp.second;
+ }
+ return config;
+}
+
+} // namespace
+
+InferenceEngine::ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork& network,
const Configs& config) {
- // TODO(amalyshe) do we need here verification of input precisions?
- Configs tconfig;
- tconfig = config;
-
- // we must not override the parameter, but need to copy everything from plugin config
- for (auto && c : _config) {
- if (tconfig.find(c.first) == tconfig.end()) {
- tconfig[c.first] = c.second;
- }
+ if (GetCore() == nullptr) {
+ THROW_IE_EXCEPTION << "Please, work with HETERO device via InferencEngine::Core object";
}
- return std::make_shared<HeteroExecutableNetwork>(*cloneNet(network), tconfig, this);
+ return std::make_shared<HeteroExecutableNetwork>(*cloneNet(network), mergeConfigs(_config, config), this);
}
ExecutableNetwork Engine::ImportNetworkImpl(std::istream& heteroModel, const Configs& config) {
- Configs tconfig;
- tconfig = config;
-
- // we must not override the parameter, but need to copy everything from plugin config
- for (auto && c : _config) {
- if (tconfig.find(c.first) == tconfig.end()) {
- tconfig[c.first] = c.second;
- }
+ if (GetCore() == nullptr) {
+ THROW_IE_EXCEPTION << "Please, work with HETERO device via InferencEngine::Core object";
}
IExecutableNetwork::Ptr executableNetwork;
- // Use config provided by an user ignoring default config
executableNetwork.reset(new ExecutableNetworkBase<ExecutableNetworkInternal>(
- std::make_shared<HeteroExecutableNetwork>(heteroModel, tconfig, this)),
+ std::make_shared<HeteroExecutableNetwork>(heteroModel, mergeConfigs(_config, config), this)),
[](InferenceEngine::details::IRelease *p) {p->Release();});
return ExecutableNetwork{executableNetwork};
}
-namespace {
-
-IE_SUPPRESS_DEPRECATED_START
-
-IInferencePluginAPI * getInferencePluginAPIInterface(IInferencePlugin * iplugin) {
- return dynamic_cast<IInferencePluginAPI *>(iplugin);
-}
-
-IInferencePluginAPI * getInferencePluginAPIInterface(InferenceEnginePluginPtr iplugin) {
- return getInferencePluginAPIInterface(static_cast<IInferencePlugin *>(iplugin.operator->()));
-}
-
-IInferencePluginAPI * getInferencePluginAPIInterface(InferencePlugin plugin) {
- return getInferencePluginAPIInterface(static_cast<InferenceEnginePluginPtr>(plugin));
-}
-
-} // namespace
-
-Engine::Configs Engine::GetSupportedConfig(const Engine::Configs& globalConfig,
- const Engine::Configs& localConfig,
- const InferenceEngine::InferencePlugin& plugin) {
- auto pluginApi = getInferencePluginAPIInterface(plugin);
- std::vector<std::string> supportedConfigKeys = pluginApi->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {});
+Engine::Configs Engine::GetSupportedConfig(const Engine::Configs& config, const std::string & deviceName) const {
+ std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
Engine::Configs supportedConfig;
for (auto&& key : supportedConfigKeys) {
- auto itKey = localConfig.find(key);
- if (localConfig.end() != itKey) {
+ auto itKey = config.find(key);
+ if (config.end() != itKey) {
supportedConfig[key] = itKey->second;
- } else {
- itKey = globalConfig.find(key);
- if (globalConfig.end() != itKey) {
- supportedConfig[key] = itKey->second;
- }
}
}
return supportedConfig;
}
-Engine::PluginEntry Engine::GetDevicePlugin(const std::string& deviceWithID) const {
- InferenceEngine::InferencePlugin plugin;
- DeviceIDParser deviceParser(deviceWithID);
- std::string deviceName = deviceParser.getDeviceName();
-
- if (nullptr == _core) {
- IE_SUPPRESS_DEPRECATED_START
- // try to create plugin
- PluginDispatcher dispatcher({file_name_t()});
- plugin = dispatcher.getPluginByDevice(deviceName);
- IE_SUPPRESS_DEPRECATED_END
- } else {
- plugin = InferencePlugin{_core->GetPluginByName(deviceName)};
- }
-
- try {
- for (auto&& ext : _extensions) {
- plugin.AddExtension(ext);
+Engine::DeviceMetaInformationMap Engine::GetDevicePlugins(const std::string& targetFallback,
+ const Configs & localConfig) const {
+ auto getDeviceConfig = [&](const std::string & deviceWithID) {
+ DeviceIDParser deviceParser(deviceWithID);
+ std::string deviceName = deviceParser.getDeviceName();
+ Configs tconfig = mergeConfigs(_config, localConfig);
+
+ // set device ID if any
+ std::string deviceIDLocal = deviceParser.getDeviceID();
+ if (!deviceIDLocal.empty()) {
+ tconfig[KEY_DEVICE_ID] = deviceIDLocal;
}
- } catch (InferenceEngine::details::InferenceEngineException &) {}
-
- Configs pluginConfig = GetSupportedConfig(_config, {}, plugin);
-
- // set device ID if any
- std::string deviceIDLocal = deviceParser.getDeviceID();
- if (!deviceIDLocal.empty()) {
- pluginConfig = GetSupportedConfig(pluginConfig, { { KEY_DEVICE_ID, deviceIDLocal } }, plugin);
- }
-
- return { plugin, pluginConfig };
-}
-IE_SUPPRESS_DEPRECATED_END
+ return GetSupportedConfig(tconfig, deviceName);
+ };
-Engine::Plugins Engine::GetDevicePlugins(const std::string& targetFallback) const {
- auto devices = InferenceEngine::DeviceIDParser::getHeteroDevices(targetFallback);
- Engine::Plugins plugins = _plugins;
- for (auto&& device : devices) {
- auto itPlugin = plugins.find(device);
- if (plugins.end() == itPlugin) {
- IE_SUPPRESS_DEPRECATED_START
- plugins[device] = GetDevicePlugin(device);
- IE_SUPPRESS_DEPRECATED_END
+ auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(targetFallback);
+ Engine::DeviceMetaInformationMap metaDevices;
+ for (auto&& deviceName : fallbackDevices) {
+ auto itPlugin = metaDevices.find(deviceName);
+ if (metaDevices.end() == itPlugin) {
+ metaDevices[deviceName] = getDeviceConfig(deviceName);
}
}
- return plugins;
-}
-
-Engine::Plugins Engine::GetDevicePlugins(const std::string& targetFallback) {
- _plugins = const_cast<const Engine*>(this)->GetDevicePlugins(targetFallback);
- return _plugins;
+ return metaDevices;
}
void Engine::SetConfig(const Configs &configs) {
for (auto&& config : configs) {
_config[config.first] = config.second;
}
-
- for (auto&& plugin : _plugins) {
- plugin.second._config = GetSupportedConfig(plugin.second._config, configs, plugin.second._ref);
- }
-}
-
-void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) {
- _extensions.emplace_back(extension);
- try {
- for (auto&& plugin : _plugins) {
- IE_SUPPRESS_DEPRECATED_START
- plugin.second._ref.AddExtension(extension);
- IE_SUPPRESS_DEPRECATED_END
- }
- } catch (InferenceEngine::details::InferenceEngineException &) {}
}
HeteroLayerColorer::HeteroLayerColorer(const std::vector<std::string>& devices) {
}
void Engine::SetAffinity(InferenceEngine::ICNNNetwork &network, const Configs &config) {
- Configs tconfig = _config;
- for (auto && value : config) {
- tconfig[value.first] = value.second;
- }
-
- auto it = tconfig.find("TARGET_FALLBACK");
- if (it == tconfig.end()) {
- THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
- }
-
- GetDevicePlugins(it->second);
QueryNetworkResult qr;
- QueryNetwork(network, tconfig, qr);
+ QueryNetwork(network, config, qr);
details::CNNNetworkIterator i(&network);
while (i != details::CNNNetworkIterator()) {
i++;
}
- if (YES == tconfig[HETERO_CONFIG_KEY(DUMP_GRAPH_DOT)]) {
+ auto dumpDot = [](const Configs & config) {
+ auto it = config.find(HETERO_CONFIG_KEY(DUMP_GRAPH_DOT));
+ return it != config.end() ? it->second == YES : false;
+ };
+
+ if (dumpDot(config) || dumpDot(_config)) {
std::unordered_set<std::string> devicesSet;
details::CNNNetworkIterator i(&network);
while (i != details::CNNNetworkIterator()) {
stream << "hetero_affinity_" << network.getName() << ".dot";
std::ofstream file(stream.str());
-
saveGraphToDot(network, file, HeteroLayerColorer{devices});
}
}
void Engine::QueryNetwork(const ICNNNetwork &network, const Configs& config, QueryNetworkResult &qr) const {
- auto it = config.find("TARGET_FALLBACK");
- if (it == config.end()) {
- it = _config.find("TARGET_FALLBACK");
-
- if (it == _config.end()) {
- THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
- }
+ if (GetCore() == nullptr) {
+ THROW_IE_EXCEPTION << "Please, work with HETERO device via InferencEngine::Core object";
}
- Plugins plugins = GetDevicePlugins(it->second);
+ auto tconfig = mergeConfigs(_config, config);
+ auto it = tconfig.find("TARGET_FALLBACK");
+ if (it == tconfig.end()) {
+ THROW_IE_EXCEPTION << "The 'TARGET_FALLBACK' option was not defined for heterogeneous plugin";
+ }
- qr.rc = StatusCode::OK;
+ std::string fallbackDevicesStr = it->second;
+ DeviceMetaInformationMap metaDevices = GetDevicePlugins(fallbackDevicesStr, tconfig);
std::map<std::string, QueryNetworkResult> queryResults;
- // go over devices, create appropriate plugins and
- for (auto&& value : plugins) {
- auto& device = value.first;
- auto& plugin = value.second;
- QueryNetworkResult r;
- IE_SUPPRESS_DEPRECATED_START
- plugin._ref.QueryNetwork(network, GetSupportedConfig(plugin._config, config, plugin._ref), r);
- IE_SUPPRESS_DEPRECATED_END
- queryResults[device] = r;
+ // go over devices and call query network
+ for (auto&& metaDevice : metaDevices) {
+ auto& deviceName = metaDevice.first;
+ queryResults[deviceName] = GetCore()->QueryNetwork(network, deviceName, metaDevice.second);
}
// WARNING: Here is devices with user set priority
- auto falbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(it->second);
+ auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(fallbackDevicesStr);
details::CNNNetworkIterator i(&network);
while (i != details::CNNNetworkIterator()) {
CNNLayer::Ptr layer = *i;
- for (auto&& device : falbackDevices) {
- auto& deviceQueryResult = queryResults[device];
+ for (auto&& deviceName : fallbackDevices) {
+ auto& deviceQueryResult = queryResults[deviceName];
if (deviceQueryResult.supportedLayersMap.find(layer->name) != deviceQueryResult.supportedLayersMap.end()) {
- qr.supportedLayersMap[layer->name] = device;
+ qr.supportedLayersMap[layer->name] = deviceName;
break;
}
}
i++;
}
+
+ // set OK status
+ qr.rc = StatusCode::OK;
}
Parameter Engine::GetMetric(const std::string& name, const std::map<std::string, Parameter> & /*options*/) const {
IE_ASSERT(it != _config.end());
bool dump = it->second == YES;
return { dump };
+ } else if (name == "TARGET_FALLBACK") {
+ auto it = _config.find("TARGET_FALLBACK");
+ if (it == _config.end()) {
+ THROW_IE_EXCEPTION << "Value for TARGET_FALLBACK is not set";
+ } else {
+ return { it->second };
+ }
} else {
THROW_IE_EXCEPTION << "Unsupported config key: " << name;
}
class Engine : public InferenceEngine::InferencePluginInternal {
public:
using Configs = std::map<std::string, std::string>;
-
- struct PluginEntry {
- IE_SUPPRESS_DEPRECATED_START
- InferenceEngine::InferencePlugin _ref;
- IE_SUPPRESS_DEPRECATED_END
- Configs _config;
- };
-
- using Plugins = std::unordered_map<std::string, PluginEntry >;
-
- using Devices = std::vector<std::string>;
+ using DeviceMetaInformationMap = std::unordered_map<std::string, Configs>;
Engine();
- void GetVersion(const InferenceEngine::Version *&versionInfo) noexcept;
-
InferenceEngine::ExecutableNetworkInternal::Ptr
- LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network, const Configs &config) override;
- void SetConfig(const Configs &config) override;
-
- void SetAffinity(InferenceEngine::ICNNNetwork& network, const Configs &config);
+ LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const Configs &config) override;
- void AddExtension(InferenceEngine::IExtensionPtr extension)override;
+ void SetConfig(const Configs &config) override;
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
const Configs& config, InferenceEngine::QueryNetworkResult &res) const override;
- InferenceEngine::Parameter GetMetric(const std::string& name,
- const std::map<std::string, InferenceEngine::Parameter> & options) const override;
-
- InferenceEngine::Parameter GetConfig(const std::string& name,
- const std::map<std::string, InferenceEngine::Parameter> & options) const override;
-
- IE_SUPPRESS_DEPRECATED_START
-
- PluginEntry GetDevicePlugin(const std::string& device) const;
+ InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string,
+ InferenceEngine::Parameter> & options) const override;
- static Configs GetSupportedConfig(const Configs& globalConfig, const Configs& localConfig, const InferenceEngine::InferencePlugin& plugin);
+ InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string,
+ InferenceEngine::Parameter> & options) const override;
- IE_SUPPRESS_DEPRECATED_END
+ ExecutableNetwork ImportNetworkImpl(std::istream& heteroModel, const Configs& config) override;
- Plugins GetDevicePlugins(const std::string& targetFallback);
- Plugins GetDevicePlugins(const std::string& targetFallback) const;
+ void SetAffinity(InferenceEngine::ICNNNetwork& network, const Configs &config);
- ExecutableNetwork ImportNetworkImpl(std::istream& heteroModel, const Configs& config) override;
+ DeviceMetaInformationMap GetDevicePlugins(const std::string& targetFallback,
+ const Configs & localConfig) const;
- Plugins _plugins;
- std::vector<InferenceEngine::IExtensionPtr> _extensions;
+private:
+ Configs GetSupportedConfig(const Configs& config, const std::string & deviceName) const;
};
struct HeteroLayerColorer {
target_compile_definitions(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:ittnotify,INTERFACE_COMPILE_DEFINITIONS>)
endif()
-if(ENABLE_IR_READER)
- target_compile_definitions(${TARGET_NAME}_obj PRIVATE ENABLE_IR_READER)
-endif()
-
target_include_directories(${TARGET_NAME}_obj PRIVATE $<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>)
if(ENABLE_MKL_DNN)
target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
- POSSIBLE_PLUGINS HeteroPlugin clDNNPlugin GNAPlugin MKLDNNPlugin myriadPlugin)
+ POSSIBLE_PLUGINS MultiDevicePlugin HeteroPlugin clDNNPlugin GNAPlugin MKLDNNPlugin myriadPlugin)
# Static library used for unit tests which are always built
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.hpp>
#include "ngraph_ops/eltwise.hpp"
// WA: for cnnNetwork ngraph constructor
CNNNetwork::CNNNetwork(const std::shared_ptr<const ngraph::Function>& graph) {
+ if (graph == nullptr) {
+ THROW_IE_EXCEPTION << "CNNNetwork was not initialized: 'graph' object is empty";
+ }
+
// Copy nGraph function
network = std::make_shared<CNNNetworkNGraphImpl>(copyFunction(graph, false, {}));
actual = network.get();
keep_input_info(*this, ptr);
}
for (auto& output : _outputData) {
- // Convert precision into native format. Be consistent with possible convertation to CNNNetwork later.
- if (output.second->getPrecision() != Precision::FP32 &&
+ // Convert precision into native format. Be consistent with possible conversion to CNNNetwork later.
+ if (output.second->getPrecision() == Precision::I64) {
+ output.second->setPrecision(Precision::I32);
+ } else if (output.second->getPrecision() != Precision::FP32 &&
output.second->getPrecision() != Precision::I32) {
output.second->setPrecision(Precision::FP32);
}
::ngraph::op::GenericIE::DisableReshape noReshape(graph);
::ngraph::pass::CommonOptimizations().run_on_function(graph);
+ ::ngraph::pass::ConvertOpSet3ToOpSet2().run_on_function(graph);
::ngraph::pass::ConvertOpSet2ToOpSet1().run_on_function(graph);
::ngraph::pass::ConvertOpSet1ToLegacy().run_on_function(graph);
network = InferenceEngine::details::convertFunctionToICNNNetwork(graph, *this);
::ngraph::op::GenericIE::DisableReshape noReshape(graph);
::ngraph::pass::CommonOptimizations().run_on_function(graph);
+ ::ngraph::pass::ConvertOpSet3ToOpSet2().run_on_function(graph);
::ngraph::pass::ConvertOpSet2ToOpSet1().run_on_function(graph);
::ngraph::pass::ConvertOpSet1ToLegacy().run_on_function(graph);
cnnNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, *this);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#ifdef _WIN32
-#define _WINSOCKAPI_
-#include <windows.h>
-
-BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) {
- switch (ul_reason_for_call) {
- case DLL_PROCESS_ATTACH:
- case DLL_THREAD_ATTACH:
- case DLL_THREAD_DETACH:
- case DLL_PROCESS_DETACH:
- break;
- }
- return TRUE;
-}
-
-#endif
#include <string>
#include <utility>
#include <vector>
+#include <mutex>
#include <ngraph/opsets/opset.hpp>
#include "cpp/ie_cnn_net_reader.h"
return getInferencePluginAPIInterface(static_cast<InferenceEnginePluginPtr>(plugin));
}
+template <typename T>
+struct Parsed {
+ std::string _deviceName;
+ std::map<std::string, T> _config;
+};
+
+template <typename T = Parameter>
+Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::map<std::string, T>& config = {}) {
+ auto config_ = config;
+ auto deviceName_ = deviceName;
+ if (deviceName_.find("HETERO:") == 0) {
+ deviceName_ = "HETERO";
+ config_["TARGET_FALLBACK"] = deviceName.substr(7);
+ } else if (deviceName_.find("MULTI:") == 0) {
+ deviceName_ = "MULTI";
+ config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
+ } else {
+ DeviceIDParser parser(deviceName_);
+ deviceName_ = parser.getDeviceName();
+ std::string deviceIDLocal = parser.getDeviceID();
+
+ if (!deviceIDLocal.empty()) {
+ config_[KEY_DEVICE_ID] = deviceIDLocal;
+ }
+ }
+ return {deviceName_, config_};
+}
+
+Parameter copyParameterValue(const Parameter & value) {
+ if (value.is<bool>()) {
+ return { value.as<bool>() };
+ } else if (value.is<int>()) {
+ return { value.as<int>() };
+ } else if (value.is<unsigned int>()) {
+ return { value.as<unsigned int>() };
+ } else if (value.is<float>()) {
+ return { value.as<float>() };
+ } else if (value.is<std::string>()) {
+ return { value.as<std::string>() };
+ } else if (value.is<std::vector<std::string> >()) {
+ return { value.as<std::vector<std::string> >() };
+ } else if (value.is<std::vector<int> >()) {
+ return { value.as<std::vector<int> >() };
+ } else if (value.is<std::vector<float> >()) {
+ return { value.as<std::vector<float> >() };
+ } else if (value.is<std::vector<unsigned int> >()) {
+ return { value.as<std::vector<unsigned int> >() };
+ } else if (value.is<std::tuple<unsigned int, unsigned int, unsigned int> >()) {
+ return { value.as<std::tuple<unsigned int, unsigned int, unsigned int> >() };
+ } else if (value.is<std::tuple<unsigned int, unsigned int> >()) {
+ return { value.as<std::tuple<unsigned int, unsigned int> >() };
+ }
+
+ return std::move(value);
+}
+
} // namespace
CNNNetReaderPtr CreateCNNNetReaderPtr() noexcept {
};
/**
- * Hold original blob in order to avoid situations when original blob is allocated on stack
+ * @brief Holds original blob in order to avoid situations
+ * when original blob is allocated on stack
*/
class WeightsHolderBlob : public TBlob<uint8_t> {
Blob::CPtr originBlob;
std::vector<IExtensionPtr> extensions;
std::map<std::string, PluginDescriptor> pluginRegistry;
+ mutable std::mutex pluginsMutex; // to lock parallel access to pluginRegistry and plugins
public:
Impl();
/**
* @brief Register plugins for devices which are located in .xml configuration file. The function supports UNICODE path
- * @param xmlConfigFile - an .xml configuraion with device / plugin information
+     * @param xmlConfigFile An .xml configuration with device / plugin information
*/
void RegisterPluginsInRegistry(const std::string& xmlConfigFile) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
auto parse_result = ParseXml(xmlConfigFile.c_str());
if (!parse_result.error_msg.empty()) {
THROW_IE_EXCEPTION << parse_result.error_msg;
StatusCode rt = cnnReader->ReadNetwork(modelPath.c_str(), &desc);
if (rt != OK) THROW_IE_EXCEPTION << desc.msg;
if (cnnReader->getVersion(&desc) >= 10) {
- cnnReader->addExtensions(getExtensions());
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+ cnnReader->addExtensions(GetExtensions());
}
std::string bPath = binPath;
if (bPath.empty()) {
StatusCode rt = cnnReader->ReadNetwork(model.data(), model.length(), &desc);
if (rt != OK) THROW_IE_EXCEPTION << desc.msg;
if (cnnReader->getVersion(&desc) >= 10) {
- cnnReader->addExtensions(getExtensions());
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+ cnnReader->addExtensions(GetExtensions());
}
TBlob<uint8_t>::Ptr weights_ptr;
if (weights) {
return CNNNetwork(cnnReader);
}
+ ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) override {
+ IE_PROFILING_AUTO_SCOPE(Core::LoadNetwork)
+ auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+ IE_SUPPRESS_DEPRECATED_START
+ return GetCPPPluginByName(parsed._deviceName).LoadNetwork(network, parsed._config);
+ IE_SUPPRESS_DEPRECATED_END
+ }
+
IE_SUPPRESS_DEPRECATED_START
+ ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) override {
+ auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+
+ if (parsed._deviceName.empty()) {
+ ExportMagic magic = {};
+ auto currentPos = networkModel.tellg();
+ networkModel.read(magic.data(), magic.size());
+ auto exportedWithName = (exportMagic == magic);
+ if (exportedWithName) {
+ std::getline(networkModel, parsed._deviceName);
+ }
+ networkModel.seekg(currentPos, networkModel.beg);
+ }
+
+ auto cppPlugin = GetCPPPluginByName(parsed._deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
+ if (pluginAPIInterface == nullptr) {
+ THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the ImportNetwork method";
+ }
+
+ return pluginAPIInterface->ImportNetwork(networkModel, parsed._config);
+ }
+
+ QueryNetworkResult QueryNetwork(const ICNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) const override {
+ QueryNetworkResult res;
+ auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+ IE_SUPPRESS_DEPRECATED_START
+ GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config, res);
+ IE_SUPPRESS_DEPRECATED_END
+ return res;
+ }
+
+ Parameter GetMetric(const std::string& deviceName, const std::string& name) const override {
+ // HETERO case
+ {
+ if (deviceName.find("HETERO:") == 0) {
+ THROW_IE_EXCEPTION
+ << "You can get specific metrics with the GetMetric only for the HETERO itself (without devices). "
+ "To get individual devices's metrics call GetMetric for each device separately";
+ }
+ }
+
+ // MULTI case
+ {
+ if (deviceName.find("MULTI:") == 0) {
+ THROW_IE_EXCEPTION
+ << "You can get specific metrics with the GetMetric only for the MULTI itself (without devices). "
+ "To get individual devices's metrics call GetMetric for each device separately";
+ }
+ }
+
+ auto parsed = parseDeviceNameIntoConfig(deviceName);
+ IE_SUPPRESS_DEPRECATED_START
+ InferencePlugin cppPlugin = GetCPPPluginByName(parsed._deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
+ IE_SUPPRESS_DEPRECATED_END
+
+ if (pluginAPIInterface == nullptr) {
+ THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the GetMetric method";
+ }
+
+ // we need to return a copy of Parameter object which is created on Core side,
+ // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
+ // TODO: remove this WA after *-31417 is resolved
+ return copyParameterValue(pluginAPIInterface->GetMetric(name, parsed._config));
+ }
+
/**
+ * @deprecated Use ICore::LoadNetwork, ICore::QueryNetwork, ICore::GetMetric instead
* @brief Returns reference to plugin by a device name
- * @param deviceName - a name of device
+ * @param deviceName A name of device
* @return Reference to a plugin
*/
InferenceEnginePluginPtr GetPluginByName(const std::string& deviceName) const override {
}
/**
+ * @deprecated
* @brief Returns reference to CPP plugin wrapper by a device name
- * @param deviceName - a name of device
+ * @param deviceName A name of device
* @return Reference to a CPP plugin wrapper
*/
InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
IE_SUPPRESS_DEPRECATED_START
auto it = pluginRegistry.find(deviceName);
IE_SUPPRESS_DEPRECATED_END
/**
- * @brief Unregisters plugin for specified device
- * @param deviceName - a name of device
+     * @brief Unloads plugin for specified device, but its meta-data remains in the plugin registry
+ * @param deviceName A name of device
*/
- void UnregisterPluginByName(const std::string& deviceName) {
+ void UnloadPluginByName(const std::string& deviceName) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
auto it = plugins.find(deviceName);
if (it == plugins.end()) {
THROW_IE_EXCEPTION << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
}
/**
- * @brief Registers plugin in registry for specified device
- * @param deviceName - a name of device
+ * @brief Registers plugin meta-data in registry for specified device
+ * @param deviceName A name of device
*/
void RegisterPluginByName(const std::string& pluginName, const std::string& deviceName) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
auto it = pluginRegistry.find(deviceName);
if (it != pluginRegistry.end()) {
THROW_IE_EXCEPTION << "Device with \"" << deviceName << "\" is already registered in the InferenceEngine";
pluginRegistry[deviceName] = desc;
}
+ /**
+     * @brief Provides a list of plugin names in registry; physically such plugins may not be created
+ * @return A list of plugin names
+ */
std::vector<std::string> GetListOfDevicesInRegistry() const {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
std::vector<std::string> listOfDevices;
for (auto&& pluginDesc : pluginRegistry) {
listOfDevices.push_back(pluginDesc.first);
return listOfDevices;
}
+ /**
+ * @brief Sets config values for a plugin or set of plugins
+ * @param deviceName A device name to set config to
+ * If empty, config is set for all the plugins / plugin's meta-data
+ */
void SetConfigForPlugins(const std::map<std::string, std::string>& config, const std::string& deviceName) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
// set config for plugins in registry
bool configIsSet = false;
for (auto& desc : pluginRegistry) {
}
}
- void addExtension(const IExtensionPtr& extension) {
+ /**
+ * @brief Registers the extension in a Core object
+ * Such extensions can be used for both CNNNetwork readers and device plugins
+ */
+ void AddExtension(const IExtensionPtr& extension) {
+ std::lock_guard<std::mutex> lock(pluginsMutex);
+
std::map<std::string, ngraph::OpSet> opsets = extension->getOpSets();
for (const auto& it : opsets) {
if (opsetNames.find(it.first) != opsetNames.end())
opsetNames.insert(it.first);
}
+ // add extensions for already created plugins
for (auto& plugin : plugins) {
IE_SUPPRESS_DEPRECATED_START
try {
extensions.emplace_back(extension);
}
- const std::vector<IExtensionPtr>& getExtensions() const {
+ /**
+ * @brief Provides a list of extensions
+ * @return A list of registered extensions
+ */
+ const std::vector<IExtensionPtr>& GetExtensions() const {
return extensions;
}
};
std::string deviceNameLocal = parser.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- const Version* version = _impl->GetCPPPluginByName(deviceNameLocal).GetVersion();
+ InferenceEngine::InferencePlugin cppPlugin = _impl->GetCPPPluginByName(deviceNameLocal);
+ const Version * version = cppPlugin.GetVersion();
IE_SUPPRESS_DEPRECATED_END
versions[deviceNameLocal] = *version;
}
}
IE_SUPPRESS_DEPRECATED_END
-namespace {
-template <typename T>
-struct Parsed {
- std::string _deviceName;
- std::map<std::string, T> _config;
-};
-
-template <typename T = Parameter>
-Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::map<std::string, T>& config = {}) {
- auto config_ = config;
- auto deviceName_ = deviceName;
- if (deviceName_.find("HETERO:") == 0) {
- deviceName_ = "HETERO";
- config_["TARGET_FALLBACK"] = deviceName.substr(7);
- } else if (deviceName_.find("MULTI:") == 0) {
- deviceName_ = "MULTI";
- config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
- } else {
- DeviceIDParser parser(deviceName_);
- deviceName_ = parser.getDeviceName();
- std::string deviceIDLocal = parser.getDeviceID();
-
- if (!deviceIDLocal.empty()) {
- config_[KEY_DEVICE_ID] = deviceIDLocal;
- }
- }
- return {deviceName_, config_};
-}
-} // namespace
-
CNNNetwork Core::ReadNetwork(const std::string& modelPath, const std::string& binPath) const {
return _impl->ReadNetwork(modelPath, binPath);
}
return _impl->ReadNetwork(model, weights);
}
-ExecutableNetwork Core::LoadNetwork(const CNNNetwork network, const std::string& deviceName,
+ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config) {
- IE_PROFILING_AUTO_SCOPE(Core::LoadNetwork)
- auto parsed = parseDeviceNameIntoConfig(deviceName, config);
- IE_SUPPRESS_DEPRECATED_START
- return _impl->GetCPPPluginByName(parsed._deviceName).LoadNetwork(network, parsed._config);
- IE_SUPPRESS_DEPRECATED_END
+ return _impl->LoadNetwork(network, deviceName, config);
}
void Core::AddExtension(const IExtensionPtr& extension) {
- _impl->addExtension(extension);
+ _impl->AddExtension(extension);
}
-ExecutableNetwork Core::LoadNetwork(const CNNNetwork network, RemoteContext::Ptr context,
+ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, RemoteContext::Ptr context,
const std::map<std::string, std::string>& config) {
IE_PROFILING_AUTO_SCOPE(Core::LoadNetwork)
std::map<std::string, std::string> config_ = config;
std::string deviceName = device.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the LoadNetwork method";
std::string deviceName = device.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the CreateContext method";
std::string deviceName = device.getDeviceName();
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the CreateContext method";
<< "MULTI device does not support extensions. Please, set extensions directly to fallback devices";
}
- DeviceIDParser parser(deviceName_);
- std::string deviceName = parser.getDeviceName();
-
- IE_SUPPRESS_DEPRECATED_START
- _impl->GetCPPPluginByName(deviceName).AddExtension(extension);
- _impl->addExtension(extension);
- IE_SUPPRESS_DEPRECATED_END
+ _impl->AddExtension(extension);
}
ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName,
IE_SUPPRESS_DEPRECATED_END
}
-IE_SUPPRESS_DEPRECATED_START
-
ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName,
const std::map<std::string, std::string>& config) {
- auto parsed = parseDeviceNameIntoConfig(deviceName, config);
-
- if (parsed._deviceName.empty()) {
- ExportMagic magic = {};
- networkModel.read(magic.data(), magic.size());
- auto exportedWithName = (exportMagic == magic);
- if (exportedWithName) {
- std::getline(networkModel, parsed._deviceName);
- }
- networkModel.seekg(0, networkModel.beg);
- }
-
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
- if (pluginAPIInterface == nullptr) {
- THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the ImportNetwork method";
- }
-
- return pluginAPIInterface->ImportNetwork(networkModel, parsed._config);
+ return _impl->ImportNetwork(networkModel, deviceName, config);
}
-IE_SUPPRESS_DEPRECATED_END
-
ExecutableNetwork Core::ImportNetwork(std::istream& networkModel,
const RemoteContext::Ptr& context,
const std::map<std::string, std::string>& config) {
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << deviceName << " does not implement the ImportNetwork method";
QueryNetworkResult Core::QueryNetwork(const ICNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config) const {
- QueryNetworkResult res;
- auto parsed = parseDeviceNameIntoConfig(deviceName, config);
- IE_SUPPRESS_DEPRECATED_START
- _impl->GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config, res);
- IE_SUPPRESS_DEPRECATED_END
- return res;
+ return _impl->QueryNetwork(network, deviceName, config);
}
void Core::SetConfig(const std::map<std::string, std::string>& config, const std::string& deviceName) {
auto parsed = parseDeviceNameIntoConfig(deviceName);
IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
+ auto cppPlugin = _impl->GetCPPPluginByName(parsed._deviceName);
+ auto pluginAPIInterface = getInferencePluginAPIInterface(cppPlugin);
IE_SUPPRESS_DEPRECATED_END
+
if (pluginAPIInterface == nullptr) {
THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the GetConfig method";
}
- return pluginAPIInterface->GetConfig(name, parsed._config);
+
+ // we need to return a copy of Parameter object which is created on Core side,
+ // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
+ // TODO: remove this WA after *-31417 is resolved
+ return copyParameterValue(pluginAPIInterface->GetConfig(name, parsed._config));
}
Parameter Core::GetMetric(const std::string& deviceName, const std::string& name) const {
- // HETERO case
- {
- if (deviceName.find("HETERO:") == 0) {
- THROW_IE_EXCEPTION
- << "You can get specific metrics with the GetMetric only for the HETERO itself (without devices). "
- "To get individual devices's metrics call GetMetric for each device separately";
- }
- }
-
- // MULTI case
- {
- if (deviceName.find("MULTI:") == 0) {
- THROW_IE_EXCEPTION
- << "You can get specific metrics with the GetMetric only for the MULTI itself (without devices). "
- "To get individual devices's metrics call GetMetric for each device separately";
- }
- }
-
- auto parsed = parseDeviceNameIntoConfig(deviceName);
- IE_SUPPRESS_DEPRECATED_START
- auto pluginAPIInterface = getInferencePluginAPIInterface(_impl->GetCPPPluginByName(parsed._deviceName));
- IE_SUPPRESS_DEPRECATED_END
- if (pluginAPIInterface == nullptr) {
- THROW_IE_EXCEPTION << parsed._deviceName << " does not implement the GetMetric method";
- }
-
- return pluginAPIInterface->GetMetric(name, parsed._config);
+ return _impl->GetMetric(deviceName, name);
}
std::vector<std::string> Core::GetAvailableDevices() const {
std::string propertyName = METRIC_KEY(AVAILABLE_DEVICES);
for (auto&& deviceName : _impl->GetListOfDevicesInRegistry()) {
- Parameter p;
std::vector<std::string> devicesIDs;
+ IE_SUPPRESS_DEPRECATED_START
try {
- p = GetMetric(deviceName, propertyName);
+ Parameter p = GetMetric(deviceName, propertyName);
devicesIDs = p.as<std::vector<std::string>>();
} catch (details::InferenceEngineException&) {
// plugin is not created by e.g. invalid env
THROW_IE_EXCEPTION << "Unknown exception is thrown while trying to create the " << deviceName
<< " device and call GetMetric";
}
+ IE_SUPPRESS_DEPRECATED_END
if (devicesIDs.size() > 1) {
for (auto&& deviceID : devicesIDs) {
DeviceIDParser parser(deviceName_);
std::string deviceName = parser.getDeviceName();
- _impl->UnregisterPluginByName(deviceName);
+ _impl->UnloadPluginByName(deviceName);
}
} // namespace InferenceEngine
#include "threading/ie_cpu_streams_executor.hpp"
namespace InferenceEngine {
+struct CPUStreamsExecutor::Impl {
+ struct Stream {
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
-struct PinningObserver: public tbb::task_scheduler_observer {
- CpuSet& _mask;
- int _ncpus = 0;
- int _streamId = 0;
- int _threadsPerStream = 0;
- int _threadBindingStep = 0;
- int _threadBindingOffset = 0;
-
- PinningObserver(tbb::task_arena& arena,
- CpuSet& mask,
- int ncpus,
- const int streamId,
- const int threadsPerStream,
- const int threadBindingStep,
- const int threadBindingOffset) :
- tbb::task_scheduler_observer(arena),
- _mask(mask),
- _ncpus(ncpus),
- _streamId(streamId),
- _threadsPerStream(threadsPerStream),
- _threadBindingStep(threadBindingStep),
- _threadBindingOffset(threadBindingOffset) {
- observe(true);
- }
-
- void on_scheduler_entry(bool) override {
- int threadIdx = tbb::task_arena::current_thread_index();
- int thrIdx = _streamId * _threadsPerStream + threadIdx + _threadBindingOffset;
- // pin thread to the vacant slot
- PinThreadToVacantCore(thrIdx, _threadBindingStep, _ncpus, _mask);
- }
-
- void on_scheduler_exit(bool) override {
- // reset the thread's mask (to the original process mask)
- PinCurrentThreadByMask(_ncpus, _mask);
- }
-
- ~PinningObserver() {
- observe(false);
- }
-};
-#endif // IE_THREAD != IE_THREAD_TBB
-
-struct Stream {
- int _streamId = 0;
- int _numaNodeId = 0;
-#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
- std::unique_ptr<tbb::task_arena> _taskArena;
- std::unique_ptr<PinningObserver> _pinningObserver;
+ struct Observer: public tbb::task_scheduler_observer {
+ CpuSet _mask;
+ int _ncpus = 0;
+ int _threadBindingStep = 0;
+ int _offset = 0;
+ Observer(tbb::task_arena& arena,
+ CpuSet mask,
+ int ncpus,
+ const int streamId,
+ const int threadsPerStream,
+ const int threadBindingStep,
+ const int threadBindingOffset) :
+ tbb::task_scheduler_observer(arena),
+ _mask{std::move(mask)},
+ _ncpus(ncpus),
+ _threadBindingStep(threadBindingStep),
+ _offset{streamId * threadsPerStream + threadBindingOffset} {
+ }
+ void on_scheduler_entry(bool) override {
+ PinThreadToVacantCore(_offset + tbb::task_arena::current_thread_index(), _threadBindingStep, _ncpus, _mask);
+ }
+ void on_scheduler_exit(bool) override {
+ PinCurrentThreadByMask(_ncpus, _mask);
+ }
+ ~Observer() override = default;
+ };
#endif
-};
-
-struct CPUStreamsExecutor::Impl {
- std::string _name;
- std::vector<std::thread> _threads;
- std::mutex _mutex;
- std::condition_variable _queueCondVar;
- std::queue<Task> _taskQueue;
- bool _isStopped = false;
- int _ncpus = 0;
- CpuSet _processMask;
- ThreadLocal<Stream*> _localStream;
-};
-
-int CPUStreamsExecutor::GetStreamId() {
- auto stream = _impl->_localStream.local();
- if (nullptr == stream) THROW_IE_EXCEPTION << "Not in the stream thread";
- return stream->_streamId;
-}
-
-int CPUStreamsExecutor::GetNumaNodeId() {
- auto stream = _impl->_localStream.local();
- if (nullptr == stream) THROW_IE_EXCEPTION << "Not in the stream thread";
- return stream->_numaNodeId;
-}
-
-CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) :
- _impl{new Impl} {
- IE_ASSERT(config._streams > 0);
- _impl->_name = config._name;
- auto numaNodes = getAvailableNUMANodes();
- IE_ASSERT(!numaNodes.empty());
- if (ThreadBindingType::CORES == config._threadBindingType) {
- std::tie(_impl->_processMask, _impl->_ncpus) = GetProcessMask();
- }
- for (auto streamId = 0; streamId < config._streams; ++streamId) {
- _impl->_threads.emplace_back([=] {
- annotateSetThreadName((_impl->_name + "_" + std::to_string(streamId)).c_str());
- Stream stream;
- stream._streamId = streamId;
- stream._numaNodeId = numaNodes.at(streamId/((config._streams + numaNodes.size() - 1)/numaNodes.size()));
+ explicit Stream(Impl* impl) :
+ _impl(impl) {
+ {
+ std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
+ if (_impl->_streamIdQueue.empty()) {
+ _streamId = _impl->_streamId++;
+ } else {
+ _streamId = _impl->_streamIdQueue.front();
+ _impl->_streamIdQueue.pop();
+ }
+ }
+ _numaNodeId = _impl->_usedNumaNodes.at(
+ (_streamId % _impl->_config._streams)/
+ ((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1)/_impl->_usedNumaNodes.size()));
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
- auto concurrency = (0 == config._threadsPerStream) ? tbb::task_arena::automatic : config._threadsPerStream;
- if (ThreadBindingType::NUMA == config._threadBindingType) {
- stream._taskArena.reset(new tbb::task_arena(tbb::task_arena::constraints(stream._numaNodeId, concurrency)));
- } else if ((0 != config._threadsPerStream) || ThreadBindingType::CORES == config._threadBindingType) {
- stream._taskArena.reset(new tbb::task_arena(concurrency));
- if (ThreadBindingType::CORES == config._threadBindingType) {
- if (nullptr != _impl->_processMask) {
- stream._pinningObserver.reset(new PinningObserver{*stream._taskArena,
- _impl->_processMask,
- _impl->_ncpus,
- stream._streamId,
- config._threadsPerStream,
- config._threadBindingStep,
- config._threadBindingOffset});
+ auto concurrency = (0 == _impl->_config._threadsPerStream) ? tbb::task_arena::automatic : _impl->_config._threadsPerStream;
+ if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
+ _taskArena.reset(new tbb::task_arena{tbb::task_arena::constraints{_numaNodeId, concurrency}});
+ } else if ((0 != _impl->_config._threadsPerStream) || (ThreadBindingType::CORES == _impl->_config._threadBindingType)) {
+ _taskArena.reset(new tbb::task_arena{concurrency});
+ if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
+ CpuSet processMask;
+ int ncpus = 0;
+ std::tie(processMask, ncpus) = GetProcessMask();
+ if (nullptr != processMask) {
+ _observer.reset(new Observer{*_taskArena,
+ std::move(processMask),
+ ncpus,
+ _streamId,
+ _impl->_config._threadsPerStream,
+ _impl->_config._threadBindingStep,
+ _impl->_config._threadBindingOffset});
+ _observer->observe(true);
}
}
}
#elif IE_THREAD == IE_THREAD_OMP
- omp_set_num_threads(config._threadsPerStream);
- if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != config._threadBindingType)) {
- if (nullptr != _impl->_processMask) {
- parallel_nt(config._threadsPerStream, [&] (int threadIndex, int threadsPerStream) {
- int thrIdx = stream._streamId * threadsPerStream + threadIndex + config._threadBindingOffset;
- PinThreadToVacantCore(thrIdx, config._threadBindingStep, _impl->_ncpus, _impl->_processMask);
+ omp_set_num_threads(_impl->_config._threadsPerStream);
+ if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != _impl->_config._threadBindingType)) {
+ CpuSet processMask;
+ int ncpus = 0;
+ std::tie(processMask, ncpus) = GetProcessMask();
+ if (nullptr != processMask) {
+ parallel_nt(_impl->_config._threadsPerStream, [&] (int threadIndex, int threadsPerStream) {
+ int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex + _impl->_config._threadBindingOffset;
+ PinThreadToVacantCore(thrIdx, _impl->_config._threadBindingStep, ncpus, processMask);
});
}
}
#elif IE_THREAD == IE_THREAD_SEQ
- if (ThreadBindingType::NUMA == config._threadBindingType) {
- PinCurrentThreadToSocket(stream._numaNodeId);
- } else if (ThreadBindingType::CORES == config._threadBindingType) {
- PinThreadToVacantCore(stream._streamId + config._threadBindingOffset, config._threadBindingStep, _impl->_ncpus, _impl->_processMask);
+ if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
+ PinCurrentThreadToSocket(_numaNodeId);
+ } else if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
+ CpuSet processMask;
+ int ncpus = 0;
+ std::tie(processMask, ncpus) = GetProcessMask();
+ if (nullptr != processMask) {
+ PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset, _impl->_config._threadBindingStep, ncpus, processMask);
+ }
}
#endif
- _impl->_localStream.local() = &stream;
- for (bool stopped = false; !stopped;) {
- Task currentTask;
- { // waiting for the new task or for stop signal
- std::unique_lock<std::mutex> lock(_impl->_mutex);
- _impl->_queueCondVar.wait(lock, [&] { return !_impl->_taskQueue.empty() || (stopped = _impl->_isStopped); });
- if (!_impl->_taskQueue.empty()) {
- currentTask = std::move(_impl->_taskQueue.front());
- _impl->_taskQueue.pop();
+ }
+ ~Stream() {
+ {
+ std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
+ _impl->_streamIdQueue.push(_streamId);
+ }
+#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
+ if (nullptr != _observer) {
+ _observer->observe(false);
+ }
+#endif
+ }
+
+ Impl* _impl = nullptr;
+ int _streamId = 0;
+ int _numaNodeId = 0;
+ bool _execute = false;
+ std::queue<Task> _taskQueue;
+#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
+ std::unique_ptr<tbb::task_arena> _taskArena;
+ std::unique_ptr<Observer> _observer;
+#endif
+ };
+
+ explicit Impl(const Config& config) :
+ _config{config},
+ _streams([this] {
+ return std::make_shared<Impl::Stream>(this);
+ }) {
+ auto numaNodes = getAvailableNUMANodes();
+ std::copy_n(std::begin(numaNodes),
+ std::min(std::max(static_cast<std::size_t>(1),
+ static_cast<std::size_t>(_config._streams)),
+ numaNodes.size()),
+ std::back_inserter(_usedNumaNodes));
+ for (auto streamId = 0; streamId < _config._streams; ++streamId) {
+ _threads.emplace_back([this, streamId] {
+ annotateSetThreadName((_config._name + "_" + std::to_string(streamId)).c_str());
+ for (bool stopped = false; !stopped;) {
+ Task task;
+ {
+ std::unique_lock<std::mutex> lock(_mutex);
+ _queueCondVar.wait(lock, [&] { return !_taskQueue.empty() || (stopped = _isStopped); });
+ if (!_taskQueue.empty()) {
+ task = std::move(_taskQueue.front());
+ _taskQueue.pop();
+ }
+ }
+ if (task) {
+ Execute(task, *(_streams.local()));
}
}
+ });
+ }
+ }
- if (currentTask) {
+ void Enqueue(Task task) {
+ {
+ std::lock_guard<std::mutex> lock(_mutex);
+ _taskQueue.emplace(std::move(task));
+ }
+ _queueCondVar.notify_one();
+ }
+
+ void Execute(const Task& task, Stream& stream) {
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
- if (nullptr != stream._taskArena) {
- stream._taskArena->execute(std::move(currentTask));
- } else {
- currentTask();
- }
+ auto& arena = stream._taskArena;
+ if (nullptr != arena) {
+ arena->execute(std::move(task));
+ } else {
+ task();
+ }
#else
- currentTask();
+ task();
#endif
+ }
+
+ void Defer(Task task) {
+ auto& stream = *(_streams.local());
+ stream._taskQueue.push(std::move(task));
+ if (!stream._execute) {
+ stream._execute = true;
+ try {
+ while (!stream._taskQueue.empty()) {
+ Execute(stream._taskQueue.front(), stream);
+ stream._taskQueue.pop();
}
- }
- });
+ } catch(...) {}
+ stream._execute = false;
+ }
}
+
+ Config _config;
+ std::mutex _streamIdMutex;
+ int _streamId = 0;
+ std::queue<int> _streamIdQueue;
+ std::vector<std::thread> _threads;
+ std::mutex _mutex;
+ std::condition_variable _queueCondVar;
+ std::queue<Task> _taskQueue;
+ bool _isStopped = false;
+ std::vector<int> _usedNumaNodes;
+ ThreadLocal<std::shared_ptr<Stream>> _streams;
+};
+
+
+int CPUStreamsExecutor::GetStreamId() {
+ auto stream = _impl->_streams.local();
+ return stream->_streamId;
+}
+
+int CPUStreamsExecutor::GetNumaNodeId() {
+ auto stream = _impl->_streams.local();
+ return stream->_numaNodeId;
+}
+
+CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) :
+ _impl{new Impl{config}} {
}
CPUStreamsExecutor::~CPUStreamsExecutor() {
}
}
+void CPUStreamsExecutor::Execute(Task task) {
+ _impl->Defer(std::move(task));
+}
+
void CPUStreamsExecutor::run(Task task) {
- {
- std::lock_guard<std::mutex> lock(_impl->_mutex);
- _impl->_taskQueue.emplace(std::move(task));
+ if (0 == _impl->_config._streams) {
+ _impl->Defer(std::move(task));
+ } else {
+ _impl->Enqueue(std::move(task));
}
- _impl->_queueCondVar.notify_one();
}
} // namespace InferenceEngine
return {};
}
-} // namespace InferenceEngine
+} // namespace InferenceEngine
\ No newline at end of file
std::string XMLParseUtils::GetStrAttr(const pugi::xml_node& node, const char* str) {
auto attr = node.attribute(str);
if (attr.empty())
- THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
+ THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: '" << str << "' at offset "
<< node.offset_debug();
return attr.value();
}
}
std::shared_ptr<ngraph::Node> ngraphNode;
- if (opsets.count(params.version)) {
- auto opset = opsets.at(params.version);
-
- for (const auto& creator : creators) {
- if (creator->shouldCreate(params.type)) {
- ngraphNode = creator->createLayer(inputs, node, weights, params);
- break;
+ // Try to create operation from creators
+ for (const auto& creator : creators) {
+ if (creator->shouldCreate(params.type)) {
+ bool useCreator = false;
+ // Check that opset is registered
+ useCreator |= opsets.find(params.version) == opsets.end();
+ if (!useCreator) {
+ // Check that creator can create operation with the version from opset
+ const auto opset = opsets.at(params.version);
+                    // Opset should contain the same version of the operation, or not contain an operation with the current type
+ useCreator |= opset.contains_type(creator->getNodeType()) || !opset.contains_type(params.type);
}
+ if (useCreator)
+ ngraphNode = creator->createLayer(inputs, node, weights, params);
+ break;
}
+ }
- if (!ngraphNode) {
- if (!opset.contains_type(params.type)) {
- THROW_IE_EXCEPTION << "Opset " << params.version << " doesn't contain the operation with type: " << params.type;
- }
+ // Try to create operation from loaded opsets
+ if (!ngraphNode && opsets.count(params.version)) {
+ auto opset = opsets.at(params.version);
- ngraphNode = std::shared_ptr<ngraph::Node>(opset.create(params.type));
- ngraphNode->set_arguments(inputs);
- XmlDeserializer visitor(node);
- if (ngraphNode->visit_attributes(visitor))
- ngraphNode->constructor_validate_and_infer_types();
+ if (!opset.contains_type(params.type)) {
+ THROW_IE_EXCEPTION << "Opset " << params.version << " doesn't contain the operation with type: " << params.type;
}
+
+ ngraphNode = std::shared_ptr<ngraph::Node>(opset.create(params.type));
+ ngraphNode->set_arguments(inputs);
+ XmlDeserializer visitor(node);
+ if (ngraphNode->visit_attributes(visitor))
+ ngraphNode->constructor_validate_and_infer_types();
}
+ // Create GenericIE operation for backward compatibility
if (!ngraphNode && (params.version == "experimental" || params.version == "extension")) {
// Try to create Generic node for backward compatibility
std::map<std::string, Parameter> parameters;
const GenericLayerParams& layerParsePrms) = 0;
bool shouldCreate(const std::string& nodeType) const;
-
- std::shared_ptr<ngraph::Node> createOptionalParameter(const GenericLayerParams::LayerPortData& port);
+ virtual ngraph::NodeTypeInfo getNodeType() const = 0;
};
template <class T>
std::shared_ptr<ngraph::Node> createLayer(const ngraph::OutputVector& inputs, const pugi::xml_node& node,
const Blob::CPtr& weights,
const GenericLayerParams& layerParsePrms) override;
+ ngraph::NodeTypeInfo getNodeType() const override {
+ return T::type_info;
+ }
};
std::shared_ptr<ngraph::Node> createNode(const ngraph::OutputVector& inputs, const pugi::xml_node& node,
std::vector<size_t> shape;
if (!getParameters<size_t>(node.child("data"), name, shape)) return;
static_cast<ngraph::Strides&>(*a) = ngraph::Strides(shape);
+ } else if (auto a = ngraph::as_type<ngraph::AttributeAdapter<ngraph::op::TopKSortType>>(&adapter)) {
+ if (!getStrAttribute(node.child("data"), name, val)) return;
+ static_cast<ngraph::op::TopKSortType&>(*a) = ngraph::as_enum<ngraph::op::TopKSortType>(val);
+ } else if (auto a = ngraph::as_type<ngraph::AttributeAdapter<ngraph::op::TopKMode>>(&adapter)) {
+ if (!getStrAttribute(node.child("data"), name, val)) return;
+ static_cast<ngraph::op::TopKMode&>(*a) = ngraph::as_enum<ngraph::op::TopKMode>(val);
}
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override {
res->params = params;
return res;
});
+
+ addSpecificCreator({"ScatterElementsUpdate"}, [](const std::shared_ptr<::ngraph::Node>& node,
+ const std::map<std::string, std::string> params) -> CNNLayerPtr {
+ LayerParams attrs = {node->get_friendly_name(), node->description(),
+ details::convertPrecision(node->get_output_element_type(0))};
+ auto res = std::make_shared<ScatterElementsUpdateLayer>(attrs);
+ res->params = params;
+ return res;
+ });
}
CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
}
size_t inputCount(0);
for (size_t i = 0; i < layer->get_input_size(); i++) {
- const auto &input = layer->get_inputs()[i];
- if (isInternalLayer(input.get_output().get_node(), op_names, keep_constants)) continue;
+ const auto &constant = ngraph::as_type_ptr<ngraph::op::Constant>(layer->get_inputs()[i].get_output().get_node());
+ if (constant && isInternalConstLayer(constant, layer, keep_constants)) {
+ continue;
+ }
inputCount++;
}
cnnLayer->insData.resize(inputCount);
namespace InferenceEngine {
Precision CNNNetwork::getPrecision() const {
+ if (actual == nullptr) THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
return actual->getPrecision();
}
};
void CNNNetwork::AddExtension(InferenceEngine::IShapeInferExtensionPtr extension) {
- CALL_STATUS_FNC(AddExtension, extension);
+ CALL_STATUS_FNC(AddExtension, extension);
}
CNNLayer::CNNLayer(const LayerParams& prms)
THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'updates' tensors dimension";
Precision inIdxPrecision = layer->insData[INDICES].lock()->getTensorDesc().getPrecision();
- if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32)
- THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indices' precision. Only FP32 or I32 are supported!";
+ if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::I64)
+ THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indices' precision. Only FP32 or I32 or I64 are supported!";
Precision inAxisPrecision = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
- if (inAxisPrecision != Precision::FP32 && inAxisPrecision != Precision::I32)
- THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Axis' precision. Only FP32 or I32 are supported!";
+ // Fix: check the Axis precision (not the Indices precision) for I64 support.
+ if (inAxisPrecision != Precision::FP32 && inAxisPrecision != Precision::I32 && inAxisPrecision != Precision::I64)
+ THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Axis' precision. Only FP32 or I32 or I64 are supported!";
if (layer->insData[DATA].lock()->getTensorDesc().getPrecision() !=
layer->insData[UPDATES].lock()->getTensorDesc().getPrecision())
if (!_body_reshaper)
THROW_IE_EXCEPTION << "Request of apply reshape results while shape infer was not finished";
_body_reshaper->apply();
+ _body_reshaper.reset(); // WA: reset _body_reshaper to release ownership for input data
}
private:
/**
* @brief Perform shape inference for the given input shapes but not apply it.
- * In case of cusses call apply() method.
+ * In case of success call apply() method.
* @param inputShapes - Map of input names (data) to their input shapes.
* @throws exception if shape infer failed without corruption of original shapes
*/
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include "ie_layers.h"
+#include "low_precision_transformations/transformation_context.hpp"
+#include "low_precision_transformations/layer_transformation.hpp"
+
+namespace InferenceEngine {
+namespace details {
+
+// Low-precision transformation for Power layers with power == 1: the layer's
+// linear scale/offset is folded into the parent ScaleShift weights/biases and
+// the Power layer is removed from the network (see power.cpp).
+class INFERENCE_ENGINE_API_CLASS(PowerTransformation) : public LayerTransformation {
+public:
+ PowerTransformation(const Params& params) : LayerTransformation(params) {}
+ ~PowerTransformation() override {}
+ // Performs the fusion; no-op when canBeTransformed() returns false.
+ void transform(TransformationContext& context, CNNLayer& layer) const override;
+ // True only for single-input Power layers with power == 1 whose parent is ScaleShift.
+ bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
+};
+
+} // namespace details
+} // namespace InferenceEngine
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformations/power.hpp"
+
+#include <algorithm>
+#include <details/caseless.hpp>
+#include <string>
+#include <memory>
+#include <vector>
+
+#include "low_precision_transformations/common/ie_lpt_exception.hpp"
+#include "low_precision_transformations/network_helper.hpp"
+
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+// Returns true when the Power layer can be folded into its parent:
+// it must pass the base-class checks, be a single-input Power layer with
+// power == 1, and have a ScaleShift parent.
+bool PowerTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const {
+ if (!LayerTransformation::canBeTransformed(context, layer)) {
+ return false;
+ }
+
+ if (layer.insData.size() != 1) {
+ THROW_IE_LPT_EXCEPTION(layer) << "layer inputs '" << layer.insData.size() << "' is not correct";
+ }
+
+ if (!CaselessEq<std::string>()(layer.type, "Power")) {
+ THROW_IE_LPT_EXCEPTION(layer) << "layer '" << layer.name << "' is not correct";
+ }
+
+ const PowerLayer* powerLayer = dynamic_cast<const PowerLayer*>(&layer);
+ if (powerLayer == nullptr) {
+ THROW_IE_LPT_EXCEPTION(layer) << "unexpected Power layer type";
+ }
+ // Only a pure linear Power (power == 1) can be expressed as scale/shift.
+ if (powerLayer->power != 1.f) {
+ return false;
+ }
+
+ // Guard against a missing parent instead of dereferencing a null pointer,
+ // and state the condition positively (was: !(parent->type != "ScaleShift")).
+ const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
+ return parent != nullptr && parent->type == "ScaleShift";
+}
+
+// Folds a linear Power layer (y = scale * x + offset) into the parent
+// ScaleShift blobs, then removes the Power layer from the network.
+void PowerTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
+ if (!canBeTransformed(context, layer)) {
+ return;
+ }
+
+ const PowerLayer* powerLayer = dynamic_cast<const PowerLayer*>(&layer);
+ if (powerLayer == nullptr) {
+ THROW_IE_LPT_EXCEPTION(layer) << "unexpected Power layer type";
+ }
+
+ const CNNLayerPtr parent = CNNNetworkHelper::getParent(layer, 0);
+
+ // weights' = weights * scale
+ Blob::Ptr weightsBlob = CNNNetworkHelper::getBlob(parent, "weights");
+ auto wBuffer = weightsBlob->buffer().as<float*>();
+ for (size_t channel = 0ul; channel < weightsBlob->size(); ++channel) {
+ wBuffer[channel] = wBuffer[channel] * powerLayer->scale;
+ }
+
+ // biases' = biases * scale + offset
+ Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(parent, "biases");
+ auto sBuffer = shiftsBlob->buffer().as<float*>();
+ for (size_t channel = 0ul; channel < shiftsBlob->size(); ++channel) {
+ sBuffer[channel] = sBuffer[channel] * powerLayer->scale + powerLayer->offset;
+ }
+
+ const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
+ CNNNetworkHelper::removeLayer(context.network, std::make_shared<CNNLayer>(layer));
+ context.removeLayer(layer);
+ if (children.empty()) {
+ // The removed Power layer was a network output: preserve the output name
+ // by renaming the parent to it. (Removed unused local 'originalName'.)
+ CNNNetworkHelper::renameLayer(context.network, parent->name, layer.name);
+ }
+}
using namespace InferenceEngine;
using namespace InferenceEngine::details;
-static const std::unordered_set<std::string> defaultIgnoreWithParents = {
+static const char * defaultIgnoreWithParents[] = {
"Convolution",
"FakeQuantize"
};
ScaleShiftToConvolutionTransformation::ScaleShiftToConvolutionTransformation(const Params& params) :
WeightableLayerTransformation(params),
groupSize(1ul),
- ignoreWithParents(defaultIgnoreWithParents) {
+ ignoreWithParents(defaultIgnoreWithParents, defaultIgnoreWithParents +
+ sizeof(defaultIgnoreWithParents) / sizeof(defaultIgnoreWithParents[0])) {
}
void ScaleShiftToConvolutionTransformation::transform(TransformationContext& context, CNNLayer& layer) const {
#include "low_precision_transformations/permute.hpp"
#include "low_precision_transformations/pooling.hpp"
#include "low_precision_transformations/resample.hpp"
+#include "low_precision_transformations/power.hpp"
#include "low_precision_transformations/reshape.hpp"
#include "low_precision_transformations/scaleshift_to_convolution.hpp"
#include "low_precision_transformations/squeeze.hpp"
{ "ReLU", LayerTransformationPtr(new ActivationTransformation(params)) },
{ "MVN", LayerTransformationPtr(new MvnTransformation(params)) },
{ "Eltwise", LayerTransformationPtr(new EltwiseTransformation(params)) },
- { "Resample", LayerTransformationPtr(new ResampleTransformation(params)) }
+ { "Resample", LayerTransformationPtr(new ResampleTransformation(params)) },
+ { "Power", LayerTransformationPtr(new PowerTransformation(params)) }
}),
std::map<std::string, LayerTransformationPtr>({
{ "FakeQuantize", LayerTransformationPtr(new FuseFakeQuantizeAndScaleShiftTransformation(params)) },
addVersionDefines(mkldnn_plugin.cpp CI_BUILD_NUMBER MKL_VERSION)
include_directories(
- ${IE_MAIN_SOURCE_DIR}/include
$<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
- ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/mkldnn
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_BINARY_DIR}/include)
+
+include_directories(SYSTEM
${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/common
${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/cpu
- ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include
- ${CMAKE_BINARY_DIR}/include/
-)
+ ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include)
if (GEMM STREQUAL "MKL")
log_rpath_from_dir(MKL "${MKL}/lib")
set_ie_threading_interface_for(mkldnn_plugin_layers_no_opt_s)
target_compile_definitions(mkldnn_plugin_layers_no_opt_s PRIVATE "USE_STATIC_IE;IMPLEMENT_INFERENCE_ENGINE_PLUGIN")
-set(object_libraries mkldnn_plugin_layers_no_opt)
-set(mkldnn_plugin_object_libraries mkldnn_plugin_layers_no_opt_s)
+list(APPEND object_libraries mkldnn_plugin_layers_no_opt)
+list(APPEND mkldnn_plugin_object_libraries mkldnn_plugin_layers_no_opt_s)
# SSE 4.2 optimized layers
InputsDataMap inputs = network.getInputsInfo();
OutputsDataMap outputs = network.getOutputsInfo();
for (auto iter : sortedLayers) {
+ if (_skipmarking.find(iter->type) != _skipmarking.end()) {
+ continue;
+ }
for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
// 2b. go over all unknown layers for this algo and mark them as fp32 and add to the toAnalyzeTensors
// 2c. go over all inputs to _initbf16 and if they are fp32 - add them to the toAnalyzeTensors
for (auto iter : sortedLayers) {
+ if (_skipmarking.find(iter->type) != _skipmarking.end()) {
+ continue;
+ }
if (_initbf16.find(iter->type) == _initbf16.end()
&& _complementbf16.find(iter->type) == _complementbf16.end()
&& _multiinput.find(iter->type) == _multiinput.end()) {
const InferenceEngine::details::caseless_set<std::string> _initbf16 =
{ "convolution", "fullyconnected", "innerproduct" };
const InferenceEngine::details::caseless_set<std::string> _complementbf16 =
- { "relu", "pooling", "norm", "gather" };
+ { "relu", "tanh", "elu", "square", "abs", "sqrt", "linear", "bounded_relu", "soft_relu", "logistic",
+ "exp", "gelu", "clamp", "swish", "prelu", "pooling", "norm", "gather" };
const InferenceEngine::details::caseless_set<std::string> _multiinput =
{ "concat", "eltwise" };
+ const InferenceEngine::details::caseless_set<std::string> _skipmarking =
+ { "const" };
/**
* Tries to mark tensor as FP32 by analyzing of local consumers of the tensor. Do not mark if
MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network,
const Config &cfg,
const MKLDNNExtensionManager::Ptr& extMgr) :
- InferenceEngine::ExecutableNetworkThreadSafeDefault([&] ()->ITaskExecutor::Ptr {
- ExecutorManager *executorManager = ExecutorManager::getInstance();
-
- if (cfg.exclusiveAsyncRequests) {
- // special case when all InferRequests are muxed into a single queue
- return executorManager->getExecutor("CPU");;
- } else {
- const int env_threads = parallel_get_env_threads();
- const auto& numa_nodes = getAvailableNUMANodes();
- const auto numa_nodes_num = numa_nodes.size();
- auto streamExecutorConfig = cfg.streamExecutorConfig;
- // use logical cores only for single-socket targets in throughput mode
- const int hw_cores = streamExecutorConfig._streams > 1 && numa_nodes_num == 1 ? parallel_get_max_threads() : getNumberOfCPUCores();
- const int threads = streamExecutorConfig._threads ? streamExecutorConfig._threads : (env_threads ? env_threads : hw_cores);
- streamExecutorConfig._threadsPerStream = std::max(1, threads/streamExecutorConfig._streams);
- streamExecutorConfig._name = "CPUStreamsExecutor";
- return executorManager->getIdleCPUStreamsExecutor(streamExecutorConfig);
- }
- } ()),
+ InferenceEngine::ExecutableNetworkThreadSafeDefault{nullptr, nullptr},
extensionManager(extMgr),
_cfg{cfg},
_name{network.getName()} {
LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
"ScaleShift"));
transformer.transform(*_clonedNetwork);
- if (with_cpu_x86_bfloat16()) {
+
+ // Check if network is INT8 or Binary.
+ // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution:
+ // BF16 + INT8 or BF16 + BIN.
+ bool isFloatModel = true;
+ CNNNetworkIterator i(&network);
+ while (i != CNNNetworkIterator()) {
+ if (CaselessEq<std::string>()((*i)->type, "FakeQuantize")) {
+ isFloatModel = false;
+ break;
+ }
+ i++;
+ }
+
+ if (with_cpu_x86_bfloat16() && isFloatModel) {
BF16Transformer bf16Transformer;
CNNNetwork cnnetwork(_clonedNetwork);
if (cfg.enforceBF16 == true) {
}
}
+ if (cfg.exclusiveAsyncRequests) {
+ // special case when all InferRequests are muxed into a single queue
+ _taskExecutor = ExecutorManager::getInstance()->getExecutor("CPU");
+ } else {
+ const int env_threads = parallel_get_env_threads();
+ const auto& numa_nodes = getAvailableNUMANodes();
+ const auto numa_nodes_num = numa_nodes.size();
+ auto streamExecutorConfig = cfg.streamExecutorConfig;
+ // use logical cores only for single-socket targets in throughput mode
+ const int hw_cores = streamExecutorConfig._streams > 1 && numa_nodes_num == 1 ? parallel_get_max_threads() : getNumberOfCPUCores();
+ const int threads = streamExecutorConfig._threads ? streamExecutorConfig._threads : (env_threads ? env_threads : hw_cores);
+ streamExecutorConfig._threadsPerStream = streamExecutorConfig._streams
+ ? std::max(1, threads/streamExecutorConfig._streams)
+ : threads;
+ streamExecutorConfig._name = "CPUStreamsExecutor";
+ _taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamExecutorConfig);
+ }
+ if (0 != cfg.streamExecutorConfig._streams) {
+ _callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
+ IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE});
+ } else {
+ _callbackExecutor = _taskExecutor;
+ }
+
_graphs = decltype(_graphs){[&] {
// TODO: Remove `cloneNet` to `localNetwork` when `MKLDNNGraph::CreateGraph`
// is fixed and does not change content of network passed (CVS-26420)
Config engConfig = _graphs.begin()->get()->getProperty();
auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
IE_ASSERT(option != engConfig._config.end());
- result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast<unsigned int>(std::stoi(option->second)));
+ auto streams = std::stoi(option->second);
+ result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, static_cast<unsigned int>(
+ streams ? streams : 1));
} else {
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;
}
return activationNode &&
(activationNode->getAlgorithm() == eltwise_relu ||
(conv->getCnnLayer()->precision == Precision::FP32 &&
- conv->getCnnLayer()->insData[0].lock()->getPrecision() != Precision::BF16 &&
isOneOf(activationNode->getAlgorithm(), {eltwise_elu, eltwise_logistic, eltwise_bounded_relu, eltwise_clamp, eltwise_swish})));
};
auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == FullyConnected &&
- node->getCnnLayer()->insData[0].lock()->getPrecision() != Precision::BF16 &&
node->getChildEdges().size() == 1;
};
bool isSutableConv = (node->getType() == Convolution) &&
node->getCnnLayer()->precision == Precision::FP32;
bool isSutableBinConv = node->getType() == BinaryConvolution;
- return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1 &&
- !(node->getCnnLayer()->insData[0].lock()->getPrecision() == Precision::BF16 &&
- node->getCnnLayer()->outData[0]->getPrecision() == Precision::FP32);
+ return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
};
auto isSutableChildNode = [](MKLDNNNodePtr node) {
auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Convolution &&
node->getChildEdges().size() == 1 &&
- node->getCnnLayer()->precision == Precision::FP32 &&
- !(node->getCnnLayer()->insData[0].lock()->getPrecision() == Precision::BF16 &&
- node->getCnnLayer()->outData[0]->getPrecision() == Precision::FP32);
+ node->getCnnLayer()->precision == Precision::FP32;
};
auto isSutableChildNode = [&](MKLDNNNodePtr node) {
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/op/fused/gelu.hpp>
Engine::~Engine() {
ExecutorManager::getInstance()->clear("CPUStreamsExecutor");
+ ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
}
InferenceEngine::ExecutableNetworkInternal::Ptr
-Engine::LoadExeNetworkImpl(const ICore * /*core*/, const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
+Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) {
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs;
network.getInputsInfo(_networkInputs);
// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
ngraph::pass::CommonOptimizations().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet3ToOpSet2(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphFunc);
ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphFunc);
clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
~Engine() override;
InferenceEngine::ExecutableNetworkInternal::Ptr
- LoadExeNetworkImpl(const InferenceEngine::ICore * core, const InferenceEngine::ICNNNetwork &network,
+ LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,
const std::map<std::string, std::string> &config) override;
void AddExtension(InferenceEngine::IExtensionPtr extension) override;
- /**
- * @deprecated
- * @param config
- */
+
void SetConfig(const std::map<std::string, std::string> &config) override;
InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override;
std::map<std::string, IShapeInferImpl::Ptr> si_list;
};
-template <mkldnn::impl::cpu::cpu_isa_t T>
-class TExtensionsHolder : public ExtensionsHolder {};
-
template<mkldnn::impl::cpu::cpu_isa_t Type>
class MKLDNNExtensions : public IExtension {
public:
}
static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() {
- static std::shared_ptr<TExtensionsHolder<Type>> localHolder;
+ static std::shared_ptr<ExtensionsHolder> localHolder;
if (localHolder == nullptr) {
- localHolder = std::make_shared<TExtensionsHolder<Type>>();
+ localHolder = std::make_shared<ExtensionsHolder>();
}
- return std::dynamic_pointer_cast<ExtensionsHolder>(localHolder);
+ return localHolder;
}
private:
});
}
-std::multimap<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDNNPermuteNode::OptimizedCases = {
+const std::multimap<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDNNPermuteNode::OptimizedCases = {
{{0, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_0231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
return true;
})}, // NCHW -> NHWC case
isApplicable isValidParams;
};
- static std::multimap<InferenceEngine::SizeVector, PermuteImpl> OptimizedCases;
+ static const std::multimap<InferenceEngine::SizeVector, PermuteImpl> OptimizedCases;
std::shared_ptr<jit_uni_permute_kernel> permute_kernel;
};
invertVectorCopyUtoI(poolingLayer->_stride, stride);
invertVectorCopyUtoI(poolingLayer->_kernel, kernel);
auto allPads = getPaddings(*poolingLayer);
- invertVectorCopyUtoI(allPads.begin, paddingL);
- invertVectorCopyUtoI(allPads.end, paddingR);
+ invertVectorCopyUtoI(allPads.begin, data_pad_begin);
+ invertVectorCopyUtoI(allPads.end, data_pad_end);
+ effective_pad_begin = data_pad_begin;
+ effective_pad_end.resize(data_pad_end.size());
auto parentDims = getParentEdgeAt(0)->getDims();
auto childDims = getChildEdgeAt(0)->getDims();
if ((parentDims.ndims() < 4) || (parentDims.ndims() > 5))
THROW_IE_EXCEPTION << "Pooling layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
- for (int i = 0; i < paddingR.size(); i++) {
+ for (int i = 0; i < effective_pad_end.size(); i++) {
int krn = kernel[i];
int src = getParentEdgeAt(0)->getDims()[2 + i];
int dst = getChildEdgeAt(0)->getDims()[2 + i];
- int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
- paddingR[i] = (dst - calc_dst) * stride[i];
+ int calc_dst = (src - krn + data_pad_begin[i]) / stride[i] + 1;
+ effective_pad_end[i] = (dst - calc_dst) * stride[i];
}
if (inputPrecision == Precision::I8 || inputPrecision == Precision::U8) {
// i8 layers supports only ndhwc and nhwc layouts
algorithm alg;
if (type == PoolingLayer::PoolType::AVG) {
bool not_zero_l = false;
- for (auto lr : paddingL) {
+ for (auto lr : data_pad_begin) {
if (lr) {
not_zero_l = true;
break;
}
}
- if (!exclude_pad && not_zero_l)
+ bool not_zero_r = false;
+ for (auto pr : data_pad_end) {
+ if (pr) {
+ not_zero_r = true;
+ break;
+ }
+ }
+ if (!exclude_pad && (not_zero_l || not_zero_r))
alg = pooling_avg_include_padding;
else
alg = pooling_avg_exclude_padding;
std::shared_ptr<pooling_forward::desc> desc_ptr(
new pooling_forward::desc(prop_kind::forward_scoring, alg,
in_candidate, out_candidate,
- stride, kernel, paddingL, paddingR,
+ stride, kernel, effective_pad_begin, effective_pad_end,
mkldnn::padding_kind::zero));
- bool not_zero_r = false;
- for (auto pr : paddingR) {
- if (pr) {
- not_zero_r = true;
- break;
- }
- }
- if (alg == pooling_avg_include_padding && not_zero_r) {
+ if (alg == pooling_avg_include_padding) {
// In case of AVG including paddings the norm coeff should be calculated
// with tacking into account original pads. So we need to restore
- // original values (R_padding = L_padding).
+ // original values for end paddings.
//
// WA. Because mkldnn uses different formula to calculate AVG norm coeff
// in compare with Caffe. In mkldnn coeff is always 1/(KH*KW)
- for (int i = 0; i < paddingL.size(); i++) desc_ptr->data.padding[1][i] = paddingL[i];
+ for (int i = 0; i < data_pad_end.size(); i++) {
+ if (data_pad_end[i] != effective_pad_end[i])
+ desc_ptr->data.padding[1][i] = static_cast<ptrdiff_t>(data_pad_end[i]);
+ }
}
descs.emplace_back(desc_ptr);
InferenceEngine::PoolingLayer::PoolType type = InferenceEngine::PoolingLayer::MAX;
bool exclude_pad = false;
std::vector<ptrdiff_t> stride;
- std::vector<ptrdiff_t> paddingL;
- std::vector<ptrdiff_t> paddingR;
std::vector<ptrdiff_t> kernel;
+ /// Effective padding. Used to define correct output shape by MKLDNN
+ /// reshape formula: (iw - kernel + pad_l + pad_r) / strides[i - 2] + 1
+ /// should be passed into pooling desc constructor.
+ std::vector<ptrdiff_t> effective_pad_begin;
+ std::vector<ptrdiff_t> effective_pad_end;
+
+ /// Effective pad value. Describe how much zero element added to input
+ /// data tensor. May be less than "Effective padding" values.
+ /// If pooling window is out of this padding, the region of averaging
+ /// is decreased.
+ std::vector<ptrdiff_t> data_pad_begin;
+ std::vector<ptrdiff_t> data_pad_end;
+
InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32;
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_resample_nearest_kernel_f32)
explicit jit_uni_resample_nearest_kernel_f32(jit_resample_config_params jcp, const mkldnn_primitive_attr &attr)
- : jit_uni_resample_nearest_kernel(jcp, attr), jit_generator() {
+ : jit_uni_resample_nearest_kernel(jcp, attr), jit_generator() {
const auto &p = attr_.post_ops_;
for (int i = 0; i < p.len_; i++) {
auto &post_op = p.entry_[i];
}
}
+ if (inputPrecision == Precision::BF16) {
+ inputPrecision = Precision::FP32;
+ }
+
+ if (outputPrecision == Precision::BF16) {
+ outputPrecision = Precision::FP32;
+ }
+
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision);
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision);
// f32 and no fused, f32->input is f32, no fuse->output is f32
void MKLDNNResampleNode::NearestNeighbor_PLN(const float *in_ptr_, float *out_ptr_, int B, int C, int ID, int IH, int IW,
- float fx, float fy, float fz, int OD, int OH, int OW) {
+ float fx, float fy, float fz, int OD, int OH, int OW) {
std::vector<int> index_buffer(OD * OH * OW);
for (int oz = 0; oz < OD; oz++) {
float iz = oz * fz;
// int8->input may be int8, fused->output may be int8
template <typename in_data_t, typename out_data_t>
void MKLDNNResampleNode::NearestNeighbor_BLK(const in_data_t *in_ptr_, out_data_t *out_ptr_, int B, int C, int ID, int IH, int IW,
- float fx, float fy, float fz, int OD, int OH, int OW) {
+ float fx, float fy, float fz, int OD, int OH, int OW) {
std::vector<int> index_d(OD);
std::vector<int> index_h(OH);
std::vector<int> index_w(OW);
template <typename in_data_t, typename out_data_t>
void MKLDNNResampleNode::LinearInterpolation(const in_data_t *in_ptr_, out_data_t *out_ptr_, int B, int C, int ID, int IH, int IW,
- float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias) {
+ float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias) {
if (IW == OW && IH == OH && ID == OD) {
size_t size = B * C * ID * IH * IW;
if (input_prec == Precision::FP32) {
--- /dev/null
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Build configuration for the Multi-Device (MULTI) Inference Engine plugin.
+set (TARGET_NAME "MultiDevicePlugin")
+
+# Enable link-time optimization when the build opts in.
+if(ENABLE_LTO)
+ ie_enable_lto()
+endif()
+
+# NOTE(review): file(GLOB) misses newly added files until the next reconfigure;
+# an explicit source list would be preferable here.
+file(GLOB SOURCES
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+)
+
+file(GLOB HEADERS
+ ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
+)
+
+# Registers the plugin under device name "MULTI" and stamps version defines
+# into multi_device.cpp.
+ie_add_plugin(NAME ${TARGET_NAME}
+ DEVICE_NAME "MULTI"
+ SOURCES ${SOURCES} ${HEADERS}
+ VERSION_DEFINES_FOR multi_device.cpp)
+
+target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
+
+set_ie_threading_interface_for(${TARGET_NAME})
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <string>
+#include <vector>
+#include <iostream>
+#include <memory>
+#include <utility>
+#include <map>
+#include <unordered_map>
+
+#include "ie_metric_helpers.hpp"
+#include <ie_api.h>
+#include <cpp_interfaces/base/ie_plugin_base.hpp>
+#include <cpp_interfaces/base/ie_infer_async_request_base.hpp>
+#include <multi-device/multi_device_config.hpp>
+#include <ie_plugin_config.hpp>
+#include "multi_device.hpp"
+
+namespace MultiDevicePlugin {
+ using namespace InferenceEngine;
+// ------------------------------MultiDeviceInferRequest----------------------------
+// Pre-allocates host-side blobs for every network input and output so the
+// request owns its own buffers independent of whichever device-specific
+// worker request eventually executes it (see SetBlobsToAnotherRequest).
+MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
+ const OutputsDataMap& networkOutputs)
+ : InferRequestInternal(networkInputs, networkOutputs) {
+ // Allocate all input blobs
+ for (const auto &it : networkInputs) {
+ Layout l = it.second->getLayout();
+ Precision p = it.second->getPrecision();
+ SizeVector dims = it.second->getTensorDesc().getDims();
+
+ TensorDesc desc = TensorDesc(p, dims, l);
+ _inputs[it.first] = make_blob_with_precision(desc);
+ _inputs[it.first]->allocate();
+ }
+ // Allocate all output blobs
+ for (const auto &it : networkOutputs) {
+ Layout l = it.second->getLayout();
+ Precision p = it.second->getPrecision();
+ SizeVector dims = it.second->getTensorDesc().getDims();
+
+ TensorDesc desc = TensorDesc(p, dims, l);
+ _outputs[it.first] = make_blob_with_precision(desc);
+ _outputs[it.first]->allocate();
+ }
+}
+
+// Shares this request's input/output blobs with the given worker request so
+// the device executes directly on the user's buffers (no extra copies).
+void MultiDeviceInferRequest::SetBlobsToAnotherRequest(InferRequest& req) {
+ for (const auto &it : _networkInputs) {
+ Blob::Ptr blob;
+ auto &name = it.first;
+ // this request is already in BUSY state, so using the internal functions safely
+ GetBlob(name.c_str(), blob);
+ req.SetBlob(name.c_str(), blob);
+ }
+ for (const auto &it : _networkOutputs) {
+ Blob::Ptr blob;
+ auto &name = it.first;
+ // this request is already in BUSY state, so using the internal functions safely
+ GetBlob(name.c_str(), blob);
+ req.SetBlob(name.c_str(), blob);
+ }
+}
+
+// Builds a two-stage async pipeline:
+//  1) scheduled on the executable network, which selects an idle worker
+//     request (stored in the thread-local _thisWorkerInferRequest) and shares
+//     the user's blobs with it;
+//  2) scheduled on ThisRequestExecutor, which hands the continuation task to
+//     the worker and kicks off its StartAsync(); on completion, status is
+//     checked and (optionally) perf counters are harvested.
+MultiDeviceAsyncInferRequest::MultiDeviceAsyncInferRequest(
+ const MultiDeviceInferRequest::Ptr& inferRequest,
+ const bool needPerfCounters,
+ const MultiDeviceExecutableNetwork::Ptr& multiDeviceExecutableNetwork,
+ const ITaskExecutor::Ptr& callbackExecutor) :
+ AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor),
+ _multiDeviceExecutableNetwork{multiDeviceExecutableNetwork},
+ _inferRequest{inferRequest},
+ _needPerfCounters{needPerfCounters} {
+ // Executor that runs a pipeline stage by delegating the task to the chosen
+ // worker request and starting its async inference.
+ struct ThisRequestExecutor : public ITaskExecutor {
+ explicit ThisRequestExecutor(MultiDeviceAsyncInferRequest* _this_) : _this{_this_} {}
+ void run(Task task) override {
+ auto workerInferRequest = _this->_workerInferRequest;
+ workerInferRequest->_task = std::move(task);
+ workerInferRequest->_inferRequest.StartAsync();
+ };
+ MultiDeviceAsyncInferRequest* _this = nullptr;
+ };
+ _pipeline = {
+ {_multiDeviceExecutableNetwork, [this] {
+ // Stage 1: bind to the worker picked by the scheduler for this thread.
+ _workerInferRequest = MultiDeviceExecutableNetwork::_thisWorkerInferRequest;
+ _inferRequest->SetBlobsToAnotherRequest(_workerInferRequest->_inferRequest);
+ }},
+ {std::make_shared<ThisRequestExecutor>(this), [this] {
+ // Stage 2 continuation: propagate any failure from the worker request.
+ auto status = _workerInferRequest->_status;
+ if (InferenceEngine::StatusCode::OK != status) {
+ if (nullptr != InferenceEngine::CurrentException()) {
+ std::rethrow_exception(InferenceEngine::CurrentException());
+ } else {
+ THROW_IE_EXCEPTION << InferenceEngine::details::as_status << status;
+ }
+ }
+ if (_needPerfCounters) {
+ _perfMap = _workerInferRequest->_inferRequest.GetPerformanceCounts();
+ }
+ }}
+ };
+}
+
+// Synchronous Infer is implemented on top of the async pipeline.
+void MultiDeviceAsyncInferRequest::Infer_ThreadUnsafe() {
+ InferUsingAsync();
+}
+
+// Returns the perf counters collected by the completion stage of the pipeline.
+// NOTE(review): std::move on a member inside a const method degrades to a copy
+// unless _perfMap is declared mutable — confirm which behavior is intended.
+void MultiDeviceAsyncInferRequest::GetPerformanceCounts_ThreadUnsafe(std::map<std::string, InferenceEngineProfileInfo> &perfMap) const {
+ perfMap = std::move(_perfMap);
+}
+
+// Ensure the async pipeline is drained before members are destroyed.
+MultiDeviceAsyncInferRequest::~MultiDeviceAsyncInferRequest() {
+ StopAndWait();
+}
+
+// ------------------------------MultiDeviceExecutableNetwork----------------------------
+
+thread_local MultiDeviceExecutableNetwork::WorkerInferRequest* MultiDeviceExecutableNetwork::_thisWorkerInferRequest = nullptr;
+
+// RAII helper: unless Release()d, returns the worker request to the
+// not-busy queue on scope exit, so a worker is never leaked as "busy"
+// on an early return or exception path.
+struct IdleGuard {
+ explicit IdleGuard(MultiDeviceExecutableNetwork::WorkerInferRequest* workerInferRequestPtr,
+ MultiDeviceExecutableNetwork::NotBusyWorkerRequests& notBusyWorkerRequests) :
+ _workerInferRequestPtr{workerInferRequestPtr},
+ _notBusyWorkerRequests{&notBusyWorkerRequests} {
+ }
+ ~IdleGuard() {
+ if (nullptr != _notBusyWorkerRequests) {
+ _notBusyWorkerRequests->push(_workerInferRequestPtr);
+ }
+ }
+ // Disarms the guard; returns the queue so the caller can push explicitly.
+ MultiDeviceExecutableNetwork::NotBusyWorkerRequests* Release() {
+ auto notBusyWorkerRequests = _notBusyWorkerRequests;
+ _notBusyWorkerRequests = nullptr;
+ return notBusyWorkerRequests;
+ }
+ MultiDeviceExecutableNetwork::WorkerInferRequest* _workerInferRequestPtr = nullptr;
+ MultiDeviceExecutableNetwork::NotBusyWorkerRequests* _notBusyWorkerRequests = nullptr;
+};
+
+// Creates per-device pools of worker infer requests. Pool size comes from the
+// user-configured per-device request count, falling back to the device's
+// OPTIMAL_NUMBER_OF_INFER_REQUESTS metric. Each worker's completion callback
+// runs the pending pipeline task, re-queues the worker as idle, and schedules
+// the next waiting task.
+MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::ExecutableNetwork>& networksPerDevice,
+ const DeviceMap<DeviceInformation>& networkDevices,
+ const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+ const bool needPerfCounters) :
+ InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, std::make_shared<InferenceEngine::ImmediateExecutor>()),
+ _devicePriorities{networkDevices},
+ _networksPerDevice{networksPerDevice},
+ _config{config},
+ _needPerfCounters{needPerfCounters} {
+ // This network acts as its own task executor (see run()); drop the default.
+ _taskExecutor.reset();
+ for (auto&& networkValue : _networksPerDevice) {
+ auto& device = networkValue.first;
+ auto& network = networkValue.second;
+
+ auto itNumRequests = _devicePriorities.find(device);
+ unsigned int optimalNum = 0;
+ try {
+ optimalNum = network.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
+ } catch (const details::InferenceEngineException &iie) {
+ THROW_IE_EXCEPTION
+ << "Every device used with the Multi-Device should "
+ << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
+ << "Failed to query the metric for the " << device << " with error:" << iie.what();
+ }
+ // -1 means "not set by the user": fall back to the device's optimal number.
+ const auto numRequests = (_devicePriorities.end() == itNumRequests ||
+ itNumRequests->second.numRequestsPerDevices == -1) ? optimalNum : itNumRequests->second.numRequestsPerDevices;
+ auto& workerRequests = _workerRequests[device];
+ auto& idleWorkerRequests = _idleWorkerRequests[device];
+ workerRequests.resize(numRequests);
+ auto* idleWorkerRequestsPtr = &(idleWorkerRequests);
+ for (auto&& workerRequest : workerRequests) {
+ workerRequest._inferRequest = network.CreateInferRequest();
+ auto* workerRequestPtr = &workerRequest;
+ idleWorkerRequests.push(workerRequestPtr);
+ workerRequest._inferRequest.SetCompletionCallback<std::function<void(InferRequest, StatusCode)>>(
+ [workerRequestPtr, this, device, idleWorkerRequestsPtr] (InferRequest , StatusCode status) mutable {
+ // Guard re-queues the worker even if the task below throws.
+ IdleGuard idleGuard{workerRequestPtr, *idleWorkerRequestsPtr};
+ workerRequestPtr->_status = status;
+ {
+ // Move the task out before running so the slot is free for reuse.
+ auto capturedTask = std::move(workerRequestPtr->_task);
+ capturedTask();
+ }
+ if (!_terminate) {
+ idleGuard.Release()->push(workerRequestPtr);
+ ScheduleToWorkerInferRequest();
+ }
+ });
+ }
+ }
+}
+
+// Tries to start one pending pipeline task on an idle worker, scanning devices in priority order.
+void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest() {
+    // snapshot the priorities under the lock; the map may be changed concurrently via SetConfig
+    auto devices = [&] {
+        std::lock_guard<std::mutex> lock(_mutex);
+        return _devicePriorities;
+    }();
+    for (auto&& device : devices) {
+        auto& idleWorkerRequests = _idleWorkerRequests[device.first];
+        WorkerInferRequest* workerRequestPtr = nullptr;
+        if (idleWorkerRequests.try_pop(workerRequestPtr)) {
+            // if no task turns out to be available, the guard pushes the popped worker back to idle
+            IdleGuard idleGuard{workerRequestPtr, idleWorkerRequests};
+            Task inferPipelineTask;
+            if (_inferPipelineTasks.try_pop(inferPipelineTask)) {
+                // thread-local pointer lets the running task find "its" worker request
+                _thisWorkerInferRequest = workerRequestPtr;
+                inferPipelineTask();
+                // the worker is busy now; keep it out of the idle queue
+                idleGuard.Release();
+                break;
+            }
+        }
+    }
+}
+
+// ITaskExecutor entry point: queue the pipeline task and try to dispatch it right away.
+void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
+    if (_terminate) {
+        return;  // the network is shutting down - silently drop new work
+    }
+    _inferPipelineTasks.push(std::move(inferPipelineTask));
+    ScheduleToWorkerInferRequest();
+}
+
+MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
+    {
+        // clearing the priorities stops ScheduleToWorkerInferRequest from dispatching further tasks
+        std::lock_guard<std::mutex> lock(_mutex);
+        _devicePriorities.clear();
+    }
+    _terminate = true;
+    /* NOTE: The only threads that use `MultiDeviceExecutableNetwork` Context are those that are used by Worker infer requests.
+     * But the AsyncInferRequest destructor should wait for all asynchronous tasks that are used by the request
+     */
+    _workerRequests.clear();
+}
+
+// Builds the device-agnostic synchronous request; the actual device is selected at schedule time.
+InferenceEngine::InferRequestInternal::Ptr MultiDeviceExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+                                                                                                InferenceEngine::OutputsDataMap networkOutputs) {
+    auto syncRequest = std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs);
+    return syncRequest;
+}
+
+// Wraps the synchronous Multi-Device request into the asynchronous, thread-safe public request object.
+void MultiDeviceExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& asyncRequest) {
+    auto syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
+    syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
+    auto multiSyncRequest = std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl);
+    auto multiNetwork = std::static_pointer_cast<MultiDeviceExecutableNetwork>(shared_from_this());
+    auto asyncThreadSafeImpl = std::make_shared<MultiDeviceAsyncInferRequest>(multiSyncRequest,
+                                                                              _needPerfCounters,
+                                                                              multiNetwork,
+                                                                              _callbackExecutor);
+    asyncRequest.reset(new InferRequestBase<MultiDeviceAsyncInferRequest>(asyncThreadSafeImpl), [](IInferRequest *p) { p->Release(); });
+    asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
+}
+
+// At runtime only the device priorities may be changed, and only for devices the network
+// was originally loaded with; request counts are fixed at LoadNetwork time.
+void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config,
+        InferenceEngine::ResponseDesc * /* resp */) {
+    auto priorities = config.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+    if (priorities == config.end() || config.size() > 1) {
+        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str <<
+            "The only config supported for the Network's SetConfig is MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES";
+    } else {
+        auto multiPlugin = std::dynamic_pointer_cast<MultiDeviceInferencePlugin>(this->_plugin);
+        assert(multiPlugin != nullptr);
+        auto metaDevices = multiPlugin->ParseMetaDevices(priorities->second, {});
+
+        // reject explicit per-device request counts: the worker pools are already created
+        if (std::any_of(metaDevices.begin(), metaDevices.end(), [](const std::pair<DeviceName, DeviceInformation> & kvp) {
+            return kvp.second.numRequestsPerDevices != -1;
+        })) {
+            THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "You can only change device priorities but not number of requests"
+                <<" with the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES!";
+        }
+
+        {
+            // the lock also guards ScheduleToWorkerInferRequest, which snapshots _devicePriorities
+            std::lock_guard<std::mutex> lock{_mutex};
+            for (auto && device : metaDevices) {
+                if (_devicePriorities.find(device.first) == _devicePriorities.end()) {
+                    THROW_IE_EXCEPTION << NOT_FOUND_str << "You can only change device priorities but not add new devices with"
+                        << " the Network's SetConfig(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES." << device.first <<
+                        " device was not in the original device list!";
+                }
+            }
+            _devicePriorities = metaDevices;
+
+            // update value in config
+            _config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = priorities->second;
+        }
+    }
+}
+
+// Looks up a single config value that was recorded when the network was loaded.
+void MultiDeviceExecutableNetwork::GetConfig(const std::string &name, InferenceEngine::Parameter &result,
+        InferenceEngine::ResponseDesc * /* resp */) const {
+    const auto it = _config.find(name);
+    if (it == _config.end()) {
+        THROW_IE_EXCEPTION << NOT_FOUND_str << name <<" not found in the ExecutableNetwork config";
+    }
+    result = it->second;
+}
+
+// Reports executable-network-level metrics aggregated across all underlying devices.
+void MultiDeviceExecutableNetwork::GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const {
+    if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
+        // MULTI can keep every device busy at once, so its optimum is the sum over devices
+        unsigned int res = 0u;
+        for (auto n : _networksPerDevice) {
+            try {
+                res += n.second.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
+            } catch (const details::InferenceEngineException &iie) {
+                THROW_IE_EXCEPTION
+                        << "Every device used with the Multi-Device should "
+                        << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
+                        << "Failed to query the metric for the " << n.first << " with error:" << iie.what();
+            }
+        }
+        result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, res);
+    } else if (name == METRIC_KEY(NETWORK_NAME)) {
+        // all per-device networks came from the same model, so any one of them supplies the name
+        auto it = _networksPerDevice.begin();
+        IE_ASSERT(it != _networksPerDevice.end());
+        result = IE_SET_METRIC(NETWORK_NAME, it->second.GetMetric(
+            METRIC_KEY(NETWORK_NAME)).as<std::string>());
+    } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
+        result = IE_SET_METRIC(SUPPORTED_METRICS, {
+            METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
+            METRIC_KEY(SUPPORTED_METRICS),
+            METRIC_KEY(NETWORK_NAME),
+            METRIC_KEY(SUPPORTED_CONFIG_KEYS)
+        });
+    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
+        std::vector<std::string> configKeys = { MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES };
+        result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported Network metric: " << name;
+    }
+}
+
+// ------------------------------MultiDeviceInferencePlugin----------------------------
+
+namespace {
+
+// Returns a copy of `config` overlaid with the (higher-priority) `local` settings.
+std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config,
+                                                const std::map<std::string, std::string> & local) {
+    for (const auto & entry : local) {
+        config[entry.first] = entry.second;
+    }
+    return config;
+}
+
+} // namespace
+
+// Filters `config` down to the keys the given device reports via SUPPORTED_CONFIG_KEYS.
+std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
+    const std::map<std::string, std::string> & config, const std::string & deviceName) const {
+    std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+    std::map<std::string, std::string> supportedConfig;
+    for (auto&& key : supportedConfigKeys) {
+        const auto it = config.find(key);
+        if (it != config.end()) {
+            supportedConfig[key] = it->second;
+        }
+    }
+    return supportedConfig;
+}
+
+// Parses a comma-separated priorities string, e.g. "CPU(4),GPU", into per-device
+// configurations. A parenthesized number sets the per-device request count; -1 means "not set".
+DeviceMap<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(const std::string& priorities,
+                                                                          const std::map<std::string, std::string> & config) const {
+    DeviceMap<DeviceInformation> metaDevices;
+
+    // split the comma-separated priorities string into per-device tokens
+    std::vector<std::string> devicesWithRequests;
+    std::string::size_type i = 0;
+    std::string::size_type idelimeter;
+    while ((idelimeter = priorities.find(',', i)) != std::string::npos) {
+        devicesWithRequests.push_back(priorities.substr(i, idelimeter - i));
+        i = idelimeter + 1;
+    }
+    // last token in the string (which has no comma after that)
+    devicesWithRequests.push_back(priorities.substr(i, priorities.length() - i));
+
+    // collects the device-supported subset of the merged plugin + load-time config for one device
+    auto getDeviceConfig = [&] (const DeviceName & deviceWithID) {
+        DeviceIDParser deviceParser(deviceWithID);
+        std::string deviceName = deviceParser.getDeviceName();
+        std::map<std::string, std::string> tconfig = mergeConfigs(_config, config);
+
+        // set device ID if any
+        std::string deviceIDLocal = deviceParser.getDeviceID();
+        if (!deviceIDLocal.empty()) {
+            tconfig[PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal;
+        }
+
+        return GetSupportedConfig(tconfig, deviceName);
+    };
+
+    for (auto && d : devicesWithRequests) {
+        auto openingBracket = d.find_first_of('(');
+        auto closingBracket = d.find_first_of(')', openingBracket);
+        auto device_name = d.substr(0, openingBracket);
+
+        int numRequests = -1;
+        if (closingBracket != std::string::npos && openingBracket < closingBracket) {
+            // take exactly the digits between the brackets (the length was previously
+            // `closingBracket - 1`, which only worked because std::stol stops at ')')
+            numRequests = std::stol(d.substr(openingBracket + 1, closingBracket - openingBracket - 1));
+
+            if (numRequests <= 0) {
+                THROW_IE_EXCEPTION << "Priority value for '" << device_name << "' must be > 0, while " << numRequests
+                    << " is passed";
+            }
+        }
+
+        // create meta device
+        metaDevices[device_name] = { getDeviceConfig(device_name), numRequests };
+    }
+
+    return metaDevices;
+}
+
+// The only plugin-level config key exposed for reading is the device priorities list.
+Parameter MultiDeviceInferencePlugin::GetConfig(const std::string& name,
+                                                const std::map<std::string, Parameter> & options) const {
+    if (name != MULTI_CONFIG_KEY(DEVICE_PRIORITIES)) {
+        THROW_IE_EXCEPTION << "Unsupported config key: " << name;
+    }
+    const auto it = _config.find(MULTI_CONFIG_KEY(DEVICE_PRIORITIES));
+    if (it == _config.end()) {
+        THROW_IE_EXCEPTION << "Value for KEY_MULTI_DEVICE_PRIORITIES is not set";
+    }
+    return { it->second };
+}
+
+// Stores all given keys verbatim; validation happens when the values are actually consumed.
+void MultiDeviceInferencePlugin::SetConfig(const std::map<std::string, std::string> & config) {
+    for (const auto & entry : config) {
+        _config[entry.first] = entry.second;
+    }
+}
+
+IE_SUPPRESS_DEPRECATED_START
+
+// Legacy (deprecated) C entry point the Core uses to instantiate the plugin: wraps the MULTI
+// plugin into the IE-compatible shell, reporting the API version and build number.
+INFERENCE_PLUGIN_API(InferenceEngine::StatusCode) CreatePluginEngine(
+    InferenceEngine::IInferencePlugin *&plugin,
+    InferenceEngine::ResponseDesc *resp) noexcept {
+    try {
+        plugin = make_ie_compatible_plugin(
+            {{2, 1},
+             CI_BUILD_NUMBER,
+             "MultiDevicePlugin"}, std::make_shared<MultiDeviceInferencePlugin>());
+        return OK;
+    }
+    catch (std::exception &ex) {
+        // noexcept boundary: translate any exception into a status code + description buffer
+        return DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
+    }
+}
+
+IE_SUPPRESS_DEPRECATED_END
+
+MultiDeviceInferencePlugin::MultiDeviceInferencePlugin() {
+    // the device name under which this plugin is addressed via the Core, e.g. "MULTI:CPU,GPU"
+    _pluginName = "MULTI";
+}
+
+// Reports plugin-level (network-agnostic) metrics for the MULTI device.
+InferenceEngine::Parameter MultiDeviceInferencePlugin::GetMetric(const std::string& name,
+                                         const std::map<std::string, InferenceEngine::Parameter> & options) const {
+    if (name == METRIC_KEY(SUPPORTED_METRICS)) {
+        std::vector<std::string> metrics;
+        metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
+        metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME));
+        metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+        IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
+    } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
+        // renamed from `name`, which shadowed the function parameter of the same name
+        std::string fullDeviceName = { "MULTI" };
+        IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, fullDeviceName);
+    } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
+        std::vector<std::string> configKeys = { MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES };
+        IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported metric key " << name;
+    }
+}
+
+// Loads the (cloned) network to every device listed in KEY_MULTI_DEVICE_PRIORITIES and wraps
+// the per-device executable networks into a single MultiDeviceExecutableNetwork.
+ExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const ICNNNetwork &network,
+                                                                              const std::map<std::string, std::string>& config) {
+    if (GetCore() == nullptr) {
+        // fixed typo in the user-facing message ("InferencEngine" -> "InferenceEngine")
+        THROW_IE_EXCEPTION << "Please, work with MULTI device via InferenceEngine::Core object";
+    }
+
+    // TODO: do we really need a clone?
+    ICNNNetwork::Ptr clonedNetwork = cloneNet(network);
+
+    auto fullConfig = mergeConfigs(_config, config);
+    auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+    if (priorities == fullConfig.end()) {
+        THROW_IE_EXCEPTION << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
+    }
+
+    DeviceMap<DeviceInformation> metaDevices = ParseMetaDevices(priorities->second, fullConfig);
+
+    // collect the settings that are applicable to the devices we are loading the network to
+    std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig;
+    multiNetworkConfig.insert(*priorities);
+
+    DeviceMap<ExecutableNetwork> executableNetworkPerDevice;
+    for (auto& p : metaDevices) {
+        auto & deviceName = p.first;
+        auto & metaDevice = p.second;
+        auto & deviceConfig = metaDevice.config;
+        executableNetworkPerDevice.insert({ deviceName, GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, deviceName, deviceConfig) });
+        multiNetworkConfig.insert(deviceConfig.begin(), deviceConfig.end());
+    }
+    if (executableNetworkPerDevice.empty())
+        THROW_IE_EXCEPTION << NOT_FOUND_str << "Failed to load Executable network to any device "
+                                            <<  "that the MULTI device is initialized to work with";
+
+    // PERF_COUNT must be enabled at load time for the workers to collect per-layer timings
+    auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
+    bool enablePerfCounters = (fullConfig.end() != perfConfig) && (perfConfig->second == PluginConfigParams::YES);
+
+    return std::make_shared<MultiDeviceExecutableNetwork>(executableNetworkPerDevice,
+                                                          metaDevices,
+                                                          multiNetworkConfig,
+                                                          enablePerfCounters);
+}
+
+// Reports which layers of `network` MULTI can handle: a layer is marked as supported
+// only if EVERY device from KEY_MULTI_DEVICE_PRIORITIES reports it as supported.
+void MultiDeviceInferencePlugin::QueryNetwork(const ICNNNetwork& network,
+                                              const std::map<std::string, std::string>& config,
+                                              QueryNetworkResult& queryResult) const {
+    if (GetCore() == nullptr) {
+        // fixed typo in the user-facing message ("InferencEngine" -> "InferenceEngine")
+        THROW_IE_EXCEPTION << "Please, work with MULTI device via InferenceEngine::Core object";
+    }
+
+    queryResult.rc = StatusCode::OK;
+    queryResult.supportedLayersMap.clear();
+
+    auto fullConfig = mergeConfigs(_config, config);
+    auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+    if (priorities == fullConfig.end()) {
+        THROW_IE_EXCEPTION << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
+    }
+
+    DeviceMap<DeviceInformation> metaDevices = ParseMetaDevices(priorities->second, fullConfig);
+    std::map<std::string, QueryNetworkResult> queryResults;
+
+    // query every configured device once
+    for (auto&& value : metaDevices) {
+        auto& deviceName = value.first;
+        auto& metaDevice = value.second;
+        queryResults[deviceName] = GetCore()->QueryNetwork(network, deviceName, metaDevice.config);
+    }
+
+    // intersect the per-device results: keep only layers every device supports
+    details::CNNNetworkIterator i(&network);
+    while (i != details::CNNNetworkIterator()) {
+        CNNLayer::Ptr layer = *i;
+        bool layerIsInQueryResultsForAllDevices = std::all_of(std::begin(queryResults), std::end(queryResults),
+            [&](const std::map<std::string, QueryNetworkResult>::value_type& qr) {
+                return qr.second.supportedLayersMap.end() != qr.second.supportedLayersMap.find(layer->name);});
+        if (layerIsInQueryResultsForAllDevices) {
+            queryResult.supportedLayersMap[layer->name] = GetName();
+        }
+        i++;
+    }
+}
+} // namespace MultiDevicePlugin
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include <atomic>
+#include <mutex>
+#include <queue>
+#include <unordered_map>
+#include <map>
+#include <vector>
+#include <utility>
+#include <memory>
+#include <string>
+
+#include <cpp/ie_plugin_cpp.hpp>
+#include <ie_plugin_dispatcher.hpp>
+#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
+#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
+#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
+#include "ie_iinfer_request.hpp"
+#include "details/ie_exception_conversion.hpp"
+#include <ie_parallel.hpp>
+
+#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
+#include <tbb/concurrent_queue.h>
+#endif
+
+namespace MultiDevicePlugin {
+
+using DeviceName = std::string;
+
+// Per-device slice of the MULTI configuration.
+struct DeviceInformation {
+    std::map<std::string, std::string> config;  // device-specific config subset to load the network with
+    int numRequestsPerDevices;                  // user-requested worker count; -1 means "not set, use the device optimum"
+};
+
+template<typename T>
+using DeviceMap = std::unordered_map<DeviceName, T>;
+
+// Device-agnostic synchronous infer request: it only holds the blobs; the actual inference
+// is performed by a per-device worker request the blobs are forwarded to at schedule time.
+class MultiDeviceInferRequest : public InferenceEngine::InferRequestInternal {
+public:
+    using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
+    explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
+                                     const InferenceEngine::OutputsDataMap& networkOutputs);
+    // Perf counts come from the device worker (via the async request), not from this holder object
+    void GetPerformanceCounts(std::map<std::string, InferenceEngineProfileInfo>&) const override {
+        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
+    }
+    // Direct synchronous inference is not supported; work is always dispatched asynchronously
+    void InferImpl() override {
+        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
+    }
+    // Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
+    void SetBlobsToAnotherRequest(InferenceEngine::InferRequest& req);
+};
+
+#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
+// With TBB available, reuse its concurrent queue directly
+template <typename T>
+using ThreadSafeQueue = tbb::concurrent_queue<T>;
+#else
+// Minimal mutex-based fallback implementing the subset of the tbb::concurrent_queue API used here
+template <typename T>
+class ThreadSafeQueue {
+public:
+    void push(T value) {
+        std::lock_guard<std::mutex> lock(_mutex);
+        _queue.push(std::move(value));
+    }
+
+    // Non-blocking pop: returns false (leaving `value` untouched) when the queue is empty
+    bool try_pop(T& value) {
+        std::lock_guard<std::mutex> lock(_mutex);
+        if (!_queue.empty()) {
+            value = std::move(_queue.front());
+            _queue.pop();
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    bool empty() {
+        std::lock_guard<std::mutex> lock(_mutex);
+        return _queue.empty();
+    }
+
+protected:
+    std::queue<T> _queue;
+    std::mutex _mutex;
+};
+#endif
+
+// Executable network that load-balances infer requests over several per-device networks.
+// It is also an ITaskExecutor: async requests submit their pipeline stages via run().
+class MultiDeviceExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault,
+                                     public ITaskExecutor {
+public:
+    using Ptr = std::shared_ptr<MultiDeviceExecutableNetwork>;
+    // One slot of the per-device worker pool: a real device request plus its pending task/status
+    struct WorkerInferRequest {
+        InferenceEngine::InferRequest _inferRequest;
+        Task _task;
+        InferenceEngine::StatusCode _status = InferenceEngine::StatusCode::OK;
+    };
+    using NotBusyWorkerRequests = ThreadSafeQueue<WorkerInferRequest*>;
+
+    explicit MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::ExecutableNetwork>& networksPerDevice,
+                                          const DeviceMap<DeviceInformation>& networkDevices,
+                                          const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+                                          const bool needPerfCounters = false);
+
+    void SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config, InferenceEngine::ResponseDesc *resp) override;
+    void GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
+    void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
+    // ITaskExecutor entry point: queues the task and tries to dispatch it to an idle worker
+    void run(Task inferTask) override;
+    void CreateInferRequest(InferenceEngine::IInferRequest::Ptr& asyncRequest) override;
+    InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
+                                                                      InferenceEngine::OutputsDataMap networkOutputs) override;
+    ~MultiDeviceExecutableNetwork() override;
+
+    void ScheduleToWorkerInferRequest();
+
+    static thread_local WorkerInferRequest* _thisWorkerInferRequest;  // the worker running the current task (set by the scheduler)
+    std::atomic_bool _terminate = {false};  // set in the destructor to stop further scheduling
+    std::mutex _mutex;  // guards _devicePriorities
+    DeviceMap<DeviceInformation> _devicePriorities;
+    DeviceMap<InferenceEngine::ExecutableNetwork> _networksPerDevice;
+    ThreadSafeQueue<Task> _inferPipelineTasks;  // tasks waiting for an idle worker
+    DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
+    DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
+    std::unordered_map<std::string, InferenceEngine::Parameter> _config;
+    bool _needPerfCounters = false;
+};
+
+// Asynchronous wrapper over MultiDeviceInferRequest: forwards the blobs to the scheduled
+// device worker and exposes its results and (optionally) performance counters.
+class MultiDeviceAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
+public:
+    using Ptr = std::shared_ptr<MultiDeviceAsyncInferRequest>;
+
+    explicit MultiDeviceAsyncInferRequest(const MultiDeviceInferRequest::Ptr& inferRequest,
+                                          const bool needPerfCounters,
+                                          const MultiDeviceExecutableNetwork::Ptr& multiDeviceExecutableNetwork,
+                                          const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
+    void Infer_ThreadUnsafe() override;
+    void GetPerformanceCounts_ThreadUnsafe(std::map<std::string, InferenceEngineProfileInfo> &_perfMap) const override;
+    ~MultiDeviceAsyncInferRequest() override;
+
+protected:
+    MultiDeviceExecutableNetwork::Ptr _multiDeviceExecutableNetwork;
+    MultiDeviceInferRequest::Ptr _inferRequest;
+    // NOTE(review): presumably filled from the worker request on completion — definition not in view, confirm
+    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> _perfMap;
+    bool _needPerfCounters = false;
+    MultiDeviceExecutableNetwork::WorkerInferRequest* _workerInferRequest = nullptr;
+};
+
+// The MULTI plugin itself: parses the device priorities config, loads the network to each
+// listed device, and returns a MultiDeviceExecutableNetwork over the per-device results.
+class MultiDeviceInferencePlugin : public InferenceEngine::InferencePluginInternal {
+public:
+    MultiDeviceInferencePlugin();
+    ~MultiDeviceInferencePlugin() override = default;
+
+    InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork& network,
+                                                                       const std::map<std::string, std::string>& config) override;
+
+    void SetConfig(const std::map<std::string, std::string>& config) override;
+    Parameter GetConfig(const std::string& name,
+                        const std::map<std::string, Parameter> & options) const override;
+    void QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+                      const std::map<std::string, std::string>& config,
+                      InferenceEngine::QueryNetworkResult& res) const override;
+    InferenceEngine::Parameter GetMetric(const std::string& name,
+                                         const std::map<std::string, InferenceEngine::Parameter>& options) const override;
+
+    // Splits a "DEV1(n),DEV2"-style priorities string into per-device configs; -1 request count = unset
+    DeviceMap<DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
+                                                  const std::map<std::string, std::string> & config) const;
+
+protected:
+    // Filters `config` down to the keys the device reports via SUPPORTED_CONFIG_KEYS
+    std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
+                                                          const DeviceName & deviceName) const;
+};
+
+} // namespace MultiDevicePlugin
_core = core;
}
- const ICore* GetCore() const noexcept override {
+ ICore* GetCore() const noexcept override {
return _core;
}
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
}
-
RemoteContext::Ptr GetDefaultContext() override {
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
}
* @param config string-string map of config parameters relevant only for this load operation
* @return Shared pointer to the ExecutableNetwork object
*/
- virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICore* core, const ICNNNetwork& network,
+ virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICNNNetwork& network,
const std::map<std::string, std::string>& config) = 0;
/**
* @note The function is used in
* InferencePluginInternal::LoadNetwork(const ICNNNetwork&, const std::map<std::string, std::string>&, RemoteContext::Ptr)
* which performs common steps first and calls this plugin-dependent method implementation after.
- * @param core A pointer to ICore interface.
* @param network A network object
* @param context A remote context
* @param config string-string map of config parameters relevant only for this load operation
* @return Shared pointer to the ExecutableNetwork object
*/
- virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICore* core, const ICNNNetwork& network,
- RemoteContext::Ptr context,
+ virtual ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const ICNNNetwork& network, RemoteContext::Ptr context,
const std::map<std::string, std::string>& config) {
- (void)core;
(void)network;
(void)context;
(void)config;
void cloneAndCreateExecutableNetwork(IExecutableNetwork::Ptr& executableNetwork, const ICNNNetwork& network,
const std::map<std::string, std::string>& config,
RemoteContext::Ptr context = nullptr) {
- InputsDataMap networkInputs;
- OutputsDataMap networkOutputs;
+ InputsDataMap networkInputs, networkInputsCloned;
+ OutputsDataMap networkOutputs, networkOutputsCloned;
network.getInputsInfo(networkInputs);
network.getOutputsInfo(networkOutputs);
- copyInputOutputInfo(networkInputs, networkOutputs, _networkInputs, _networkOutputs);
+ copyInputOutputInfo(networkInputs, networkOutputs, networkInputsCloned, networkOutputsCloned);
ExecutableNetworkInternal::Ptr impl;
if (nullptr == context) {
- impl = LoadExeNetworkImpl(GetCore(), network, config);
+ impl = LoadExeNetworkImpl(network, config);
} else {
- impl = LoadExeNetworkImpl(GetCore(), network, context, config);
+ impl = LoadExeNetworkImpl(network, context, config);
}
- impl->setNetworkInputs(_networkInputs);
- impl->setNetworkOutputs(_networkOutputs);
+ impl->setNetworkInputs(networkInputsCloned);
+ impl->setNetworkOutputs(networkOutputsCloned);
impl->SetPointerToPluginInternal(shared_from_this());
executableNetwork.reset(new ExecutableNetworkBase<ExecutableNetworkInternal>(impl), [](details::IRelease* p) {
}
std::string _pluginName; //!< A device name that plugins enables
- InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
- InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data
std::map<std::string, std::string> _config; //!< A map config keys -> values
ICore* _core = nullptr; //!< A pointer to ICore interface
};
* @brief Gets reference to ICore interface
* @return Reference to ICore interface
*/
- virtual const ICore* GetCore() const noexcept = 0;
+ virtual ICore* GetCore() const noexcept = 0;
/**
* @brief Queries a plugin about support layers in network
virtual std::shared_ptr<ITaskExecutor> GetTaskExecutor() const = 0;
/**
+ * @deprecated Use ICore::GetMetric, ICore::LoadNetwork, ICore::QueryNetwork instead
* @brief Returns reference to plugin by a device name
* @param deviceName - a name of device
* @return Reference to plugin
virtual CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const = 0;
/**
+ * @brief Creates an executable network from a network object.
+ *
+ * Users can create as many networks as they need and use
+ * them simultaneously (up to the limitation of the hardware resources)
+ *
+ * @param network CNNNetwork object acquired from Core::ReadNetwork
+ * @param deviceName Name of device to load network to
+ * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
+ * operation
+ * @return An executable network reference
+ */
+ virtual ExecutableNetwork LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config = {}) = 0;
+
+ /**
+ * @brief Creates an executable network from a previously exported network
+ * @param deviceName Name of device load executable network on
+ * @param networkModel network model stream
+ * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
+ * operation
+ * @return An executable network reference
+ */
+ virtual ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName = {},
+ const std::map<std::string, std::string>& config = {}) = 0;
+
+ /**
+ * @brief Query device if it supports specified network with specified configuration
+ *
+ * @param deviceName A name of a device to query
+ * @param network Network object to query
+ * @param config Optional map of pairs: (config parameter name, config parameter value)
+ * @return An object containing a map of pairs a layer name -> a device name supporting this layer.
+ */
+ virtual QueryNetworkResult QueryNetwork(const ICNNNetwork& network, const std::string& deviceName,
+ const std::map<std::string, std::string>& config) const = 0;
+
+ /**
+ * @brief Gets general runtime metric for dedicated hardware.
+ *
+ * The method is needed to request common device properties
+ * which are executable network agnostic. It can be device name, temperature, other devices-specific values.
+ *
+ * @param deviceName - A name of a device to get a metric value.
+ * @param name - metric name to request.
+ * @return Metric value corresponding to metric key.
+ */
+ virtual Parameter GetMetric(const std::string& deviceName, const std::string& name) const = 0;
+
+ /**
* @brief Default virtual destructor
*/
virtual ~ICore() = default;
#define IE_STR(x) IE_STR_(x)
#define IE_STR_(x) #x
-class ProfilingTask;
+struct ProfilingTask;
struct IttStatic {};
}
private:
-friend void annotateBegin(IttStatic&, IttProfilingTask& t);
-friend void annotateEnd(IttStatic&, IttProfilingTask& t);
+ friend void annotateBegin(IttStatic&, IttProfilingTask& t);
+ friend void annotateEnd(IttStatic&, IttProfilingTask& t);
std::string name;
#ifdef ENABLE_PROFILING_ITT
void run(Task task) override;
+ void Execute(Task task) override;
+
int GetStreamId() override;
int GetNumaNodeId() override;
* @return `ID` of current NUMA Node, or throws exceptions if called not from stream thread
*/
virtual int GetNumaNodeId() = 0;
+
+ /**
+ * @brief Execute the task in the current thread using streams executor configuration and constraints
+ * @param task A task to start
+ */
+ virtual void Execute(Task task) = 0;
};
#
function(ie_avx512_core_optimization_flags flags)
if(WIN32)
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "/QxCORE-AVX512" PARENT_SCOPE)
- elseif(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(${flags} "/arch:AVX512" PARENT_SCOPE)
else()
message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
endif()
else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(${flags} "-xCORE-AVX512" PARENT_SCOPE)
else()
set(${flags} "-mavx512f -mavx512bw -mavx512dq -mfma" PARENT_SCOPE)
// #include <transformations/transformations_tbl.hpp>
// #undef NGRAPH_PASS
-NGRAPH_PASS(NopElimination, ::ngraph::pass)
+// This pass must be called first in pipeline
+NGRAPH_PASS(InitNodeInfo, ::ngraph::pass)
+NGRAPH_PASS(ConvertPriorBox, ::ngraph::pass) // WA: ConvertPriorBox must be executed before CF
+NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
+NGRAPH_PASS(RemoveFilteringBoxesBySize, ::ngraph::pass) // Resolves dynamism (replaces NonZero), CF needed
+NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
+NGRAPH_PASS(StridedSliceOptimization, ::ngraph::pass) // depends on CF
+NGRAPH_PASS(NopElimination, ::ngraph::pass) // may introduce fake dynamism
+NGRAPH_PASS(AlgebraicSimplification, ::ngraph::pass) // may introduce fake dynamism
+NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
+NGRAPH_PASS(ConvertScatterElementsToScatter, ::ngraph::pass) // partially depends on CF
// #include <transformations/transformations_tbl.hpp>
// #undef NGRAPH_PASS
-NGRAPH_PASS(InitNodeInfo, ::ngraph::pass)
-NGRAPH_PASS(ConvertPriorBox, ::ngraph::pass)
NGRAPH_PASS(ConstantFolding, ::ngraph::pass)
NGRAPH_PASS(ConvertReduceToPooling, ::ngraph::pass)
NGRAPH_PASS(ConvertMod, ::ngraph::pass)
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertBroadcast3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertBroadcast3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
+public:
+ ConvertBroadcast3() : GraphRewrite() {
+ convert_broadcast3();
+ }
+
+private:
+ void convert_broadcast3();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertNMS3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertNMS3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
+public:
+ ConvertNMS3() : GraphRewrite() {
+ convert_nms3();
+ }
+
+private:
+ void convert_nms3();
+};
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <ie_api.h>
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertOpSet3ToOpSet2);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertOpSet3ToOpSet2: public ngraph::pass::FunctionPass, public ngraph::pass::PassParam {
+public:
+ explicit ConvertOpSet3ToOpSet2(const PassParam::param_callback & callback = PassParam::getDefaultCallback())
+ : FunctionPass(), PassParam(callback) {}
+
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef NGRAPH_PASS
+#warning "NGRAPH_PASS is not defined"
+#define NGRAPH_PASS(A, B)
+#endif
+
+// To register new pass you need to define NGRAPH_PASS
+// Usage example:
+// ngraph::pass:Manager pm;
+// #define NGRAPH_PASS(NAME, NAMESPACE) pm.register_pass<NAMESPACE::NAME>();
+// #include <transformations/transformations_tbl.hpp>
+// #undef NGRAPH_PASS
+
+NGRAPH_PASS(ConvertBroadcast3, ::ngraph::pass)
+NGRAPH_PASS(ConvertNMS3, ::ngraph::pass)
+NGRAPH_PASS(ConvertShapeOf3, ::ngraph::pass)
+NGRAPH_PASS(ConvertTopK3, ::ngraph::pass)
+
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertShapeOf3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertShapeOf3: public ngraph::pass::GraphRewrite {
+public:
+ ConvertShapeOf3() : GraphRewrite() {
+ convert_shapeof3();
+ }
+
+private:
+ void convert_shapeof3();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include "transformations/utils/pass_param.hpp"
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(ConvertTopK3);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertTopK3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
+public:
+ ConvertTopK3() : GraphRewrite() {
+ convert_topk3();
+ }
+
+private:
+ void convert_topk3();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class INFERENCE_ENGINE_API_CLASS(ConvertScatterElementsToScatter);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::ConvertScatterElementsToScatter: public ngraph::pass::GraphRewrite {
+public:
+ ConvertScatterElementsToScatter() : GraphRewrite() {
+ convert_scatter_elements_to_scatter();
+ }
+
+private:
+ void convert_scatter_elements_to_scatter();
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <ngraph/slice_plan.hpp>
+#include <ngraph/util.hpp>
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(StridedSliceOptimization);
+ class INFERENCE_ENGINE_API_CLASS(UselessStridedSliceEraser);
+ class INFERENCE_ENGINE_API_CLASS(SharedStridedSliceEraser);
+ class INFERENCE_ENGINE_API_CLASS(GroupedStridedSliceOptimizer);
+ } // namespace pass
+} // namespace ngraph
+
+
+/*
+ * Description:
+ * UselessStridedSliceEraser transformation removes StridedSlice operations
+ * with equal input and output shapes.
+ */
+
+class ngraph::pass::UselessStridedSliceEraser: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
+
+/*
+ * Description:
+ * SharedStridedSliceEraser replaces group of StridedSlice operations with first
+ * StridedSlice in this group. All StridedSlices in this group must be equal and
+ * consume the same output port.
+ */
+
+class ngraph::pass::SharedStridedSliceEraser: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
+
+/*
+ * Description:
+ * GroupedStridedSliceOptimizer replaces group of StridedSlice operations with VariadicSplit
+ * All StridedSlice operations must slice data with the same axis and stride = 1.
+ */
+
+class ngraph::pass::GroupedStridedSliceOptimizer: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
+};
+
+class ngraph::pass::StridedSliceOptimization: public ngraph::pass::FunctionPass {
+public:
+ bool run_on_function(std::shared_ptr<ngraph::Function> f) override {
+ bool rewritten = UselessStridedSliceEraser().run_on_function(f);
+ rewritten |= SharedStridedSliceEraser().run_on_function(f);
+ rewritten |= GroupedStridedSliceOptimizer().run_on_function(f);
+ return rewritten;
+ }
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+ class INFERENCE_ENGINE_API_CLASS(RemoveFilteringBoxesBySize);
+
+} // namespace pass
+} // namespace ngraph
+
+class ngraph::pass::RemoveFilteringBoxesBySize: public ngraph::pass::GraphRewrite {
+public:
+ RemoveFilteringBoxesBySize() : GraphRewrite() {
+ remove_filtering_boxes_by_size();
+ }
+
+private:
+ void remove_filtering_boxes_by_size();
+};
// SPDX-License-Identifier: Apache-2.0
//
-#include "transformations/common_optimizations/common_optimizations.hpp"
-
#include <memory>
+#include "transformations/common_optimizations/common_optimizations.hpp"
+#include "transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp"
+#include "transformations/optimize_strided_slice.hpp"
+#include "transformations/convert_scatter_elements_to_scatter.hpp"
+#include "transformations/remove_filtering_boxes_by_size.hpp"
+#include "transformations/init_node_info.hpp"
+
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/nop_elimination.hpp>
+#include <ngraph/pass/algebraic_simplification.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+
bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::Function> f) {
ngraph::pass::Manager CommonOptimizations;
#include <transformations/convert_opset1_to_legacy/convert_power_to_power_ie.hpp>
#include <transformations/convert_opset1_to_legacy/convert_prelu_to_relu_ie.hpp>
#include <transformations/convert_opset1_to_legacy/convert_proposal_to_proposal_ie.hpp>
-#include <transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
#include <transformations/convert_reduce_to_pooling.hpp>
#include <transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.hpp>
#include <transformations/convert_subtract.hpp>
#include <transformations/pull_transpose_through_fq.hpp>
#include <transformations/convert_opset1_to_legacy/convert_strided_slice_to_strided_slice_ie.hpp>
#include <transformations/convert_opset1_to_legacy/convert_hard_sigmoid_to_hard_sigmoid_ie.hpp>
-#include <transformations/init_node_info.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/manager.hpp>
#include <memory>
#include <vector>
+#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph_ops/prior_box_ie.hpp>
input_2 = convert2->input_value(0).get_node_shared_ptr();
}
- auto shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
- auto shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
+ // the input can be either ShapeOf-1 or ShapeOf-3
+ std::shared_ptr<ngraph::op::Op> shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
+ std::shared_ptr<ngraph::op::Op> shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
if (!shape_of1 || !shape_of2) {
+ shape_of1 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_1);
+ shape_of2 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_2);
+ }
+ if (!shape_of1 || !shape_of2) {
return false;
}
+ // keep this code for a while in case we decide to run this transformation again in the opset1->legacy
+ // the input can be either ShapeOf or Convert(ShapeOf)
+// if (!shape_of1 || !shape_of2) {
+// auto shapeof1_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_1);
+// auto shapeof2_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_2);
+// if (!shapeof1_convert || !shapeof2_convert)
+// return false;
+// shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof1_convert->input_value(0).get_node_shared_ptr());
+// shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof2_convert->input_value(0).get_node_shared_ptr());
+// if (!shape_of1 || !shape_of2)
+// return false;
+// ops_to_replace.push_back(shapeof1_convert);
+// ops_to_replace.push_back(shapeof2_convert);
+// }
ops_to_replace.push_back(shape_of1);
ops_to_replace.push_back(shape_of2);
input_2 = convert2->input_value(0).get_node_shared_ptr();
}
- auto shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
- auto shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
+ // the input can be either ShapeOf-1 or ShapeOf-3
+ std::shared_ptr<ngraph::op::Op> shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_1);
+ std::shared_ptr<ngraph::op::Op> shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf> (input_2);
if (!shape_of1 || !shape_of2) {
+ shape_of1 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_1);
+ shape_of2 = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(input_2);
+ }
+ if (!shape_of1 || !shape_of2) {
return false;
}
+ // keep this code for a while in case we decide to run this transformation again in the opset1->legacy
+ // the input can be either ShapeOf or Convert(ShapeOf)
+// if (!shape_of1 || !shape_of2) {
+// auto shapeof1_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_1);
+// auto shapeof2_convert = std::dynamic_pointer_cast<ngraph::opset1::Convert> (input_2);
+// if (!shapeof1_convert || !shapeof2_convert)
+// return false;
+// shape_of1 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof1_convert->input_value(0).get_node_shared_ptr());
+// shape_of2 = std::dynamic_pointer_cast<ngraph::opset1::ShapeOf>(shapeof2_convert->input_value(0).get_node_shared_ptr());
+// if (!shape_of1 || !shape_of2)
+// return false;
+// ops_to_replace.push_back(shapeof1_convert);
+// ops_to_replace.push_back(shapeof2_convert);
+// }
ops_to_replace.push_back(shape_of1);
ops_to_replace.push_back(shape_of2);
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_broadcast3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/rt_info.hpp>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+
+void ngraph::pass::ConvertBroadcast3::convert_broadcast3() {
+ auto weights = std::make_shared<pattern::op::Label>(element::f32, Shape {1});
+ auto shp = std::make_shared<pattern::op::Label>(element::i64, Shape {1});
+ auto axes = std::make_shared<pattern::op::Label>(element::i64, Shape {1});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(weights, shp, axes);
+
+ auto broadcast_no_axes = std::make_shared<ngraph::opset3::Broadcast>(weights, shp);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ auto broadcast = std::dynamic_pointer_cast<ngraph::opset3::Broadcast>(m.get_match_root());
+ if (!broadcast) {
+ return false;
+ }
+
+ auto input = broadcast->input_value(0);
+ auto target_shape = broadcast->input_value(1);
+
+ auto last_node = input.get_node_shared_ptr();
+ auto broadcast_type = broadcast->get_broadcast_spec();
+
+ if (broadcast_type == op::BroadcastType::NUMPY) {
+ last_node = std::make_shared<ngraph::opset1::Broadcast>(input, target_shape, op::AutoBroadcastType::NUMPY);
+ ngraph::copy_runtime_info(broadcast, last_node);
+ } else if (broadcast_type == op::BroadcastType::PDPD) {
+ last_node = std::make_shared<ngraph::opset1::Broadcast>(input, target_shape, op::AutoBroadcastType::PDPD);
+ ngraph::copy_runtime_info(broadcast, last_node);
+ } else if (broadcast_type == op::BroadcastType::NONE) {
+ last_node = std::make_shared<ngraph::opset1::Broadcast>(input, target_shape, broadcast->input_value(2), op::AutoBroadcastType::NONE);
+ ngraph::copy_runtime_info(broadcast, last_node);
+ } else if (broadcast_type == op::BroadcastType::BIDIRECTIONAL) {
+ auto constant_one = std::make_shared<ngraph::opset1::Constant>(input.get_element_type(), Shape({1}), std::vector<int>{1});
+ auto broadcast_ones = std::make_shared<ngraph::opset1::Broadcast>(constant_one, target_shape, op::AutoBroadcastType::NUMPY);
+ last_node = std::make_shared<ngraph::opset1::Multiply>(input, broadcast_ones);
+ ngraph::copy_runtime_info(broadcast, {last_node, broadcast_ones, constant_one});
+ }
+
+ last_node->set_friendly_name(broadcast->get_friendly_name());
+
+ ngraph::replace_node(m.get_match_root(), last_node);
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(broadcast, "ConvertBroadcast3");
+ auto m_no_axes = std::make_shared<ngraph::pattern::Matcher>(broadcast_no_axes, "ConvertBroadcast3NoAxes");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+ this->add_matcher(m_no_axes, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_nms3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+void ngraph::pass::ConvertNMS3::convert_nms3() {
+ auto boxes = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<pattern::op::Label>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = ngraph::opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = ngraph::opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = ngraph::opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<ngraph::opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher &m) {
+ auto nms = std::dynamic_pointer_cast<ngraph::opset3::NonMaxSuppression>(m.get_match_root());
+ if (!nms) {
+ return false;
+ }
+
+ Output<Node> last;
+ ngraph::NodeVector new_ops;
+
+ auto new_nms = std::make_shared<ngraph::opset2::NonMaxSuppression>(nms->input_value(0), nms->input_value(1),
+ nms->input_value(2), nms->input_value(3), nms->input_value(4),
+ static_cast<const op::v1::NonMaxSuppression::BoxEncodingType>(nms->get_box_encoding()),
+ nms->get_sort_result_descending());
+
+ new_ops.push_back(new_nms);
+ // if the output type is i32 then it matches the behavior of v1::NonMaxSuppression; otherwise a Convert must be inserted
+ if (nms->get_output_type() == element::i32) {
+ last = new_nms;
+ } else {
+ last = std::make_shared<ngraph::opset2::Convert>(new_nms, nms->get_output_type());
+ new_ops.push_back(last.get_node_shared_ptr());
+ }
+
+ last.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name());
+ ngraph::copy_runtime_info(nms, new_ops);
+ ngraph::replace_node(nms, last.get_node_shared_ptr());
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(nms, "ConvertNMS3");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp"
+
+#include "transformations/convert_opset3_to_opset2/convert_broadcast3.hpp"
+#include "transformations/convert_opset3_to_opset2/convert_nms3.hpp"
+#include "transformations/convert_opset3_to_opset2/convert_shapeof3.hpp"
+#include "transformations/convert_opset3_to_opset2/convert_topk3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/pass/manager.hpp>
+
+bool ngraph::pass::ConvertOpSet3ToOpSet2::run_on_function(std::shared_ptr<ngraph::Function> f) {
+ ngraph::pass::Manager OpSet3ToOpSet2;
+ std::vector<std::shared_ptr<ngraph::pass::PassBase> > transforms;
+
+#define NGRAPH_PASS(NAME, NAMESPACE) transforms.push_back(OpSet3ToOpSet2.register_pass<NAMESPACE::NAME>());
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2_tbl.hpp>
+#undef NGRAPH_PASS
+
+ for (auto & t : transforms) {
+ if (auto t_param = std::dynamic_pointer_cast<PassParam>(t)) {
+ t_param->setCallback(transformation_callback);
+ }
+ }
+ OpSet3ToOpSet2.run_passes(f);
+ return true;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_shapeof3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+void ngraph::pass::ConvertShapeOf3::convert_shapeof3() {
+ auto input = std::make_shared<pattern::op::Label>(element::i64, Shape{1, 1, 1, 1});
+ auto shapeof = std::make_shared<ngraph::opset3::ShapeOf>(input);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ auto shapeof = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf> (m.get_match_root());
+ if (!shapeof) {
+ return false;
+ }
+
+ Output<Node> last;
+ ngraph::NodeVector new_ops;
+
+ auto new_shapeof = std::make_shared<ngraph::opset1::ShapeOf>(shapeof->input_value(0));
+ new_ops.push_back(new_shapeof);
+ // if the output type is i64 then it matches the behavior of v1::ShapeOf; otherwise a Convert must be inserted
+ if (shapeof->get_output_type() == element::i64) {
+ last = new_shapeof;
+ } else {
+ last = std::make_shared<ngraph::opset1::Convert>(new_shapeof, shapeof->get_output_type());
+ new_ops.push_back(last.get_node_shared_ptr());
+ }
+
+ last.get_node_shared_ptr()->set_friendly_name(shapeof->get_friendly_name());
+ ngraph::copy_runtime_info(shapeof, new_ops);
+ ngraph::replace_node(shapeof, last.get_node_shared_ptr());
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(shapeof, "ConvertShapeOf3");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_opset3_to_opset2/convert_topk3.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+void ngraph::pass::ConvertTopK3::convert_topk3() {
+ auto input = std::make_shared<pattern::op::Label>(element::i64, Shape{1, 1, 1, 1});
+ auto k = ngraph::opset3::Constant::create(element::i64, Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 0, "min", "value", element::i64);
+ // this is a temporary workaround for a bug: TopK-3 does not have clone_with_new_inputs, so cloning a TopK-3
+ // generates a TopK-1 operation
+ auto topk_v1 = std::make_shared<ngraph::opset1::TopK>(input, k, 0, "min", "value", element::i64);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ std::shared_ptr<ngraph::op::v1::TopK> topk = std::dynamic_pointer_cast<ngraph::opset3::TopK> (m.get_match_root());
+ if (!topk) {
+ topk = std::dynamic_pointer_cast<ngraph::opset1::TopK> (m.get_match_root());
+ }
+ if (!topk) {
+ return false;
+ }
+ Output<Node> last;
+ ngraph::NodeVector new_ops;
+
+ auto new_topk = std::make_shared<ngraph::opset2::TopK>(topk->input_value(0), topk->input_value(1),
+ topk->get_axis(), topk->get_mode(), topk->get_sort_type(), element::i32);
+ new_ops.push_back(new_topk);
+ // if the output type is i32 then it matches the behavior of v1::TopK; otherwise a Convert must be inserted
+ if (topk->get_index_element_type() == element::i32) {
+ last = new_topk->output(1);
+ } else {
+ last = std::make_shared<ngraph::opset2::Convert>(new_topk->output(1), topk->get_index_element_type());
+ new_ops.push_back(last.get_node_shared_ptr());
+ }
+
+ new_topk->set_friendly_name(topk->get_friendly_name());
+ ngraph::copy_runtime_info(topk, new_ops);
+ topk->output(0).replace(new_topk->output(0));
+ topk->output(1).replace(last);
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(topk, "ConvertTopK3");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+ auto m2 = std::make_shared<ngraph::pattern::Matcher>(topk_v1, "ConvertTopK3");
+ this->add_matcher(m2, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/convert_scatter_elements_to_scatter.hpp"
+
+#include <memory>
+#include <vector>
+#include <numeric>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+#include <ngraph/validation_util.hpp>
+
+void ngraph::pass::ConvertScatterElementsToScatter::convert_scatter_elements_to_scatter() {
+ auto data = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+ auto indices = std::make_shared<pattern::op::Label>(element::i64, Shape{1});
+ auto updates = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+ auto axis = ngraph::opset3::Constant::create(element::i64, {1}, {0});
+
+ auto broadcast_shape = std::make_shared<pattern::op::Label>(element::i64, Shape{1});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(indices, broadcast_shape);
+
+ auto scatter = std::make_shared<ngraph::opset3::ScatterElementsUpdate>(data, broadcast, updates, axis);
+
+ ngraph::graph_rewrite_callback callback = [](pattern::Matcher& m) {
+ auto scatter = m.get_match_root();
+ auto broadcast = scatter->input_value(1).get_node_shared_ptr();
+ auto axis_const = std::dynamic_pointer_cast<ngraph::opset3::Constant>(scatter->input_value(3).get_node_shared_ptr());
+
+ if (!axis_const) {
+ return false;
+ }
+
+ auto indices_input = broadcast->input_value(0);
+
+ const auto data_pshape = scatter->input(0).get_partial_shape();
+ const auto indices_pshape = indices_input.get_partial_shape();
+ const auto updates_pshape = scatter->input(2).get_partial_shape();
+
+ // Check that ScatterElementsUpdate and Broadcast inputs has static shapes
+ if (data_pshape.rank().is_dynamic() || indices_pshape.rank().is_dynamic() || updates_pshape.rank().is_dynamic()) {
+ return false;
+ }
+
+ const uint64_t data_rank = data_pshape.rank().get_length();
+ const uint64_t updates_rank = updates_pshape.rank().get_length();
+ const uint64_t indices_rank = indices_pshape.rank().get_length();
+
+
+ // Check that axis Constant has {} or {1} shape
+ if (shape_size(axis_const->get_shape()) > 1) {
+ return false;
+ }
+
+ const size_t axis = ngraph::normalize_axes(scatter->get_friendly_name(),
+ axis_const->cast_vector<int64_t>(),
+ data_pshape.rank())[0];
+
+ struct Range {
+ uint64_t l, r;
+ Range(const uint64_t & l, const uint64_t & r) : l(l), r(r) {
+ if (l > r) throw ngraph_error("Range values are inconsistent");
+ }
+
+ uint64_t size() const {
+ return r - l;
+ }
+
+ bool operator!= (const Range & rhs) const {
+ return (r - l != rhs.r - rhs.l);
+ }
+
+ static
+ bool is_valid(const int64_t & l, const int64_t & r) {
+ return (l >= 0 && l <= r);
+ }
+
+ static
+ bool is_empty(const uint64_t & l, const uint64_t & r) {
+ return l == r;
+ }
+ };
+
+ auto compare_shapes_ranges = [](const PartialShape & lhsShape, const PartialShape & rhsShape, const Range & lhsRange, const Range & rhsRange) -> bool {
+ // Check that ranges are equal and suits to Shapes sizes
+ if (lhsRange != rhsRange ||
+ lhsRange.r > lhsShape.rank().get_length() ||
+ rhsRange.r > rhsShape.rank().get_length()) {
+ return false;
+ }
+
+ // Check that Shape values in ranges are equal
+ for (size_t lhsIndex = lhsRange.l, rhsIndex = rhsRange.l; lhsIndex < lhsRange.r; ++lhsIndex, ++rhsIndex) {
+ if (lhsShape[lhsIndex].is_dynamic() || rhsShape[rhsIndex].is_dynamic() ||
+ lhsShape[lhsIndex] != rhsShape[rhsIndex]) {
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ auto product = [](const Shape & shape, const Range & range) -> uint64_t {
+ uint64_t prod(1);
+ for (size_t dim = range.l; dim < range.r; ++dim) {
+ prod *= shape[dim];
+ }
+ return prod;
+ };
+
+ /* To transform ScatterElementsUpdate to ScatterUpdate input shapes must match this rules:
+ *
+ * data_shape[d_0, d_1, ... , d_n]
+ *
+ * indices_shape[i_0, i_1, ... , i_n]
+ *
+ * updates_shape[d_0, d_1, i_0(axis), i_1, ... , i_n, d_axis + 1, ... , d_n]
+ *
+ * EXAMPLE:
+ * In this example the input shapes suit the rules above and ScatterElementsUpdate can be replaced with ScatterUpdate
+ *
+ * axis = 1 | (axis)
+ * \/
+ *
+ * data_shape [1000, 256, 10, 15]
+ *
+ * index_shape [ 125, 2 ]
+ *
+ * updates_shape [1000, 125, 2, 10, 15]
+ *
+ */
+
+ // data_shape and updates_shape dims must be equal up to axis dimension
+ if (!compare_shapes_ranges(data_pshape, updates_pshape, {0, axis}, {0, axis})) {
+ return false;
+ }
+
+ // data_shape dims starting right after axis dim must match last updates_shape dimensions
+ if (!Range::is_valid(updates_rank - (data_rank - (axis + 1)), updates_rank)) {
+ return false;
+ }
+
+ const Range updates_last{updates_rank - (data_rank - (axis + 1)), updates_rank};
+ if (!compare_shapes_ranges(data_pshape, updates_pshape, {axis + 1, data_rank}, updates_last)) {
+ return false;
+ }
+
+ // indices_shape dims product must match updates_shape dims starting from axis dimension
+ if (!Range::is_valid(axis, updates_last.l) && !Range::is_empty(axis, updates_last.l)) {
+ return false;
+ }
+
+ NodeVector new_ops;
+
+ // In case of static shapes we check that indices dims product match with updates dims
+ if (updates_pshape.is_static() && indices_pshape.is_static()) {
+ const auto updated_range_prod = product(updates_pshape.get_shape(), {axis, updates_last.l});
+ const auto indices_range_prod = product(indices_pshape.get_shape(), {0, indices_rank});
+
+ if (updated_range_prod != indices_range_prod) {
+ return false;
+ }
+
+ // if indices_shape do not match updates_shape dims{axis, updates_last.l}
+ // we reshape indices to updates_shape
+ const auto updates_shape = updates_pshape.get_shape();
+ const auto indices_shape = indices_pshape.get_shape();
+ Shape indices_new_shape(updates_shape.begin() + axis, updates_shape.begin() + updates_last.l);
+ if (indices_shape != indices_new_shape) {
+ indices_input = std::make_shared<ngraph::opset3::Reshape>(indices_input,
+ opset3::Constant::create(element::i64, Shape{indices_new_shape.size()}, indices_new_shape), false);
+ new_ops.push_back(indices_input.get_node_shared_ptr());
+ }
+ } else {
+ // Tight constrain for dynamic case:
+ // 1. indices_pshape 1...N dimensions must be equal to 1
+ // 2. updates_pshape axis interval size = 1
+
+ for (size_t dim = 1; dim < indices_rank; ++dim) {
+ if (indices_pshape[dim] != 1) return false;
+ }
+
+ if (Range(axis, updates_last.l).size() != 1) {
+ return false;
+ }
+
+ // Squeeze 1 dims for indices input
+ if (indices_rank > 1) {
+ std::vector<int64_t> squeeze_axes(indices_rank - 1ul);
+ std::iota(squeeze_axes.begin(), squeeze_axes.end(), 1);
+ indices_input = std::make_shared<ngraph::opset3::Squeeze>(indices_input,
+ opset3::Constant::create(element::i64, Shape{squeeze_axes.size()}, squeeze_axes));
+ new_ops.push_back(indices_input.get_node_shared_ptr());
+ }
+ }
+
+ auto scatter_update = std::make_shared<ngraph::opset3::ScatterUpdate>(scatter->input_value(0),
+ indices_input,
+ scatter->input_value(2),
+ scatter->input_value(3));
+ new_ops.push_back(scatter_update);
+ scatter_update->set_friendly_name(scatter->get_friendly_name());
+ ngraph::copy_runtime_info({scatter, broadcast}, {new_ops});
+ ngraph::replace_node(scatter, scatter_update);
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(scatter, "ConvertScatterElementsToScatter");
+ this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <vector>
+
+#include <transformations/optimize_strided_slice.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+bool ngraph::pass::UselessStridedSliceEraser::run_on_function(std::shared_ptr<ngraph::Function> f) {
+ bool rewritten = false;
+ for (auto & node : f->get_ordered_ops()) {
+ auto ss = std::dynamic_pointer_cast<ngraph::opset1::StridedSlice>(node);
+ if (!ss || ss->get_output_partial_shape(0).is_dynamic() || ss->get_input_partial_shape(0).is_dynamic())
+ continue;
+ if (ss->input(0).get_shape() != ss->output(0).get_shape())
+ continue;
+ rewritten |= replace_output_update_name(ss->output(0), ss->input_value(0));
+ }
+ return rewritten;
+}
+
+// Builds a normalized SlicePlan (begins/ends/strides plus axis sets) for the
+// given StridedSlice. Returns a default-constructed (empty) SlicePlan when the
+// plan cannot be computed, i.e. when begin/end/strides are not Constants or the
+// input shape is dynamic. Callers compare against SlicePlan() to detect this.
+ngraph::SlicePlan get_slice_plan(std::shared_ptr<ngraph::opset1::StridedSlice> slice) {
+    // StridedSlice masks are 0/1 vectors; collect the set positions into an AxisSet.
+    auto convert_mask_to_axis_set = [](const std::vector<int64_t>& mask) {
+        ngraph::AxisSet axis_set{};
+        for (size_t i = 0; i < static_cast<size_t>(mask.size()); ++i) {
+            if (mask[i] == 1)
+                axis_set.emplace(i);
+        }
+        return axis_set;
+    };
+
+    // NOTE(review): 'data' is never used below — candidate for removal.
+    auto data = slice->input_value(0).get_node_shared_ptr();
+    auto begin = std::dynamic_pointer_cast<ngraph::opset1::Constant>(slice->input_value(1).get_node_shared_ptr());
+    auto end = std::dynamic_pointer_cast<ngraph::opset1::Constant>(slice->input_value(2).get_node_shared_ptr());
+    auto strides = std::dynamic_pointer_cast<ngraph::opset1::Constant>(slice->input_value(3).get_node_shared_ptr());
+    // The plan is only computable for constant slice parameters and a static input shape.
+    if (!begin || !end || !strides || slice->input(0).get_partial_shape().is_dynamic())
+        return ngraph::SlicePlan();
+
+    auto begin_vec = begin->cast_vector<int64_t>();
+    auto end_vec = end->cast_vector<int64_t>();
+    auto strides_vec = strides->cast_vector<int64_t>();
+    const auto begin_mask = convert_mask_to_axis_set(slice->get_begin_mask());
+    const auto end_mask = convert_mask_to_axis_set(slice->get_end_mask());
+
+    ngraph::SlicePlan plan = ngraph::make_slice_plan(slice->input(0).get_shape(),
+                                                     begin_vec,
+                                                     end_vec,
+                                                     strides_vec,
+                                                     begin_mask,
+                                                     end_mask,
+                                                     convert_mask_to_axis_set(slice->get_new_axis_mask()),
+                                                     convert_mask_to_axis_set(slice->get_shrink_axis_mask()),
+                                                     convert_mask_to_axis_set(slice->get_ellipsis_mask()));
+    return plan;
+}
+
+
+// Returns true iff both StridedSlice nodes resolve to the same computable
+// SlicePlan. A slice whose plan could not be computed (empty plan) is never
+// considered equal to anything.
+bool strided_slices_perform_the_same(std::shared_ptr<ngraph::opset1::StridedSlice> lhs,
+                                     std::shared_ptr<ngraph::opset1::StridedSlice> rhs) {
+    const auto empty_plan = ngraph::SlicePlan();
+    const auto lhs_plan = get_slice_plan(lhs);
+    if (lhs_plan == empty_plan)
+        return false;
+    const auto rhs_plan = get_slice_plan(rhs);
+    if (rhs_plan == empty_plan)
+        return false;
+    return lhs_plan == rhs_plan;
+}
+
+// Deduplicates StridedSlice ops: among slices consuming the same source output
+// and performing an identical slice (equal SlicePlans), all but the first in
+// topological order are replaced by that first ("root") slice.
+bool ngraph::pass::SharedStridedSliceEraser::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    bool graph_rewritten = false;
+
+    // Bucket every StridedSlice by the producer output it reads.
+    std::map<ngraph::Output<Node>, std::vector<std::shared_ptr<ngraph::opset1::StridedSlice>>> source_to_ss;
+    for (const auto & node : f->get_ordered_ops()) {
+        if (auto ss = std::dynamic_pointer_cast<ngraph::opset1::StridedSlice>(node)) {
+            source_to_ss[ss->input_value(0)].push_back(ss);
+        }
+    }
+
+    for (auto& pair : source_to_ss) {
+        if (pair.second.size() < 2)
+            continue;
+        // First slice in ordered-ops order becomes the canonical one.
+        auto root_ss = pair.second[0];
+        for (auto& child_ss : pair.second) {
+            // Skip the root itself (identified by instance id), then merge equal slices.
+            if (root_ss->get_instance_id() != child_ss->get_instance_id() && strided_slices_perform_the_same(root_ss, child_ss)) {
+                graph_rewritten |= replace_output_update_name(child_ss->output(0), root_ss->output(0));
+            }
+        }
+    }
+    return graph_rewritten;
+}
+
+// Merges groups of StridedSlice ops that partition the same source tensor along
+// a single common axis into one VariadicSplit. A group is replaced only when:
+//  * every slice has a computable SlicePlan of the same rank,
+//  * no slice uses ellipsis/new-axis/shrink-axis masks,
+//  * all slices cut along exactly one axis, with stride 1 and no overlap.
+// Gaps between the partitions (and a possible tail) become throw-away
+// VariadicSplit outputs kept alive via Result nodes.
+bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_function(std::shared_ptr<ngraph::Function> f) {
+    bool graph_rewritten = false;
+    using planned_slice = std::pair<std::shared_ptr<ngraph::opset1::StridedSlice>, ngraph::SlicePlan>;
+
+    // Group slices by their source output; only slices with a computable plan participate.
+    std::map<ngraph::Output<Node>, std::vector<planned_slice>> source_to_ss_with_plan;
+    for (const auto & node : f->get_ordered_ops()) {
+        if (auto ss = std::dynamic_pointer_cast<ngraph::opset1::StridedSlice>(node)) {
+            auto slice_plan = get_slice_plan(ss);
+            if (slice_plan == ngraph::SlicePlan())
+                continue;
+            source_to_ss_with_plan[ss->input_value(0)].push_back({ss, slice_plan});
+        }
+    }
+
+    for (auto& pair : source_to_ss_with_plan) {
+        if (pair.second.size() < 2)
+            continue;
+
+        bool valid_for_replacement = true;
+
+        // All plans must have the same rank and no axis-changing masks.
+        auto root_plan = pair.second[0].second;
+        for (const auto & ss_plan : pair.second) {
+            valid_for_replacement &= (ss_plan.second.begins.size() == root_plan.begins.size());
+            valid_for_replacement &= (ss_plan.first->get_ellipsis_mask().empty() &&
+                                      ss_plan.first->get_new_axis_mask().empty() &&
+                                      ss_plan.first->get_shrink_axis_mask().empty());
+        }
+
+        if (!valid_for_replacement) continue;
+
+        auto input_shape = pair.first.get_shape();
+        // The single axis along which every slice in the group cuts; -1 = not found yet.
+        // (int64_t avoids the signed/unsigned comparison with the loop index.)
+        int64_t axis = -1;
+
+        struct OutputToPartition {
+            Output<Node> output;
+            int64_t begin;
+            int64_t end;
+        };
+
+        std::vector<OutputToPartition> output_to_partition;
+        for (size_t i = 0; i < input_shape.size(); ++i) {
+            for (const auto & ss_plan : pair.second) {
+                // A dimension participates when the slice does not span it fully.
+                if (ss_plan.second.begins[i] != 0 || ss_plan.second.ends[i] != static_cast<int64_t>(input_shape[i])) {
+                    if (axis == -1 || axis == static_cast<int64_t>(i))
+                        axis = static_cast<int64_t>(i);
+                    else
+                        valid_for_replacement = false;  // slices cut along different axes
+                    if (ss_plan.second.strides[i] != 1)
+                        valid_for_replacement = false;  // VariadicSplit cannot express strides
+                    output_to_partition.push_back({ss_plan.first->output(0), ss_plan.second.begins[i], ss_plan.second.ends[i]});
+                }
+                if (!valid_for_replacement) break;
+            }
+            if (!valid_for_replacement) break;
+        }
+
+        if (!valid_for_replacement) continue;
+        if (output_to_partition.size() < 2) continue;
+
+        std::sort(output_to_partition.begin(), output_to_partition.end(),
+                  [](const OutputToPartition& lhs, const OutputToPartition& rhs)
+                  {return lhs.begin < rhs.begin;});
+
+        // Verify the partitions do not overlap and stay inside the dimension.
+        // (int64_t, matching 'begin'/'end', avoids a signed/unsigned comparison.)
+        std::vector<std::pair<Output<Node>, uint64_t>> output_to_size;
+        int64_t prev_r = 0;
+        for (auto & record : output_to_partition) {
+            valid_for_replacement &= (record.begin >= prev_r);
+            prev_r = record.end;
+        }
+        valid_for_replacement &= (prev_r <= static_cast<int64_t>(input_shape[axis]));
+        if (!valid_for_replacement) continue;
+
+        // Build the split-size list; gaps map to a "fake" (unconnected) output.
+        prev_r = 0;
+        Output<Node> fake_output;
+        for (auto & record : output_to_partition) {
+            if (record.begin > prev_r)
+                output_to_size.emplace_back(fake_output, record.begin - prev_r);
+            prev_r = record.end;
+            output_to_size.emplace_back(record.output, record.end - record.begin);
+        }
+        if (prev_r < static_cast<int64_t>(input_shape[axis])) {
+            output_to_size.emplace_back(fake_output, input_shape[axis] - prev_r);
+        }
+
+        auto axis_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {axis});
+
+        std::vector<int64_t> size_splits;
+        for (const auto & item : output_to_size)
+            size_splits.push_back(item.second);
+        auto size_splits_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{size_splits.size()}, size_splits);
+        auto variadic_split = std::make_shared<ngraph::opset1::VariadicSplit>(pair.first, axis_const, size_splits_const);
+
+        auto i = 0;
+        NodeVector ops_to_replace;
+        for (auto & record : output_to_size) {
+            if (record.first == fake_output) {
+                // Attach a Result so the unused split output is not dangling.
+                std::make_shared<ngraph::opset1::Result>(variadic_split->output(i));
+            } else {
+                record.first.replace(variadic_split->output(i));
+                ops_to_replace.push_back(record.first.get_node_shared_ptr());
+            }
+            ++i;
+        }
+        copy_runtime_info(ops_to_replace, variadic_split);
+        // Fix: previously the function always returned false even after rewriting,
+        // so the pass manager could not tell that the graph changed.
+        graph_rewritten = true;
+    }
+    return graph_rewritten;
+}
+
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/rt_info.hpp>
+
+#include "transformations/remove_filtering_boxes_by_size.hpp"
+
+// Registers a matcher for the box-filtering-by-size subgraph
+// (VariadicSplit -> Sub/Add -> Concat -> VariadicSplit -> Squeeze -> Less ->
+//  Not -> Convert chain -> And -> Convert chain -> NonZero -> Transpose ->
+//  Squeeze -> Convert) and replaces the whole match with a Range over all box
+// indices [0, num_boxes), i.e. removes the filtering entirely.
+void ngraph::pass::RemoveFilteringBoxesBySize::remove_filtering_boxes_by_size() {
+    // variadic split of the [N, 4] box tensor into its 4 coordinate columns
+    // NOTE(review): the Label shape {1000, 4} presumably acts only as a matching
+    // placeholder — confirm matching is not restricted to exactly 1000 boxes.
+    auto data = std::make_shared<pattern::op::Label>(element::f32, Shape{1000, 4});
+    auto sizes = opset3::Constant::create(element::i64, Shape{4}, std::vector<int64_t >({1, 1, 1, 1}));
+    auto axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto split = std::make_shared<ngraph::opset3::VariadicSplit>(data, axis, sizes);
+
+    // sub -> add: width/height reconstruction from corner coordinates
+    auto sub_2_0 = std::make_shared<ngraph::opset3::Subtract>(split->output(2), split->output(0));
+    auto term_1 = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+    auto add_1 = std::make_shared<ngraph::opset3::Add>(sub_2_0, term_1);
+
+    auto sub_3_1 = std::make_shared<ngraph::opset3::Subtract>(split->output(3), split->output(1));
+    auto term_2 = std::make_shared<pattern::op::Label>(element::f32, Shape{1});
+    auto add_2 = std::make_shared<ngraph::opset3::Add>(sub_3_1, term_2);
+
+    // concat
+    auto concat = std::make_shared<ngraph::opset3::Concat>(ngraph::OutputVector({split->output(0), split->output(1), add_1->output(0), add_2->output(0)}), 1);
+
+    // second variadic split
+    auto sizes_1 = opset3::Constant::create(element::i64, Shape{4}, std::vector<int64_t >({1, 1, 1, 1}));
+    auto axis_1 = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto split_1 = std::make_shared<ngraph::opset3::VariadicSplit>(concat, axis_1, sizes_1);
+
+    // squeeze
+    auto squeeze_1_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto squeeze_1 = std::make_shared<ngraph::opset3::Squeeze>(split_1->output(2), squeeze_1_axis);
+
+    auto squeeze_2_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto squeeze_2 = std::make_shared<ngraph::opset3::Squeeze>(split_1->output(3), squeeze_2_axis);
+
+    // less: compare width/height against 0
+    auto less_1_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector<float >({0}));
+    auto less_1 = std::make_shared<ngraph::opset3::Less>(squeeze_1, less_1_constant);
+
+    auto less_2_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector<float >({0}));
+    auto less_2 = std::make_shared<ngraph::opset3::Less>(squeeze_2, less_2_constant);
+
+    // Logical Not
+    auto not_1 = std::make_shared<ngraph::opset3::LogicalNot>(less_1);
+    auto not_2 = std::make_shared<ngraph::opset3::LogicalNot>(less_2);
+
+    // cast (bool expressed via u8 round-trip in the source model)
+    auto cast_11 = std::make_shared<ngraph::opset3::Convert>(not_1, ngraph::element::u8);
+    auto cast_12 = std::make_shared<ngraph::opset3::Convert>(cast_11, ngraph::element::boolean);
+
+    auto cast_21 = std::make_shared<ngraph::opset3::Convert>(not_2, ngraph::element::u8);
+    auto cast_22 = std::make_shared<ngraph::opset3::Convert>(cast_21, ngraph::element::boolean);
+
+    // logical and
+    auto and_1 = std::make_shared<ngraph::opset3::LogicalAnd>(cast_12, cast_22);
+
+    // cast
+    auto cast_31 = std::make_shared<ngraph::opset3::Convert>(and_1, ngraph::element::u8);
+    auto cast_32 = std::make_shared<ngraph::opset3::Convert>(cast_31, ngraph::element::f32);
+
+    // nonzero: indices of boxes that passed the filter
+    auto non_zero = std::make_shared<ngraph::opset3::NonZero>(cast_32);
+
+    auto order = opset3::Constant::create(element::i64, Shape{2}, std::vector<int64_t >({1, 0}));
+    auto transpose = std::make_shared<ngraph::opset3::Transpose>(non_zero, order);
+
+    auto squeeze_3_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector<int64_t >({1}));
+    auto squeeze_3 = std::make_shared<ngraph::opset3::Squeeze>(transpose, squeeze_3_axis);
+
+    auto cast = std::make_shared<ngraph::opset3::Convert>(squeeze_3, ngraph::element::i64);
+
+    // Replacement: Range(0, num_boxes, 1), where num_boxes is dim 0 of the
+    // matched input — every box index is kept, the filter is dropped.
+    ngraph::graph_rewrite_callback callback = [data](pattern::Matcher& m) {
+        auto start = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({0}));
+        auto step = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({1}));
+
+        auto pattern_map = m.get_pattern_map();
+
+        auto input = pattern_map[data];
+        auto output = m.get_match_root();
+
+        auto input_shape = std::make_shared<ngraph::opset3::ShapeOf>(input);
+
+        // stop = input_shape[0] (the number of boxes)
+        auto axis = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({0}));
+        auto index = opset3::Constant::create(element::i64, Shape{}, std::vector<int64_t >({0}));
+        auto stop = std::make_shared<ngraph::opset3::Gather>(input_shape, index, axis);
+
+        auto range = std::make_shared<ngraph::opset3::Range>(start, stop, step);
+
+        range->set_friendly_name(output->get_friendly_name());
+        // TODO: add copy_runtime_info
+        ngraph::replace_node(output, range);
+
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(cast, "RemoveFilteringBoxesBySize");
+    this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/node.hpp>
+#include <ngraph/op/op.hpp>
+
+namespace ngraph { namespace vpu { namespace op {
+
+// VPU custom operation: computes the output shape a Reshape would produce,
+// given the input-data shape tensor and the output-shape descriptor tensor
+// (both rank-1 integral tensors; validated in validate_and_infer_types).
+// NOTE(review): 'specialZero' presumably mirrors Reshape's special_zero flag
+// (a zero dim copies the corresponding input dim) — confirm against the kernel.
+class OutShapeOfReshape : public ngraph::op::Op {
+public:
+    static constexpr NodeTypeInfo type_info{"OutShapeOfReshape", 1};
+    const NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    OutShapeOfReshape(
+            const Output<Node>& inDataShape,
+            const Output<Node>& outShapeDescriptor,
+            bool specialZero);
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+
+    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+
+    // Accessors for the 'special_zero' attribute (serialized in visit_attributes).
+    bool getSpecialZero() const { return m_specialZero; }
+    void setSpecialZero(bool special_zero) { m_specialZero = special_zero; }
+
+private:
+    bool m_specialZero;
+};
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+#include "ngraph/op/op.hpp"
+#include "ngraph/op/util/broadcast_base.hpp"
+#include "ngraph/op/util/attr_types.hpp"
+
+#include <memory>
+#include <vector>
+
+namespace ngraph { namespace vpu { namespace op {
+
+// VPU custom operation: a Broadcast (EXPLICIT or NUMPY mode) whose output shape
+// is evaluated statically at validation time, so the node never exposes a
+// dynamic output shape (see validate_and_infer_types in the .cpp).
+class StaticShapeBroadcast : public ::ngraph::op::util::BroadcastBase {
+public:
+    static constexpr NodeTypeInfo type_info{"StaticShapeBroadcast", 0};
+
+    const NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    // Explicit-mode constructor: arg, target shape and axes mapping inputs.
+    StaticShapeBroadcast(const Output<Node>& arg,
+                         const Output<Node>& targetShape,
+                         const Output<Node>& axesMapping,
+                         const ngraph::op::BroadcastModeSpec& broadcastSpec = ngraph::op::BroadcastType::EXPLICIT);
+
+    // Numpy-mode constructor: arg and target shape inputs only.
+    StaticShapeBroadcast(const Output<Node>& arg,
+                         const Output<Node>& targetShape,
+                         const ngraph::op::BroadcastModeSpec& broadcastSpec = ngraph::op::BroadcastType::NUMPY);
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& newInputs) const override;
+
+    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+
+    // Statically evaluated output shape; kept unchanged when later re-validation
+    // can no longer evaluate the target shape (see validate_and_infer_types).
+    PartialShape getEvaluatedShape() const { return m_evaluatedOutputShape; }
+    void setEvaluatedShape(const PartialShape& shape) { m_evaluatedOutputShape = shape; }
+
+private:
+    PartialShape m_evaluatedOutputShape;
+};
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
const NodeTypeInfo& get_type_info() const override { return type_info; }
- explicit StaticShapeNonZero(const Output<ngraph::Node>& input);
+ explicit StaticShapeNonZero(const Output<ngraph::Node>& input, const element::Type& output_type = element::i64);
void validate_and_infer_types() override;
std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+
+ bool evaluate(const HostTensorVector& output_values,
+ const HostTensorVector& input_values) override;
+
+ element::Type get_output_type() const { return m_output_type; }
+ void set_output_type(element::Type output_type) { m_output_type = output_type; }
+ // Overload collision with method on Node
+ using Node::set_output_type;
+
+protected:
+ element::Type m_output_type;
};
} // namespace op
class DynamicToStaticShape {
public:
explicit DynamicToStaticShape(const Transformations& specificTransformations = {});
- void transform(ngraph::Function& function) const;
+ void transform(std::shared_ptr<ngraph::Function> function) const;
private:
Transformations transformations;
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeBroadcast(std::shared_ptr<ngraph::Node> target);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeConcat(std::shared_ptr<ngraph::Node> target);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeGather(std::shared_ptr<ngraph::Node> node);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+// Converts a dynamic Reshape into its static-shape equivalent.
+// Fix: parameter renamed from 'transpose' (copy-paste from the transpose
+// header) to 'target', matching the sibling declarations.
+void dynamicToStaticShapeReshape(std::shared_ptr<ngraph::Node> target);
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace vpu {
+
+class DynamicToStaticShapeShapeOf : public ngraph::pass::GraphRewrite {
+public:
+ DynamicToStaticShapeShapeOf();
+};
+
+}  // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/node.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeVariadicSplit(std::shared_ptr<ngraph::Node> node);
+
+} // namespace vpu
#include <string>
#include <utility>
+#include "error.hpp"
+#include <vpu/utils/optional.hpp>
#include <vpu/utils/small_vector.hpp>
//
namespace vpu {
-class SimpleMathExpression final {
+// Parses the whole of 's' as a value of type T (e.g. int or float). Returns an
+// engaged Optional only when the entire string — allowing trailing whitespace —
+// is consumed; otherwise an empty Optional.
+// Fix: the return type was Optional<int> for every T, which silently truncated
+// floating-point results (parseNumber<float>("3.5") yielded 3) and broke the
+// float branch of IntOrFloat(const std::string&).
+template <typename T>
+Optional<T> parseNumber(const std::string& s) {
+    T value;
+    if ((std::istringstream(s) >> value >> std::ws).eof()) {
+        return {value};
+    }
+    return {};
+}
+
+namespace details {
+
+// Generates an arithmetic operator for IntOrFloat: integer arithmetic when both
+// operands hold ints, otherwise both sides are promoted to float first.
+#define OPERATOR(OP) \
+    IntOrFloat operator OP(const IntOrFloat &other) const { \
+        if (isInt && other.isInt) { \
+            return IntOrFloat{value.i OP other.value.i}; \
+        } \
+        const float lhs = isInt ? value.i : value.f; \
+        const float rhs = other.isInt ? other.value.i : other.value.f; \
+        return IntOrFloat{lhs OP rhs}; \
+    }
+
+class IntOrFloat final {
+ union {
+ int i;
+ float f;
+ } value{};
+ bool isInt = true;
+
public:
- void setVariables(const std::map<char, int>& vars) { _vars = vars; }
+ explicit IntOrFloat(int x) : isInt{true} {
+ value.i = x;
+ }
+ explicit IntOrFloat(float x) : isInt{false} {
+ value.f = x;
+ }
+ explicit IntOrFloat(const std::string& x) {
+ const auto integer = parseNumber<int>(x);
+ if (integer.hasValue()) {
+ *this = IntOrFloat(integer.get());
+ return;
+ }
+ const auto fp = parseNumber<float>(x);
+ if (fp.hasValue()) {
+ *this = IntOrFloat(fp.get());
+ return;
+ }
+ VPU_THROW_FORMAT("Failed to convert string to number: '%s'", x);
+ }
- void parse(const std::string& expression);
+ explicit operator std::string() const {
+ return isInt ? std::to_string(value.i) : std::to_string(value.f);
+ }
+
+ float toFloat() const { return isInt ? static_cast<float>(value.i) : value.f; }
+ OPERATOR(+)
+ OPERATOR(-)
+ OPERATOR(*)
+ OPERATOR(/)
+
+ IntOrFloat operator %(const IntOrFloat & other) const {
+ if (isInt && other.isInt) {
+ return IntOrFloat{value.i % other.value.i};
+ }
+ THROW_IE_EXCEPTION << "Can't apply modulus operation to floating point value";
+ }
+};
+
+} // namespace details
+
+class MathExpression final {
+public:
+ void setVariables(const std::map<std::string, std::string>& variables) {
+ for (const auto& var : variables) {
+ // if string converts to float, it also will be able to convert to int
+ if (parseNumber<float>(var.second).hasValue()) {
+ _vars.emplace(var.first, details::IntOrFloat{var.second});
+ }
+ }
+ }
+
+ void parse(const std::string& expression);
int evaluate() const;
private:
- struct Token final {
- enum TokenType {
- Value,
- Operator,
- };
+ enum class TokenType {
+ Value,
+ Operator,
+ Function
+ };
+ struct Token {
TokenType type;
- int value;
- char op;
+ details::IntOrFloat value;
+ std::string opName;
- explicit Token(TokenType t = Value, int v = 0, char o = 0) : type(t), value(v), op(o) {}
+ explicit Token(TokenType type, details::IntOrFloat value, std::string name)
+ : type(type), value(value), opName(std::move(name)) {}
};
-private:
- std::map<char, int> _vars;
+ std::map<std::string, details::IntOrFloat> _vars;
SmallVector<Token> _parsedTokens;
};
const auto& dataElementType = get_input_element_type(0);
NODE_VALIDATION_CHECK(this, dataElementType.is_static(), "(", get_friendly_name(), ") does not support dynamic element type for data tensor");
const auto& dimsElementType = get_input_element_type(1);
- NODE_VALIDATION_CHECK(this, dimsElementType.is_static() && dimsElementType.compatible(ngraph::element::i64), "(", get_friendly_name(),
- ") supports only i64 number type for dims tensor, but ", dimsElementType, " provided");
+ NODE_VALIDATION_CHECK(this, dimsElementType.is_static() && (dimsElementType.compatible(ngraph::element::i64) ||
+ dimsElementType.compatible(ngraph::element::i32)),
+ "(", get_friendly_name(), ") supports only i64 and i32 number type for dims tensor, but ", dimsElementType, " provided");
const auto& dataShape = get_input_shape(0);
const auto& dimsShape = get_input_shape(1);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+
+namespace ngraph { namespace vpu { namespace op {
+
+constexpr NodeTypeInfo OutShapeOfReshape::type_info;
+
+// Constructs the op from the input-data shape tensor and the output-shape
+// descriptor tensor; validation happens in validate_and_infer_types().
+OutShapeOfReshape::OutShapeOfReshape(
+        const Output<Node>& inDataShape,
+        const Output<Node>& outShapeDescriptor,
+        bool specialZero) : Op({inDataShape, outShapeDescriptor}), m_specialZero(specialZero) {
+    constructor_validate_and_infer_types();
+}
+
+// Validates that both inputs are static, rank-1, integral tensors, then sets
+// the output to an i64 tensor of the same shape as the descriptor input.
+void OutShapeOfReshape::validate_and_infer_types() {
+    NODE_VALIDATION_CHECK(this, get_input_size() == 2,
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") must have only 2 inputs, provided: ", get_input_size());
+
+    // Input 0: shape of the data to be reshaped — must be a static 1D tensor.
+    const auto& inDataShapeTensorShape = get_input_partial_shape(0);
+    NODE_VALIDATION_CHECK(this, inDataShapeTensorShape.is_static(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") doesn't support dynamic input data shape");
+    NODE_VALIDATION_CHECK(this, inDataShapeTensorShape.rank().get_length() == 1,
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") must have input data shape tensor with rank 1, provided: ",
+                          inDataShapeTensorShape.rank().get_length());
+
+    // Input 1: requested output-shape descriptor — must be a static 1D tensor.
+    const auto& outShapeDescriptorTensorShape = get_input_partial_shape(1);
+    NODE_VALIDATION_CHECK(this, outShapeDescriptorTensorShape.is_static(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") doesn't support dynamic output shape descriptor");
+    NODE_VALIDATION_CHECK(this, outShapeDescriptorTensorShape.rank().get_length() == 1,
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") must have output shape descriptor tensor with rank 1, provided: ",
+                          outShapeDescriptorTensorShape.rank().get_length());
+
+    // Both inputs must carry integral element types (they hold dimension values).
+    const auto& inDataShapeTensorType = get_input_element_type(0);
+    NODE_VALIDATION_CHECK(this,
+                          inDataShapeTensorType.is_static() &&
+                          inDataShapeTensorType.is_integral_number(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") input data type needs to be an integral type. Got: ",
+                          inDataShapeTensorType);
+    const auto& outShapeDescriptorTensorType = get_input_element_type(1);
+    NODE_VALIDATION_CHECK(this,
+                          outShapeDescriptorTensorType.is_static() &&
+                          outShapeDescriptorTensorType.is_integral_number(),
+                          "OutShapeOfReshape (", get_friendly_name(),
+                          ") shape descriptor type needs to be an integral type. Got: ",
+                          outShapeDescriptorTensorType);
+
+    // The resolved shape has one entry per descriptor element; always emitted as i64.
+    set_output_type(0, element::i64, outShapeDescriptorTensorShape);
+}
+
+// Clones the op onto new inputs, preserving the special_zero attribute.
+std::shared_ptr<Node> OutShapeOfReshape::clone_with_new_inputs(const OutputVector& new_args) const {
+    check_new_args_count(this, new_args);
+    return std::make_shared<OutShapeOfReshape>(new_args.at(0), new_args.at(1), m_specialZero);
+}
+
+// Serializes the single attribute of this op ('special_zero').
+bool OutShapeOfReshape::visit_attributes(ngraph::AttributeVisitor& visitor) {
+    visitor.on_attribute("special_zero", m_specialZero);
+    return true;
+}
+
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+
+#include "vpu/utils/error.hpp"
+
+#include "ngraph/opsets/opset3.hpp"
+#include "ngraph/evaluator.hpp"
+
+namespace ngraph { namespace vpu { namespace op {
+
+namespace {
+
+// Evaluator handler for ShapeOf: runs ShapeOf::evaluate into a freshly
+// allocated host tensor and returns it. The input tensors argument is unused —
+// ShapeOf only needs the static shape of its input value.
+// NOTE(review): the bool returned by evaluate() is ignored; a failure would
+// only surface later via the get_is_allocated() check in evaluateTargetShape.
+HostTensorVector evaluateShapeOf(Node* node, const HostTensorVector&) {
+    auto shapeOf = as_type<opset3::ShapeOf>(node);
+    const auto inputValue = shapeOf->input_value(0);
+    const auto outputValue = shapeOf->output(0);
+    const auto inputTensors =
+            HostTensorVector{std::make_shared<runtime::HostTensor>(inputValue)};
+    const auto outputTensors =
+            HostTensorVector{std::make_shared<runtime::HostTensor>(outputValue)};
+
+    shapeOf->evaluate(outputTensors, inputTensors);
+    return outputTensors;
+}
+
+// Evaluator handler for Constant: wraps a copy of the constant's data into a
+// host tensor (no actual computation needed).
+HostTensorVector evaluateConstant(Node* node, const HostTensorVector&) {
+    const auto constantNode = as_type<opset3::Constant>(node);
+    const auto constant = std::make_shared<opset3::Constant>(*constantNode);
+
+    const auto outputTensor = std::make_shared<runtime::HostTensor>(constant);
+
+    return {outputTensor};
+}
+
+// Generic evaluator handler: allocates one host tensor per node output and
+// delegates to Node::evaluate (used for Gather and Concat below).
+// NOTE(review): like evaluateShapeOf, the evaluate() return value is ignored.
+HostTensorVector evaluateOp(Node* node, const HostTensorVector& inputTensors) {
+    HostTensorVector outputTensors;
+    for (const auto& output : node->outputs()) {
+        outputTensors.push_back(std::make_shared<HostTensor>(output));
+    }
+
+    node->evaluate(outputTensors, inputTensors);
+    return outputTensors;
+}
+
+// Attempts to statically evaluate the subgraph producing 'value' (the target
+// shape) with a constant-folding evaluator limited to ShapeOf, Constant,
+// Gather and Concat. Returns PartialShape::dynamic() when the subgraph
+// contains other ops or evaluation fails (unallocated result tensor).
+PartialShape evaluateTargetShape(const Output<Node>& value) {
+    static Evaluator<HostTensorPtr>::op_handler_map handlers = {
+            {opset3::ShapeOf::type_info, evaluateShapeOf},
+            {opset3::Constant::type_info, evaluateConstant},
+            {opset3::Gather::type_info, evaluateOp},
+            {opset3::Concat::type_info, evaluateOp}};
+    Evaluator<HostTensorPtr>::value_map value_map;
+    Evaluator<HostTensorPtr> evaluator(handlers, value_map);
+
+    const auto shapeTensor = evaluator.evaluate(value);
+    if (!shapeTensor || !shapeTensor->get_is_allocated()) {
+        return PartialShape::dynamic();
+    }
+    // Materialize the evaluated tensor as a Constant to read it back as dims.
+    const auto shapeConstNode = std::make_shared<opset3::Constant>(shapeTensor);
+    const auto resultShape = Shape{shapeConstNode->cast_vector<size_t>()};
+
+    return resultShape;
+}
+
+} // namespace
+
+constexpr NodeTypeInfo StaticShapeBroadcast::type_info;
+
+// Explicit-mode constructor (arg, target shape, axes mapping). The evaluated
+// output shape starts dynamic and is resolved in validate_and_infer_types.
+StaticShapeBroadcast::StaticShapeBroadcast(const Output<Node>& arg,
+                                           const Output<Node>& targetShape,
+                                           const Output<Node>& axesMapping,
+                                           const ngraph::op::BroadcastModeSpec& broadcastSpec)
+        : ::ngraph::op::util::BroadcastBase{arg, targetShape, axesMapping, broadcastSpec},
+          m_evaluatedOutputShape{PartialShape::dynamic()} {
+    constructor_validate_and_infer_types();
+}
+
+// Numpy-mode constructor (arg, target shape only).
+StaticShapeBroadcast::StaticShapeBroadcast(const Output<Node>& arg,
+                                           const Output<Node>& targetShape,
+                                           const ngraph::op::BroadcastModeSpec& broadcastSpec)
+        : ::ngraph::op::util::BroadcastBase{arg, targetShape, broadcastSpec},
+          m_evaluatedOutputShape{PartialShape::dynamic()} {
+    constructor_validate_and_infer_types();
+}
+
+// Checks the input count against the broadcast mode (3 for EXPLICIT, 2 for
+// NUMPY), runs the base-class validation, and — if the inferred output shape is
+// still dynamic — evaluates the target-shape subgraph to pin a static shape.
+// Fails validation if no static shape can be determined.
+void StaticShapeBroadcast::validate_and_infer_types() {
+    if (m_mode.m_type == ngraph::op::BroadcastType::EXPLICIT) {
+        NODE_VALIDATION_CHECK(this, get_input_size() == 3,
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "with explicit mode must have 3 inputs, provided: ",
+                              get_input_size());
+    } else if (m_mode.m_type == ngraph::op::BroadcastType::NUMPY) {
+        NODE_VALIDATION_CHECK(this, get_input_size() == 2,
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "with numpy mode must have 2 inputs, provided: ",
+                              get_input_size());
+    } else {
+        NODE_VALIDATION_CHECK(this, false,
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "doesn't support ", m_mode.m_type, " mode");
+    }
+
+    ::ngraph::op::util::BroadcastBase::validate_and_infer_types();
+
+    if (get_output_partial_shape(0).is_dynamic()) {
+        // Try to evaluate the output shape. After some further transformations we
+        // may no longer be able to evaluate the target shape; in that case the
+        // previously evaluated shape is kept unchanged. For example,
+        // DynamicToStaticShapeShapeOf removes ShapeOf and passes the second input of DSR.
+        const auto evaluatedTargetShape = evaluateTargetShape(input_value(1));
+        if (evaluatedTargetShape.is_static()) {
+            m_evaluatedOutputShape = evaluatedTargetShape;
+        }
+        NODE_VALIDATION_CHECK(this, m_evaluatedOutputShape.is_static(),
+                              "StaticShapeBroadcast (", get_friendly_name(), ") ",
+                              "can't evaluate output shape, got: ", m_evaluatedOutputShape);
+        set_output_type(0, get_input_element_type(0), m_evaluatedOutputShape);
+    }
+}
+
+// Clones the broadcast preserving the mode; 2 new inputs select the numpy-form
+// constructor, 3 the explicit-form one.
+std::shared_ptr<Node> StaticShapeBroadcast::clone_with_new_inputs(const OutputVector& newInputs) const {
+    check_new_args_count(this, newInputs);
+    if (newInputs.size() == 2) {
+        return std::make_shared<StaticShapeBroadcast>(
+                newInputs.at(0), newInputs.at(1), m_mode);
+    } else {
+        return std::make_shared<StaticShapeBroadcast>(
+                newInputs.at(0), newInputs.at(1), newInputs.at(2), m_mode);
+    }
+}
+
+// Serializes the broadcast mode as a string attribute. Only "explicit" and
+// "numpy" are producible here — validate_and_infer_types rejects other modes —
+// so the empty-string fallthrough is unreachable in a validated node.
+bool StaticShapeBroadcast::visit_attributes(ngraph::AttributeVisitor& visitor) {
+    std::string mode;
+    if (m_mode.m_type == ngraph::op::BroadcastType::EXPLICIT) {
+        mode = "explicit";
+    } else if (m_mode.m_type == ngraph::op::BroadcastType::NUMPY) {
+        mode = "numpy";
+    }
+    visitor.on_attribute("mode", mode);
+
+    return true;
+}
+
+} // namespace op
+} // namespace vpu
+} // namespace ngraph
#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
+#include "ngraph/runtime/host_tensor.hpp"
+
namespace ngraph { namespace vpu { namespace op {
constexpr NodeTypeInfo StaticShapeNonZero::type_info;
-StaticShapeNonZero::StaticShapeNonZero(const Output<Node>& input)
- : Op({input}) {
+StaticShapeNonZero::StaticShapeNonZero(const Output<Node>& input, const element::Type& output_type)
+ : Op({input}), m_output_type(output_type) {
constructor_validate_and_infer_types();
}
"StaticShapeNonZero input data type needs to be a numeric type. Got: ",
input_et);
+ NODE_VALIDATION_CHECK(this,
+ m_output_type == element::i32 || m_output_type == element::i64,
+ "StaticShapeNonZero output data type can be either i32 or i64");
+
const auto total_dim_size = Dimension(shape_size(arg_shape.to_shape()));
- set_output_type(0, element::i64, {arg_shape.rank(), total_dim_size});
- set_output_type(1, element::i64, {Dimension(2)});
+ set_output_type(0, m_output_type, {arg_shape.rank(), total_dim_size});
+ set_output_type(1, m_output_type, {Dimension(2)});
}
std::shared_ptr<Node> StaticShapeNonZero::copy_with_new_args(
const NodeVector& new_args) const {
check_new_args_count(this, new_args);
- return std::make_shared<StaticShapeNonZero>(new_args.at(0));
+ return std::make_shared<StaticShapeNonZero>(new_args.at(0), m_output_type);
}
bool StaticShapeNonZero::visit_attributes(ngraph::AttributeVisitor& visitor) {
+ visitor.on_attribute("output_type", m_output_type);
return true;
}
+namespace {
+
+template <typename InType, typename OutType>
+void staticShapeNonZeroReference(const InType* input, OutType* outIndices, OutType* outShape, const Shape& inputShape) {
+ auto strides = row_major_strides(inputShape);
+ auto totalDimSize = shape_size(inputShape);
+
+ const auto getCoord = [&strides](int offset){
+ std::vector<size_t> coord;
+ for (const size_t& stride : strides) {
+ coord.insert(coord.begin(), offset / stride);
+ offset %= stride;
+ }
+
+ return coord;
+ };
+
+ const auto addCoordToIndices = [&outIndices, &totalDimSize](const std::vector<size_t> &coord,
+ size_t nonZeroCount) {
+ for (int j = 0; j < coord.size(); ++j) {
+ outIndices[j * totalDimSize + nonZeroCount] = coord[j];
+ }
+ };
+
+ const InType zeroValue = InType{0};
+ const auto isNonZero = [&input, &zeroValue](size_t i) {
+ return input[i] != zeroValue;
+ };
+
+ size_t nonZeroCount = 0;
+ for (size_t i = 0; i < totalDimSize; ++i) {
+ if (isNonZero(i)) {
+ addCoordToIndices(getCoord(i), nonZeroCount++);
+ }
+ }
+
+ outShape[0] = nonZeroCount;
+ outShape[1] = inputShape.size();
+}
+
+// Dispatches the reference implementation on the output element type (i32 or
+// i64, matching the op's output-type validation); the input element type is
+// fixed by the template parameter. Returns false for other output types.
+template <element::Type_t InType>
+bool evaluate(const HostTensorPtr& input,
+              const HostTensorPtr& outIndices,
+              const HostTensorPtr& outShape) {
+    bool rc = true;
+
+    switch (outIndices->get_element_type()) {
+        case element::Type_t::i64:
+            staticShapeNonZeroReference(input->get_data_ptr<InType>(),
+                                        outIndices->get_data_ptr<element::Type_t::i64>(),
+                                        outShape->get_data_ptr<element::Type_t::i64>(),
+                                        input->get_shape());
+            break;
+        case element::Type_t::i32:
+            staticShapeNonZeroReference(input->get_data_ptr<InType>(),
+                                        outIndices->get_data_ptr<element::Type_t::i32>(),
+                                        outShape->get_data_ptr<element::Type_t::i32>(),
+                                        input->get_shape());
+            break;
+        default: rc = false; break;
+    }
+
+    return rc;
+}
+
+// Dispatches on the input element type over all supported numeric types and
+// returns false for unsupported ones.
+// NOTE(review): TYPE_CASE is an ngraph helper macro (defined elsewhere) that
+// presumably expands to 'case element::Type_t::X: rc = evaluate<...>' — confirm.
+bool evaluateStaticShapeNonZero(const HostTensorPtr& input,
+                                const HostTensorPtr& outIndices,
+                                const HostTensorPtr& outShape) {
+    bool rc = true;
+
+    switch (input->get_element_type()) {
+        TYPE_CASE(i8)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(i16)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(i32)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(i64)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u8)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u16)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u32)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(u64)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(bf16)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(f32)(input, outIndices, outShape);
+        break;
+        TYPE_CASE(f64)(input, outIndices, outShape);
+        break;
+        default: rc = false; break;
+    }
+
+    return rc;
+}
+
+} // namespace
+
+// Host evaluation entry point: inputs[0] is the data tensor; outputs[0]
+// receives the indices, outputs[1] the 2-element shape tensor. Returns false
+// for unsupported element types. No size/arity checks are performed here.
+bool StaticShapeNonZero::evaluate(const HostTensorVector& outputs,
+                                  const HostTensorVector& inputs) {
+    return evaluateStaticShapeNonZero(inputs[0], outputs[0], outputs[1]);
+}
+
} // namespace op
} // namespace vpu
} // namespace ngraph
// SPDX-License-Identifier: Apache-2.0
//
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_broadcast.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_concat.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_unary_elementwise.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_roialign.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_transpose.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_variadic_split.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_non_max_suppression.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_binary_elementwise.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_squeeze.hpp"
#include "vpu/ngraph/transformations/dynamic_to_static_shape_unsqueeze.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_gather.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_shapeof.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_reshape.hpp"
+
#include "vpu/utils/error.hpp"
#include "ngraph/opsets/opset3.hpp"
bool isDynamic(const Node& node) {
const auto& outputs = node.outputs();
- return std::any_of(outputs.cbegin(), outputs.cend(), [](const Output<const Node>& output) { return output.get_partial_shape().is_dynamic(); });
+ return std::any_of(outputs.cbegin(), outputs.cend(), [](const Output<const Node>& output) {
+ VPU_THROW_UNLESS(output.get_partial_shape().rank() != ngraph::Rank::dynamic(),
+ "DynamicToStaticShape transformation: got dynamic rank for {} with type {} while only static is supported",
+ output.get_node_shared_ptr()->get_friendly_name(), output.get_node_shared_ptr()->get_type_name());
+
+ return output.get_partial_shape().is_dynamic();
+ });
}
bool validateStaticShapes(const ngraph::Function& function) {
{ngraph::opset3::Add::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Multiply::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Subtract::type_info, dynamicToStaticShapeBinaryEltwise},
+ {ngraph::opset3::VariadicSplit::type_info, dynamicToStaticShapeVariadicSplit},
{ngraph::opset3::Divide::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Equal::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::Power::type_info, dynamicToStaticShapeBinaryEltwise},
{ngraph::opset3::NonMaxSuppression::type_info, dynamicToStaticNonMaxSuppression},
{ngraph::opset3::NonZero::type_info, dynamicToStaticShapeNonZero},
{ngraph::opset3::Transpose::type_info, dynamicToStaticShapeTranspose},
+ {ngraph::opset3::Concat::type_info, dynamicToStaticShapeConcat},
{ngraph::opset3::Convert::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Clamp::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Floor::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Sigmoid::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Sqrt::type_info, dynamicToStaticUnaryElementwise},
{ngraph::opset3::Squeeze::type_info, dynamicToStaticShapeSqueeze},
+ {ngraph::opset3::Gather::type_info, dynamicToStaticShapeGather},
{ngraph::opset3::Unsqueeze::type_info, dynamicToStaticShapeUnsqueeze},
{ngraph::opset3::ROIAlign::type_info, dynamicToStaticShapeROIAlign},
+ {ngraph::opset3::Reshape::type_info, dynamicToStaticShapeReshape},
+ {ngraph::opset3::Broadcast::type_info, dynamicToStaticShapeBroadcast},
};
return transformations;
}
transformations.emplace(ngraph::opset3::Result::type_info, [](const std::shared_ptr<ngraph::Node>&){});
}
-void DynamicToStaticShape::transform(ngraph::Function& function) const {
- for (const auto& operation : function.get_ordered_ops()) {
+void DynamicToStaticShape::transform(std::shared_ptr<ngraph::Function> function) const {
+ for (const auto& operation : function->get_ordered_ops()) {
if (!isDynamic(*operation)) {
continue;
}
transformation->second(operation);
}
- function.validate_nodes_and_infer_types();
- validateStaticShapes(function);
+ // Should be executed after all dynamic-to-static transformations
+ DynamicToStaticShapeShapeOf().run_on_function(function);
+
+ function->validate_nodes_and_infer_types();
+ validateStaticShapes(*function);
}
} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_broadcast.hpp"
+
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include "vpu/utils/error.hpp"
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeBroadcast(std::shared_ptr<ngraph::Node> target) {
+ const auto broadcast = ngraph::as_type_ptr<ngraph::opset3::Broadcast>(target);
+ VPU_THROW_UNLESS(broadcast,
+ "dynamicToStaticShapeBroadcast transformation is not applicable for {}, "
+ "it should be {} instead",
+ target, ngraph::opset3::Broadcast::type_info.name);
+
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> staticShapeBroadcast;
+ if (broadcast->get_broadcast_spec() == ngraph::op::BroadcastType::EXPLICIT) {
+ staticShapeBroadcast = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ broadcast->input_value(0),
+ broadcast->input_value(1),
+ broadcast->input_value(2));
+ } else if (broadcast->get_broadcast_spec() == ngraph::op::BroadcastType::NUMPY) {
+ staticShapeBroadcast = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ broadcast->input_value(0),
+ broadcast->input_value(1));
+ } else {
+        VPU_THROW_FORMAT("dynamicToStaticShapeBroadcast supports only explicit and numpy modes, "
+                         "provided {}", broadcast->get_broadcast_spec().m_type);
+ }
+
+ auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ staticShapeBroadcast->output(0), broadcast->input_value(1));
+
+ ngraph::replace_node(std::move(target), std::move(dsr));
+}
+
+} // namespace vpu
+
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_concat.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+#include <numeric>
+#include <utility>
+
+namespace vpu {
+
+void dynamicToStaticShapeConcat(std::shared_ptr<ngraph::Node> target) {
+ const auto inputs = target->input_values();
+
+ ngraph::OutputVector dsrInputs;
+ ngraph::OutputVector staticInputs;
+ for (const auto& input : inputs) {
+ const auto inputNode = input.get_node_shared_ptr();
+ if (ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(inputNode)) {
+ dsrInputs.emplace_back(input);
+ } else {
+ staticInputs.emplace_back(input);
+ }
+ }
+
+ VPU_THROW_UNLESS(!dsrInputs.empty(),
+ "DynamicToStaticShape transformation for {} of type {} expects at least "
+ "one {} as input, actual types: {}", target->get_friendly_name(),
+ target->get_type_info().name, ngraph::vpu::op::DynamicShapeResolver::type_info.name,
+ std::accumulate(inputs.begin(), inputs.end(), std::string(), [](
+ const std::string& typesStr, const ngraph::Output<ngraph::Node>& input) {
+ return typesStr + input.get_node_shared_ptr()->get_type_info().name + ", ";
+ }));
+
+ const auto firstDSRInputNode = dsrInputs.front().get_node_shared_ptr();
+ const auto shapeDataType = firstDSRInputNode->input(1).get_element_type();
+ const auto dataRank = firstDSRInputNode->get_output_partial_shape(0).rank().get_length();
+ const auto axis = ngraph::as_type_ptr<ngraph::opset3::Concat>(target)->get_concatenation_axis();
+
+ const auto shapeToConstant = [&shapeDataType, &dataRank](const ngraph::Shape& shape) {
+ return ngraph::opset3::Constant::create(
+ shapeDataType, {static_cast<size_t>(dataRank)}, shape)->output(0);
+ };
+
+ const auto getShapeFromDSR = [&target, &shapeDataType](const ngraph::Output<ngraph::Node>& dsrOutput) {
+ const auto dsrNode = dsrOutput.get_node_shared_ptr();
+ const auto dsrShapeInputValue = dsrNode->input_value(1);
+ VPU_THROW_UNLESS(dsrShapeInputValue.get_element_type() == shapeDataType,
+ "DynamicToStaticShape transformation for {} of type {} expects input "
+ "shape with {} type from {} argument of type {}, provided {}",
+ target->get_friendly_name(), target->get_type_info().name,
+ shapeDataType, dsrNode->get_friendly_name(), dsrNode->get_type_info().name,
+ dsrShapeInputValue.get_element_type());
+ return dsrShapeInputValue;
+ };
+
+ const auto sumOfShapes = [](const ngraph::Output<ngraph::Node>& shape1,
+ const ngraph::Output<ngraph::Node>& shape2) {
+ const auto shapeAccumulatorOp = std::make_shared<ngraph::opset3::Add>(shape1, shape2);
+ return shapeAccumulatorOp->output(0);
+ };
+
+ const auto divideDimsByNumOfInputsExceptAxis = [&target, &dataRank, &axis,
+ &shapeDataType, &shapeToConstant](
+ const ngraph::Output<ngraph::Node>& shape) {
+ ngraph::Shape dividerValues(dataRank, target->get_input_size());
+ dividerValues[axis] = 1;
+ const auto divider = shapeToConstant(dividerValues);
+ const auto divide = std::make_shared<ngraph::opset3::Divide>(shape, divider);
+ return divide->output(0);
+ };
+
+ const auto getAdditionalShapeFromStatic = [&target, &dataRank, &axis](
+ const ngraph::OutputVector& staticInputs) {
+ ngraph::Shape accumulatedStaticShapeValue(dataRank, 0);
+ for (const auto& staticInput : staticInputs) {
+ const auto& staticInputPartialShape = staticInput.get_partial_shape();
+ VPU_THROW_UNLESS(staticInputPartialShape.is_static(),
+ "DynamicToStaticShape transformation for {} of type {} expects static "
+ "shape on inputs without DSR", target->get_friendly_name(),
+ target->get_type_info().name);
+ accumulatedStaticShapeValue[axis] += static_cast<size_t>(staticInputPartialShape[axis]);
+ }
+ return accumulatedStaticShapeValue;
+ };
+
+ auto accumulatedShape = getShapeFromDSR(dsrInputs.front());
+ for (size_t dsrInputIdx = 1; dsrInputIdx < dsrInputs.size(); ++dsrInputIdx) {
+ const auto dsrInputShape = getShapeFromDSR(dsrInputs[dsrInputIdx]);
+ accumulatedShape = sumOfShapes(accumulatedShape, dsrInputShape);
+ }
+
+ if (dsrInputs.size() > 1) {
+ accumulatedShape = divideDimsByNumOfInputsExceptAxis(accumulatedShape);
+ }
+
+ if (!staticInputs.empty()) {
+ const auto accumulatedStaticShape = shapeToConstant(getAdditionalShapeFromStatic(staticInputs));
+ accumulatedShape = sumOfShapes(accumulatedShape, accumulatedStaticShape);
+ }
+
+ const auto copied = target->clone_with_new_inputs(target->input_values());
+ const auto outDsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ copied, accumulatedShape);
+
+ ngraph::replace_node(std::move(target), outDsr);
+}
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_gather.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+#include <numeric>
+
+namespace vpu {
+
+void dynamicToStaticShapeGather(std::shared_ptr<ngraph::Node> target) {
+ const auto gather = ngraph::as_type_ptr<ngraph::opset3::Gather>(target);
+ VPU_THROW_UNLESS(gather, "dynamicToStaticShapeGather transformation is not applicable for {}, it should be {} instead",
+ target, ngraph::opset3::Gather::type_info);
+
+ int64_t axis = gather->get_axis();
+ VPU_THROW_UNLESS(axis != std::numeric_limits<int64_t>::max() && axis >= 0,
+ "dynamicToStaticShapeGather: Unsupported Gather axis {} for node {}", axis, gather);
+
+ auto shapeToConstant = [&gather](const ngraph::Output<ngraph::Node> & output) -> std::shared_ptr<ngraph::opset3::Constant> {
+ VPU_THROW_UNLESS(output.get_partial_shape().is_static(),
+ "DynamicToStaticShape transformation for {} of type {} expects static shape on inputs without DSR",
+ gather->get_friendly_name(), gather->get_type_info());
+ return ngraph::opset3::Constant::create(ngraph::element::i64, {output.get_shape().size()}, output.get_shape());
+ };
+
+ const auto dataDSR = ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(gather->input_value(0).get_node_shared_ptr());
+ const auto idxDSR = ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(gather->input_value(1).get_node_shared_ptr());
+
+ VPU_THROW_UNLESS(dataDSR || idxDSR, "DynamicToStaticShape transformation for {} of type {} expects at least one DSR as input",
+ gather->get_friendly_name(), gather->get_type_info());
+
+ const auto data_shape = dataDSR ? dataDSR->input_value(1) : shapeToConstant(gather->input_value(0));
+ const auto indices_shape = idxDSR ? idxDSR->input_value(1) : shapeToConstant(gather->input_value(1));
+
+ const auto copied = target->clone_with_new_inputs(target->input_values());
+
+
+ const auto & data_rank = data_shape.get_partial_shape();
+ const auto & indices_rank = indices_shape.get_partial_shape();
+ VPU_THROW_UNLESS(data_rank.is_static() && indices_rank.is_static(),
+ "DynamicToStaticShape transformation for {} doesn't support dynamic rank", gather);
+
+ const auto data_rank_value = data_rank[0].get_length();
+ const auto indices_rank_value = indices_rank[0].get_length();
+ ngraph::OutputVector output_dims;
+ if (axis) {
+ std::vector<int64_t> first_data_shape_part_indices(axis);
+ std::iota(first_data_shape_part_indices.begin(), first_data_shape_part_indices.end(), 0);
+ const auto first_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {first_data_shape_part_indices.size()}, first_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ output_dims.push_back(first_data_shape_part);
+ }
+ if (indices_rank_value)
+ output_dims.push_back(indices_shape);
+ if (axis + 1 < data_rank_value) {
+ std::vector<int64_t> second_data_shape_part_indices(data_rank_value - axis - 1);
+ std::iota(second_data_shape_part_indices.begin(), second_data_shape_part_indices.end(), axis + 1);
+ const auto second_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {second_data_shape_part_indices.size()}, second_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ output_dims.push_back(second_data_shape_part);
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ ngraph::replace_node(target, std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied, output_shape));
+}
+
+} // namespace vpu
#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include "vpu/utils/error.hpp"
#include "ngraph/graph_util.hpp"
+#include "ngraph/ops.hpp"
#include <memory>
namespace vpu {
-void dynamicToStaticShapeNonZero(std::shared_ptr<ngraph::Node> nonZero) {
- auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(nonZero->input(0).get_source_output());
+void dynamicToStaticShapeNonZero(std::shared_ptr<ngraph::Node> node) {
+ auto nonZero = std::dynamic_pointer_cast<ngraph::op::v3::NonZero>(node);
+ VPU_THROW_UNLESS(nonZero, "dynamicToStaticShapeNonZero transformation for {} of type {} expects {} as node for replacement",
+ node->get_friendly_name(), node->get_type_info(), ngraph::op::v3::NonZero::type_info);
+
+ auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(nonZero->input(0).get_source_output(), nonZero->get_output_type());
staticShapeNonZero->set_friendly_name(nonZero->get_friendly_name() + "/static_shape");
auto dynamicShapeResolver = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_reshape.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+
+namespace vpu {
+
+void dynamicToStaticShapeReshape(std::shared_ptr<ngraph::Node> target) {
+ const auto dsr = target->get_argument(0);
+ VPU_THROW_UNLESS(ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(dsr),
+ "DynamicToStaticShape transformation for {} of type {} expects {} as input with index {}",
+ target->get_friendly_name(), target->get_type_info(), ngraph::vpu::op::DynamicShapeResolver::type_info, 0);
+
+ const auto outShapeDescriptor = target->get_argument(1);
+ VPU_THROW_UNLESS(ngraph::as_type_ptr<ngraph::opset3::Constant>(outShapeDescriptor),
+                     "DynamicToStaticShape transformation for {} of type {} expects {} as input with index {}",
+ target->get_friendly_name(), target->get_type_info(), ngraph::opset3::Constant::type_info, 1);
+
+ const auto reshape = std::dynamic_pointer_cast<ngraph::opset3::Reshape>(target);
+ const auto copied = reshape->clone_with_new_inputs(target->input_values());
+ const auto inDataShape = dsr->input(1).get_source_output();
+
+ const auto outShapeOfReshape = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ inDataShape, outShapeDescriptor, reshape->get_special_zero());
+
+ ngraph::replace_node(std::move(target), std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ copied, outShapeOfReshape));
+}
+
+} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_shapeof.hpp"
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/utils/error.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+
+namespace vpu {
+
+DynamicToStaticShapeShapeOf::DynamicToStaticShapeShapeOf() : GraphRewrite() {
+ // We don't set strict_mode when use pattern Matcher,
+ // so we can set any type and shape for input.
+ auto inputWithAnyTypeAndShape = std::make_shared<ngraph::pattern::op::Label>(
+ ngraph::element::dynamic, ngraph::PartialShape{});
+ auto shapeOfPattern = std::make_shared<ngraph::opset3::ShapeOf>(inputWithAnyTypeAndShape);
+
+ ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) {
+ auto shapeOfNode = std::dynamic_pointer_cast<ngraph::opset3::ShapeOf>(m.get_match_root());
+ if (!shapeOfNode) {
+ return false;
+ }
+
+ auto dsr = shapeOfNode->input_value(0).get_node_shared_ptr();
+ if (!ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(dsr)) {
+ return false;
+ }
+
+ ngraph::replace_node(shapeOfNode, dsr->input_value(1).get_node_shared_ptr());
+
+ return true;
+ };
+
+ auto m = std::make_shared<ngraph::pattern::Matcher>(shapeOfPattern, "DynamicToStaticShapeShapeOf");
+ this->add_matcher(m, callback, ngraph::pass::PassProperty::CHANGE_DYNAMIC_STATE);
+}
+
+} // namespace vpu
const auto shape = dsr->input(1).get_source_output();
const auto axis = std::make_shared<ngraph::opset3::Constant>(
- ngraph::element::u64,
+ ngraph::element::i64,
ngraph::Shape{std::initializer_list<std::size_t>{1}},
- std::vector<std::size_t>{0});
+ std::vector<std::int64_t>{0});
const auto scatterElementsUpdate = std::make_shared<ngraph::opset3::ScatterElementsUpdate>(shape, transposition, shape, axis);
ngraph::replace_node(std::move(target), std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied, scatterElementsUpdate));
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_variadic_split.hpp"
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+#include <vpu/utils/error.hpp>
+
+#include "ngraph/graph_util.hpp"
+#include "ngraph/opsets/opset3.hpp"
+
+#include <memory>
+#include <numeric>
+#include <ngraph/validation_util.hpp>
+
+namespace vpu {
+
+void dynamicToStaticShapeVariadicSplit(std::shared_ptr<ngraph::Node> target) {
+ const auto dsr = ngraph::as_type_ptr<ngraph::vpu::op::DynamicShapeResolver>(target->input_value(0).get_node_shared_ptr());
+ VPU_THROW_UNLESS(dsr, "DynamicToStaticShape transformation for {} of type {} expects {} as input with index {}",
+ target->get_friendly_name(), target->get_type_info(), ngraph::vpu::op::DynamicShapeResolver::type_info, 0);
+
+ const auto axis_node = ngraph::as_type_ptr<ngraph::opset3::Constant>(target->input_value(1).get_node_shared_ptr());
+    VPU_THROW_UNLESS(axis_node, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, dynamic axis is not supported", target);
+
+ const auto data_rank = target->get_input_partial_shape(0).rank();
+    VPU_THROW_UNLESS(data_rank.is_static(), "dynamicToStaticShapeVariadicSplit transformation for {} doesn't support dynamic rank", target);
+
+ int64_t axis = ngraph::normalize_axis(target->description(), axis_node->cast_vector<int64_t>()[0], data_rank);
+
+ const auto split_lengths_node = ngraph::as_type_ptr<ngraph::opset3::Constant>(target->input_value(2).get_node_shared_ptr());
+    VPU_THROW_UNLESS(split_lengths_node, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, dynamic split_length is not supported", target);
+ const auto split_lengths = split_lengths_node->cast_vector<int64_t>();
+
+ for (const auto & i : split_lengths) {
+        VPU_THROW_UNLESS(i != -1, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, split_length with -1 is not supported", target);
+        VPU_THROW_UNLESS(i > 0, "dynamicToStaticShapeVariadicSplit transformation is not applicable for {}, non-positive split_length is not supported", target);
+ }
+
+ const auto data_shape = dsr->input_value(1).get_node_shared_ptr();
+ const auto copied = target->clone_with_new_inputs(target->input_values());
+ const auto data_rank_value = data_rank.get_length();
+ ngraph::OutputVector first_shape_part, second_shape_part;
+ if (axis) {
+ std::vector<int64_t> first_data_shape_part_indices(axis);
+ std::iota(first_data_shape_part_indices.begin(), first_data_shape_part_indices.end(), 0);
+ const auto first_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {first_data_shape_part_indices.size()}, first_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ first_shape_part.push_back(first_data_shape_part);
+ }
+ if (axis + 1 < data_rank_value) {
+ std::vector<int64_t> second_data_shape_part_indices(data_rank_value - axis - 1);
+ std::iota(second_data_shape_part_indices.begin(), second_data_shape_part_indices.end(), axis + 1);
+ const auto second_data_shape_part = std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {second_data_shape_part_indices.size()}, second_data_shape_part_indices),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0}));
+ second_shape_part.push_back(second_data_shape_part);
+ }
+ for (auto i = 0; i < split_lengths.size(); ++i) {
+ const auto dim = ngraph::opset3::Constant::create(data_shape->get_element_type(), {1}, {split_lengths[i]});
+ if (!first_shape_part.empty() || !second_shape_part.empty()) {
+ ngraph::OutputVector output_dims{dim};
+ output_dims.insert(output_dims.begin(), first_shape_part.begin(), first_shape_part.end());
+ output_dims.insert(output_dims.end(), second_shape_part.begin(), second_shape_part.end());
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ target->output(i).replace(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied->output(i), output_shape));
+ } else {
+ target->output(i).replace(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(copied->output(i), dim));
+ }
+ }
+}
+
+} // namespace vpu
#include <set>
#include <stack>
#include <map>
-#include <stdexcept>
-#include <utility>
#include <functional>
#include <vpu/utils/error.hpp>
namespace {
-const std::set<char> whitespaces = {
- ' ',
- '\t',
+using ValueType = details::IntOrFloat;
+
+struct Operator {
+ int priority;
+ std::function<ValueType(ValueType, ValueType)> op;
};
-// priority, function
-using Operator = std::pair<int, std::function<int(int, int)>>;
+static const std::map<std::string, Operator> operators = {
+ { "+", { 0, std::plus<ValueType>() }},
+ { "-", { 0, std::minus<ValueType>() }},
+ { "*", { 1, std::multiplies<ValueType>() }},
+ { "/", { 1, std::divides<ValueType>() }},
+ { "%", { 1, std::modulus<ValueType>() }}
+};
-const std::map<char, Operator> operators = {
- { '+', { 0, std::plus<int>() } },
- { '-', { 0, std::minus<int>() } },
- { '*', { 1, std::multiplies<int>() } },
- { '/', { 1, std::divides<int>() } },
- { '%', { 1, std::modulus<int>() } },
+static const std::map<std::string, std::function<ValueType(ValueType)>> function = {
+ {"floor", [](ValueType x) { return ValueType{std::floor(x.toFloat())}; }},
+ {"ceil" , [](ValueType x) { return ValueType{std::ceil(x.toFloat())}; }},
+ {"round", [](ValueType x) { return ValueType{std::round(x.toFloat())}; }},
+ {"abs" , [](ValueType x) { return ValueType{std::abs(x.toFloat())}; }},
+ {"sqrt" , [](ValueType x) { return ValueType{std::sqrt(x.toFloat())}; }}
};
+bool isFunction(const std::string& token) {
+ return function.find(token) != function.end();
+}
+bool isOperator(const std::string& token) {
+ return operators.find(token) != operators.end();
+}
+int opPriority(const std::string& token) {
+ return operators.at(token).priority;
+}
+
} // namespace
-void SimpleMathExpression::parse(const std::string& expression) {
+void MathExpression::parse(const std::string& expression) {
_parsedTokens.clear();
+ std::stack<std::string> tokenStack;
- std::stack<char> operatorStack;
-
- // While there are tokens to be read.
- for (size_t i = 0; i != expression.length(); i++) {
- // Ignore whitespaces;
- while (whitespaces.find(expression[i]) != whitespaces.end()) {
- i++;
+ for (auto it = begin(expression); it != end(expression); ++it) {
+ if (*it == ' ' || *it == '\t') {
+ continue;
}
- // Read a token.
- auto curr = expression[i];
-
- // If the token is a number, then push it to the output queue.
- if (std::isdigit(curr)) {
+ // parse number
+ if (std::isdigit(*it)) {
size_t len = 0;
- auto value = std::stoi(expression.substr(i), &len);
+ const auto value = std::stof(&*it, &len);
- _parsedTokens.emplace_back(Token(Token::Value, value, 0));
-
- i += (len - 1);
+ _parsedTokens.emplace_back(TokenType::Value, ValueType{value}, "");
+ std::advance(it, len - 1);
continue;
}
- // If the token is a variable, then push it's value to the output queue.
- if (_vars.find(curr) != _vars.end()) {
- _parsedTokens.emplace_back(Token(Token::Value, _vars.at(curr), 0));
+ // parse variable/function
+ if (std::isalpha(*it)) {
+ const auto end_token = std::find_if_not(it, end(expression),
+ [](char c) { return std::isalnum(c) || c == '_'; });
+ const auto token = std::string(it, end_token);
+ std::advance(it, token.length() - 1);
- continue;
+ if (isFunction(token)) {
+ tokenStack.push(token);
+ continue;
+ }
+ if (_vars.find(token) != _vars.end()) {
+ _parsedTokens.emplace_back(TokenType::Value, ValueType{_vars.at(token)}, "");
+ continue;
+ }
}
- // If the token is an operator, then:
- if (operators.find(curr) != operators.end()) {
- // While there is an operator at the top of the operator stack with
- // greater than or equal to precedence:
- // pop operators from the operator stack, onto the output queue;
- while (!operatorStack.empty() &&
- (operators.find(operatorStack.top()) != operators.end()) &&
- (operators.at(operatorStack.top()).first >= operators.at(curr).first)) {
- auto op = operatorStack.top();
- operatorStack.pop();
-
- _parsedTokens.emplace_back(Token(Token::Operator, 0, op));
+ // parse operator
+ if (isOperator(std::string(1, *it))) {
+ while (!tokenStack.empty()
+ && (isFunction(tokenStack.top())
+ || (isOperator(tokenStack.top())
+ && opPriority(tokenStack.top()) >= opPriority(std::string(1, *it))))) {
+ const auto tokenType = isOperator(tokenStack.top()) ? TokenType::Operator
+ : TokenType::Function;
+ _parsedTokens.emplace_back(tokenType, ValueType{0}, tokenStack.top());
+ tokenStack.pop();
}
- // push the read operator onto the operator stack.
- operatorStack.push(curr);
-
+ tokenStack.push(std::string(1, *it));
continue;
}
- // If the token is a left bracket (i.e. "("), then:
- // push it onto the operator stack.
- if (curr == '(') {
- operatorStack.push(curr);
-
+ if (*it == '(') {
+ tokenStack.push("(");
continue;
}
- // If the token is a right bracket (i.e. ")"), then:
- if (curr == ')') {
- // While the operator at the top of the operator stack is not a left bracket:
- // pop operators from the operator stack onto the output queue;
- while (!operatorStack.empty() &&
- operatorStack.top() != '(') {
- _parsedTokens.emplace_back(Token(Token::Operator, 0, operatorStack.top()));
-
- operatorStack.pop();
+ if (*it == ')') {
+ while (!tokenStack.empty() && tokenStack.top() != "(") {
+ const auto tokenType = isOperator(tokenStack.top()) ? TokenType::Operator
+ : TokenType::Function;
+ _parsedTokens.emplace_back(tokenType, ValueType{0}, tokenStack.top());
+ tokenStack.pop();
}
- // pop the left bracket from the stack.
- // If the stack runs out without finding a left bracket, then there are mismatched parentheses.
- if (!operatorStack.empty() &&
- operatorStack.top() == '(') {
- operatorStack.pop();
+ if (!tokenStack.empty()) {
+ tokenStack.pop();
} else {
VPU_THROW_EXCEPTION << "Mismatched parentheses in " << expression;
}
continue;
}
- // Unknown token
- VPU_THROW_EXCEPTION << "Unknown token " << curr << " in " << expression;
+ VPU_THROW_EXCEPTION << "Unknown token " << *it << " in " << expression;
}
- // If there are no more tokens to read:
- // while there are still operator tokens on the stack:
- // if the operator token on the top of the stack is a bracket, then
- // there are mismatched parentheses;
- // pop the operator onto the output queue.
- while (!operatorStack.empty()) {
- if (operatorStack.top() == '(') {
+ while (!tokenStack.empty()) {
+ if (tokenStack.top() == "(") {
VPU_THROW_EXCEPTION << "Mismatched parentheses in " << expression;
}
-
- _parsedTokens.emplace_back(Token(Token::Operator, 0, operatorStack.top()));
-
- operatorStack.pop();
+ const auto tokenType = isOperator(tokenStack.top()) ? TokenType::Operator
+ : TokenType::Function;
+ _parsedTokens.emplace_back(tokenType, ValueType{0}, tokenStack.top());
+ tokenStack.pop();
}
}
-int SimpleMathExpression::evaluate() const {
- std::stack<int> values;
- for (const auto& t : _parsedTokens) {
- switch (t.type) {
- case Token::Value:
- values.push(t.value);
- break;
- case Token::Operator: {
- if (values.size() < 2) {
- VPU_THROW_EXCEPTION << "Illegal expression: not enough values for operator evaluation";
- }
+int MathExpression::evaluate() const {
+ std::stack<ValueType> values;
- // pop last 2 values and apply operand
- auto val2 = values.top();
- values.pop();
+ for (const auto& token : _parsedTokens) {
+ switch (token.type) {
+ case TokenType::Value:
+ values.push(token.value);
+ break;
+ case TokenType::Operator: {
+ if (values.size() < 2) {
+ VPU_THROW_EXCEPTION << "Illegal expression: not enough values for operator evaluation";
+ }
- auto val1 = values.top();
- values.pop();
+ auto val2 = values.top();
+ values.pop();
- values.push(operators.at(t.op).second(val1, val2));
+ auto val1 = values.top();
+ values.pop();
- break;
- }
- default:
- VPU_THROW_EXCEPTION << "Illegal expression: unhandled token";
+ values.push(operators.at(token.opName).op(val1, val2));
+ break;
+ }
+ case TokenType::Function: {
+ if (values.empty()) {
+ VPU_THROW_EXCEPTION << "Illegal expression: not enough values for function evaluation";
+ }
+ auto val1 = values.top();
+ values.pop();
+
+ values.push(function.at(token.opName)(val1));
+ break;
+ }
+ default:
+ VPU_THROW_EXCEPTION << "Illegal expression: unhandled token";
}
}
VPU_THROW_EXCEPTION << "Illegal expression: not enough operators";
}
- return values.top();
+ return values.top().toFloat();
}
} // namespace vpu
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
ushort extract_weights(uchar val, int bit)
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void Convolution1x1_NCHW(
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void Convolution3x3(const __global half* in_param,
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define MAX_OPENCL_BUFF_SIZE 64*1024
+
+// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
+#define USE_MANUAL_DMA 1
+
+#if defined (USE_MANUAL_DMA)
+void dmacpyLineSrcStrideStart(global half* from, private half* to, int size, int src_width, int src_stride)
+{
+ item_dma_event_t copyEvent = WorkItemDmaCreateStrideTransaction(from, to, src_width, src_width, src_stride, src_width, size, 0);
+ WaitWorkItemDmaEvents(1, ©Event);
+}
+
+void dmacpyLineDstStrideStart(private half* from, global half* to, int size, int src_width, int src_stride)
+{
+ item_dma_event_t copyEvent = WorkItemDmaCreateStrideTransaction(from, to, src_width, src_width, src_width, src_stride, size, 0);
+ WaitWorkItemDmaEvents(1, ©Event);
+}
+#endif
+
+void memzero(void * ptr, size_t num)
+{
+ float4* line0_ = (float4*) ptr;
+ #pragma unroll 16
+ for (int i = 0; i < num/16; i++)
+ {
+ line0_[i] = (float4){0.f, 0.f, 0.f, 0.f};
+ }
+ uchar* ptr_ = (uchar*) ptr;
+ for (int i = num/16*16; i < num; i++)
+ {
+ ptr_[i] = 0;
+ }
+}
+
+void __attribute__((noinline)) crosscorrh(__private const half* restrict line0,
+ __private const half* restrict line1,
+ __private half* restrict dline,
+ int topwidth,
+ int max_displacement,
+ int neighborhood_grid_radius,
+ int kernel_size,
+ int padding,
+ int bottomwidth,
+ int stride1,
+ int stride2,
+ int max_channels,
+ int cur_subchannels)
+{
+ if (max_channels == 64)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int x1 = max_displacement - padding + i;
+ int offset1 = x1 >= 0 ? 0 : (-x1 + stride1 - 1)/stride1;
+ x1 += offset1*stride1;
+
+ for (int blockIdx_x = offset1; blockIdx_x < topwidth && x1 < bottomwidth; blockIdx_x++, x1 += stride1)
+ {
+ int x2 = x1 - neighborhood_grid_radius*stride2;
+ int offset2 = x2 >= 0 ? 0 : (-x2 + stride2 - 1)/stride2;
+ x2 += offset2*stride2;
+
+ for (int top_channel_x = offset2 - neighborhood_grid_radius;
+ top_channel_x <= neighborhood_grid_radius && x2 < bottomwidth;
+ top_channel_x++, x2 += stride2)
+ {
+ half8 sum4 = (half8){0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+
+ half8* src0 = (half8*)(line0 + x1*max_channels);
+ half8* src1 = (half8*)(line1 + x2*max_channels);
+
+ #pragma unroll 8
+ for (int ch = 0; ch < max_channels/8; ch++)
+ sum4 += (src0[ch])*(src1[ch]);
+
+ half sum = __builtin_shave_sau_sumx_f16_r(sum4);
+ dline[(top_channel_x + neighborhood_grid_radius)*topwidth + blockIdx_x] += (sum);
+ }
+ }
+ }
+ }
+ else
+ {
+ int neighborhood_grid_width = 2*neighborhood_grid_radius + 1;
+
+ for (int blockIdx_x = 0; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int x1 = blockIdx_x*stride1 + max_displacement + i - padding;
+
+ if ((x1 >= 0) && (x1 < bottomwidth))
+ {
+ int o_min = - neighborhood_grid_radius*stride2;
+ int o_max = neighborhood_grid_width*stride2 - neighborhood_grid_radius*stride2;
+ if ((o_min) < ( - x1)) o_min -= ((x1 + o_min - (stride2 - 1))/stride2)*stride2;
+ if ((o_max) >= (bottomwidth+stride2 - x1)) o_max -= ((x1 + o_max - bottomwidth )/stride2)*stride2;
+
+ int o = o_min;
+ for (; o <= o_max - 4*stride2; o += 4*stride2)
+ {
+ half8* bottom0 = (half8*)(line0 + x1*max_channels);
+ half8* bottom1_0 = (half8*)(line1 + (x1 + o + 0*stride2)*max_channels);
+ half8* bottom1_1 = (half8*)(line1 + (x1 + o + 1*stride2)*max_channels);
+ half8* bottom1_2 = (half8*)(line1 + (x1 + o + 2*stride2)*max_channels);
+ half8* bottom1_3 = (half8*)(line1 + (x1 + o + 3*stride2)*max_channels);
+
+ int c = 0;
+
+ half8 sum40 = 0;
+ half8 sum41 = 0;
+ half8 sum42 = 0;
+ half8 sum43 = 0;
+
+ for (; c <= cur_subchannels/8 - 4; c += 4)
+ {
+ sum40 += bottom0[c + 0] * bottom1_0[c + 0];
+ sum40 += bottom0[c + 1] * bottom1_0[c + 1];
+ sum40 += bottom0[c + 2] * bottom1_0[c + 2];
+ sum40 += bottom0[c + 3] * bottom1_0[c + 3];
+
+ sum41 += bottom0[c + 0] * bottom1_1[c + 0];
+ sum41 += bottom0[c + 1] * bottom1_1[c + 1];
+ sum41 += bottom0[c + 2] * bottom1_1[c + 2];
+ sum41 += bottom0[c + 3] * bottom1_1[c + 3];
+
+ sum42 += bottom0[c + 0] * bottom1_2[c + 0];
+ sum42 += bottom0[c + 1] * bottom1_2[c + 1];
+ sum42 += bottom0[c + 2] * bottom1_2[c + 2];
+ sum42 += bottom0[c + 3] * bottom1_2[c + 3];
+
+ sum43 += bottom0[c + 0] * bottom1_3[c + 0];
+ sum43 += bottom0[c + 1] * bottom1_3[c + 1];
+ sum43 += bottom0[c + 2] * bottom1_3[c + 2];
+ sum43 += bottom0[c + 3] * bottom1_3[c + 3];
+ }
+
+ for (; c < cur_subchannels/8; c++)
+ {
+ sum40 += bottom0[c] * bottom1_0[c];
+ sum41 += bottom0[c] * bottom1_1[c];
+ sum42 += bottom0[c] * bottom1_2[c];
+ sum43 += bottom0[c] * bottom1_3[c];
+ }
+
+ half sum0 = __builtin_shave_sau_sumx_f16_r(sum40);
+ half sum1 = __builtin_shave_sau_sumx_f16_r(sum41);
+ half sum2 = __builtin_shave_sau_sumx_f16_r(sum42);
+ half sum3 = __builtin_shave_sau_sumx_f16_r(sum43);
+
+ for (c = c*8; c < cur_subchannels; c++)
+ {
+ sum0 += line0[x1*max_channels + c] * line1[(x1 + o + 0*stride2)*max_channels + c];
+ sum1 += line0[x1*max_channels + c] * line1[(x1 + o + 1*stride2)*max_channels + c];
+ sum2 += line0[x1*max_channels + c] * line1[(x1 + o + 2*stride2)*max_channels + c];
+ sum3 += line0[x1*max_channels + c] * line1[(x1 + o + 3*stride2)*max_channels + c];
+ }
+
+ dline[blockIdx_x + (((o/stride2) + 0)*topwidth + neighborhood_grid_radius*topwidth)] += sum0;
+ dline[blockIdx_x + (((o/stride2) + 1)*topwidth + neighborhood_grid_radius*topwidth)] += sum1;
+ dline[blockIdx_x + (((o/stride2) + 2)*topwidth + neighborhood_grid_radius*topwidth)] += sum2;
+ dline[blockIdx_x + (((o/stride2) + 3)*topwidth + neighborhood_grid_radius*topwidth)] += sum3;
+ }
+
+ for (; o < o_max; o += 1*stride2)
+ {
+ half8* bottom0 = (half8*)(line0 + x1*max_channels);
+ half8* bottom1 = (half8*)(line1 + (x1 + o)*max_channels);
+
+ int c = 0;
+
+ half8 sum4 = 0;
+ for (; c <= cur_subchannels/8 - 4; c += 4)
+ {
+ sum4 += bottom0[c + 0] * bottom1[c + 0];
+ sum4 += bottom0[c + 1] * bottom1[c + 1];
+ sum4 += bottom0[c + 2] * bottom1[c + 2];
+ sum4 += bottom0[c + 3] * bottom1[c + 3];
+ }
+ for (; c < cur_subchannels/8; c++)
+ {
+ sum4 += bottom0[c] * bottom1[c];
+ }
+
+ half sum = __builtin_shave_sau_sumx_f16_r(sum4);
+
+ for (c = c*8; c < cur_subchannels; c++)
+ {
+ sum += line0[x1*max_channels + c] * line1[(x1 + o)*max_channels + c];
+ }
+
+ dline[blockIdx_x + (((o + neighborhood_grid_radius*stride2)/stride2)*topwidth)] += sum;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+__kernel void correlate2_half(__global const half* restrict bottom0,
+ __global const half* restrict bottom1,
+ __global half* restrict top,
+ int topwidth,
+ int topheight,
+ int bottomwidth,
+ int bottomheight,
+ int bottomchannels,
+ int max_displacement,
+ int padding,
+ int neighborhood_grid_radius,
+ int neighborhood_grid_width,
+ int kernel_size,
+ int stride1,
+ int stride2)
+{
+ int max_channels = (MAX_OPENCL_BUFF_SIZE/sizeof(half) - topwidth*neighborhood_grid_width) / (3*bottomwidth);
+ if (max_channels > 64) max_channels = 64;
+ int subchannels_count = (bottomchannels + max_channels - 1) / max_channels;
+ int subchannels = (bottomchannels + subchannels_count-1) / subchannels_count;
+ if (subchannels < max_channels) subchannels = max_channels;
+
+ const int sumelems = kernel_size*kernel_size*bottomchannels;
+
+ __private half cmx[MAX_OPENCL_BUFF_SIZE/sizeof(half)];
+
+ __private half* line0 = cmx;
+ __private half* line1 = line0 + bottomwidth*subchannels;
+ __private half* dline = line1 + bottomwidth*subchannels;
+
+ int blockIdx_y = get_global_id(0);
+
+#if defined(USE_MANUAL_DMA)
+ __private half* dmabuf = dline + topwidth*neighborhood_grid_width;
+#endif
+
+ int y1 = blockIdx_y*stride1 + max_displacement;
+
+ for (int j = 0; j < kernel_size; j++)
+ {
+ for (int bottomchannel = 0; bottomchannel < bottomchannels; bottomchannel += subchannels)
+ {
+ // configure channel batching
+ int startchannel = bottomchannel;
+ int endchannel = startchannel + subchannels > bottomchannels ? bottomchannels : startchannel + subchannels;
+ int deltachannels = endchannel-startchannel;
+
+ // load line form blob 0 with repackaging
+ if (y1+j-padding >= 0 && y1+j-padding < bottomheight)
+ {
+#if defined(USE_MANUAL_DMA)
+ __global const half* curr = bottom0 + startchannel*bottomheight*bottomwidth + (y1+j-padding)*bottomwidth;
+ dmacpyLineSrcStrideStart(curr,
+ dmabuf,
+ bottomwidth*deltachannels*sizeof(half),
+ bottomwidth*sizeof(half),
+ bottomwidth*bottomheight*sizeof(half));
+
+ for (int ch = 0; ch < deltachannels; ch++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth/8; blockIdx_x++)
+ {
+ half8 val = ((half8*)(dmabuf + ch*bottomwidth))[blockIdx_x];
+ line0[(blockIdx_x*8 + 0)*max_channels+ch] = val[0];
+ line0[(blockIdx_x*8 + 1)*max_channels+ch] = val[1];
+ line0[(blockIdx_x*8 + 2)*max_channels+ch] = val[2];
+ line0[(blockIdx_x*8 + 3)*max_channels+ch] = val[3];
+
+ line0[(blockIdx_x*8 + 4)*max_channels+ch] = val[4];
+ line0[(blockIdx_x*8 + 5)*max_channels+ch] = val[5];
+ line0[(blockIdx_x*8 + 6)*max_channels+ch] = val[6];
+ line0[(blockIdx_x*8 + 7)*max_channels+ch] = val[7];
+ }
+
+ for (int blockIdx_x = bottomwidth/8*8; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ line0[(blockIdx_x)*max_channels+ch] = dmabuf[blockIdx_x + ch*bottomwidth];
+ }
+ }
+
+ if (deltachannels < subchannels)
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth; blockIdx_x++)
+ memzero(line0 + blockIdx_x*max_channels+deltachannels, (subchannels-deltachannels)*sizeof(half));
+#else
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ for (int ch = 0; ch < deltachannels; ch++)
+ line0[blockIdx_x*max_channels+ch]
+ = bottom0[(ch+startchannel)*bottomheight*bottomwidth + (y1+j-padding)*bottomwidth + blockIdx_x];
+
+ if (deltachannels < subchannels)
+ memzero(line0 + blockIdx_x*max_channels+deltachannels, (subchannels-deltachannels)*sizeof(half));
+ }
+#endif
+ }
+ else
+ memzero(line0, max_channels*bottomwidth*sizeof(half));
+
+ for (int top_channel_y = 0; top_channel_y < neighborhood_grid_width; top_channel_y++)
+ {
+ int y2 = y1 + (top_channel_y - neighborhood_grid_radius) * stride2;
+
+ // load line form blob 1 with repackaging according to the line we work on now
+ if (y2+j-padding >= 0 && y2+j-padding < bottomheight)
+ {
+#if defined(USE_MANUAL_DMA)
+ __global const half* curr = bottom1 + startchannel*bottomheight*bottomwidth + (y2+j-padding)*bottomwidth;
+ dmacpyLineSrcStrideStart(curr,
+ dmabuf,
+ bottomwidth*deltachannels*sizeof(half),
+ bottomwidth*sizeof(half),
+ bottomwidth*bottomheight*sizeof(half));
+
+ for (int ch = 0; ch < deltachannels; ch++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth/8; blockIdx_x++)
+ {
+ half8 val = ((half8*)(dmabuf + ch*bottomwidth))[blockIdx_x];
+ line1[(blockIdx_x*8 + 0)*max_channels+ch] = val[0];
+ line1[(blockIdx_x*8 + 1)*max_channels+ch] = val[1];
+ line1[(blockIdx_x*8 + 2)*max_channels+ch] = val[2];
+ line1[(blockIdx_x*8 + 3)*max_channels+ch] = val[3];
+
+ line1[(blockIdx_x*8 + 4)*max_channels+ch] = val[4];
+ line1[(blockIdx_x*8 + 5)*max_channels+ch] = val[5];
+ line1[(blockIdx_x*8 + 6)*max_channels+ch] = val[6];
+ line1[(blockIdx_x*8 + 7)*max_channels+ch] = val[7];
+ }
+
+ for (int blockIdx_x = bottomwidth/8*8; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ line1[(blockIdx_x)*max_channels+ch] = dmabuf[blockIdx_x + ch*bottomwidth];
+ }
+ }
+#else
+ for (int ch = 0; ch < deltachannels; ch++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth/8; blockIdx_x++)
+ {
+ half8 val = ((__global half8*)(bottom1 + (ch+startchannel)*bottomheight*bottomwidth + (y2+j-padding)*bottomwidth))[blockIdx_x];
+ line1[(blockIdx_x*8 + 0)*max_channels+ch] = val[0];
+ line1[(blockIdx_x*8 + 1)*max_channels+ch] = val[1];
+ line1[(blockIdx_x*8 + 2)*max_channels+ch] = val[2];
+ line1[(blockIdx_x*8 + 3)*max_channels+ch] = val[3];
+
+ line1[(blockIdx_x*8 + 4)*max_channels+ch] = val[4];
+ line1[(blockIdx_x*8 + 5)*max_channels+ch] = val[5];
+ line1[(blockIdx_x*8 + 6)*max_channels+ch] = val[6];
+ line1[(blockIdx_x*8 + 7)*max_channels+ch] = val[7];
+ }
+ for (int blockIdx_x = bottomwidth/8*8; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ half val = (bottom1 + (ch+startchannel)*bottomheight*bottomwidth + (y2+j-padding)*bottomwidth)[blockIdx_x];
+ line1[(blockIdx_x)*max_channels+ch] = val;
+ }
+ }
+#endif
+ for (int blockIdx_x = 0; blockIdx_x < bottomwidth; blockIdx_x++)
+ {
+ if (deltachannels < subchannels)
+ memzero(line1 + blockIdx_x*max_channels+deltachannels, (subchannels-deltachannels)*sizeof(half));
+ }
+ }
+ else
+ memzero(line1, max_channels*bottomwidth*sizeof(half));
+
+ if(j == 0 && startchannel == 0)
+ {
+ memzero(dline, neighborhood_grid_width*topwidth*sizeof(half));
+ }
+ else
+ {
+#if defined(USE_MANUAL_DMA)
+ dmacpyLineSrcStrideStart(top + top_channel_y*neighborhood_grid_width*topheight*topwidth + blockIdx_y*topwidth,
+ dline,
+ topwidth*neighborhood_grid_width*sizeof(half),
+ topwidth*sizeof(half),
+ topwidth*topheight*sizeof(half));
+#else
+ for (int top_channel_x = 0; top_channel_x < neighborhood_grid_width; top_channel_x++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth/8; blockIdx_x++)
+ {
+ half8 val = ((__global half8*)(top + ((top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth)))[blockIdx_x];
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] = val;
+ }
+ for (int blockIdx_x = (topwidth/8)*8; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ dline[top_channel_x*topwidth+blockIdx_x] =
+ top[(top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth+blockIdx_x];
+ }
+ }
+#endif
+ }
+
+ if (y1+j-padding >= 0 && y1+j-padding < bottomheight && y2+j-padding >= 0 && y2+j-padding < bottomheight)
+ {
+ crosscorrh(line0, line1, dline, topwidth, max_displacement, neighborhood_grid_radius,
+ kernel_size, padding, bottomwidth, stride1, stride2, max_channels, subchannels);
+ }
+
+ if (j == kernel_size-1 && endchannel == bottomchannels)
+ {
+ half8 scale = (half8){(half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems, (half)sumelems};
+ for (int top_channel_x = 0; top_channel_x < neighborhood_grid_width; top_channel_x++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth/8; blockIdx_x++)
+ {
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] =
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] / scale;
+ }
+ for (int blockIdx_x = (topwidth/8)*8; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ dline[top_channel_x*topwidth+blockIdx_x] = dline[top_channel_x*topwidth+blockIdx_x]/(half)sumelems;
+ }
+ }
+ }
+
+#if defined(USE_MANUAL_DMA)
+ dmacpyLineDstStrideStart(dline,
+ top + top_channel_y*neighborhood_grid_width*topheight*topwidth + blockIdx_y*topwidth,
+ topwidth*neighborhood_grid_width*sizeof(half),
+ topwidth*sizeof(half),
+ topwidth*topheight*sizeof(half));
+#else
+ for (int top_channel_x = 0; top_channel_x < neighborhood_grid_width; top_channel_x++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth/8; blockIdx_x++)
+ {
+ ((__global half8*)(top + ((top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth)))[blockIdx_x] =
+ ((half8*)(dline + top_channel_x*topwidth))[blockIdx_x] + (half8) {0, 0, 0, 0, 0, 0, 0, 0};
+ }
+ for (int blockIdx_x = (topwidth/8)*8; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ top[(top_channel_y*neighborhood_grid_width+top_channel_x)*topheight*topwidth + blockIdx_y*topwidth+blockIdx_x]
+ = dline[top_channel_x*topwidth+blockIdx_x] + (half)0;
+ }
+ }
+#endif
+ }
+ }
+ }
+}
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void ctc_ocl(__global half* probabilities,
- __global half* output_sequences,
- int C_)
-{
- size_t t = get_global_id(0);
-
- __global half* probs = probabilities + t * C_;
-
- int max_class_idx = 0;
- half max_prob = probs[0];
- ++probs;
- for (int c = 1 ; c < C_ ; c++, ++probs)
- {
- if (*probs > max_prob)
- {
- max_prob = *probs;
- max_class_idx = c;
+__global half *find(__global const half *begin, __global const half *end, half value) {
+ while (begin != end) {
+ if (*begin == value) {
+ return begin;
}
+ ++begin;
}
- output_sequences[t] = (half)max_class_idx;
+ return end;
+}
+
+#define USE_MANUAL_DMA
+
+#ifdef USE_MANUAL_DMA
+
+__kernel void __dma_preload_CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
+{
+ WorkGroupDmaCreateStrideTransaction(
+ probabilities, // src
+ local_src, // dst
+ width * sizeof(half), // src_width,
+ width * sizeof(half), // dst_width,
+ width * height * sizeof(half), // src_stride,
+ width * sizeof(half), // dst_stride,
+ width * height * channels * sizeof(half), // size
+ 0);
}
-__kernel void postProcess(__global half* input,
- __global half* output,
- __global half* seq_ind,
- int height,
- int width,
- int classes)
+__kernel void __dma_postwrite_CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
{
- int wr_index = 0;
- int rd_index = 0;
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ output_sequences, // dst
+ channels * sizeof(half), // src_width,
+ channels * sizeof(half), // dst_width,
+ channels * sizeof(half), // src_stride,
+ channels * sizeof(half), // dst_stride,
+ channels * height * sizeof(half), // size
+ 0);
+}
- half update_data;
- int update_index;
+__kernel void CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
+{
+ const int T = channels;
+ const int B = height;
+ const int C = width;
- for (int i = 0; i < classes; i++)
+ for (int i = 0; i < B*T; i++)
{
- output[i] = (half)(-1);
+ local_dst[i] = -1.h;
}
- for (int n = 0; n < height; ++n)
+ int output_index = 0;
+
+ for (int b = 0; b < B; ++b)
{
- int prev_class_id = -1;
- for (int t = 0; t < classes; ++t)
+ __global const half *seq_ind = sequence_indicators + b*T;
+ const int seq_len = find(seq_ind + 1, seq_ind + T, 0.h) - seq_ind;
+ const int time = min(seq_len, T);
+
+ int prev_class_idx = -1;
+
+ for (int t = 0; t < time; ++t)
{
- int class_id = (int)input[rd_index++];
- update_index = wr_index;
- update_data = output[update_index];
+ __local const half *probs = local_src + b*C + t*C*B;
+ int max_class_idx = 0;
+ half max_prob = probs[0];
- if ((class_id < (width - 1)) && !(1 && class_id == prev_class_id))
+ for (int c = 1; c < C; ++c)
{
- update_data = (half)class_id;
- wr_index++;
-
+ const half prob = probs[c];
+ if (prob > max_prob)
+ {
+ max_class_idx = c;
+ max_prob = prob;
+ }
}
- output[update_index] = update_data;
- prev_class_id = class_id;
- if (seq_ind[t + 1] == 0 ) {
- break;
+ if (max_class_idx < C-1 && max_class_idx != prev_class_idx)
+ {
+ local_dst[b*T + output_index] = (half)max_class_idx;
+ output_index++;
}
+
+ prev_class_idx = max_class_idx;
}
}
}
-__kernel void ctc_ref_fp16(__global half* probabilities, __global half* seq_ind, __global half* output_sequences, int C, int H, int W)
+#else
+
+__kernel void CTCDecoder(__global half *probabilities,
+ __global half *sequence_indicators,
+ __global half *output_sequences,
+ int width,
+ int height,
+ int channels,
+ __local half *local_src,
+ __local half *local_dst)
{
- int T_ = C;
- int N_ = H;
- int C_ = W;
+ const int T = channels;
+ const int B = height;
+ const int C = width;
- // Fill output_sequences with -1
- for (int i = 0; i < T_; i++)
+ for (int i = 0; i < B*T; i++)
{
- output_sequences[i] = (half)(-1.0);
+ output_sequences[i] = -1.h;
}
+
int output_index = 0;
- // Caffe impl
- for(int n = 0; n < N_; ++n)
+ for (int b = 0; b < B; ++b)
{
+ __global const half *seq_ind = sequence_indicators + b*T;
+ const int seq_len = find(seq_ind + 1, seq_ind + T, 0.h) - seq_ind;
+ const int time = min(seq_len, T);
+
int prev_class_idx = -1;
- for (int t = 0; t < T_; ++t)
+ for (int t = 0; t < time; ++t)
{
- // get maximum probability and its index
+ __global const half *probs = probabilities + b*C + t*C*B;
int max_class_idx = 0;
- __global half* probs;
- half max_prob;
-
- probs = probabilities + t*C_;
- max_prob = probs[0];
- ++probs;
+ half max_prob = probs[0];
- for (int c = 1; c < C_; ++c, ++probs)
+ for (int c = 1; c < C; ++c)
{
- if (*probs > max_prob)
+ const half prob = probs[c];
+ if (prob > max_prob)
{
max_class_idx = c;
- max_prob = *probs;
+ max_prob = prob;
}
}
- //if (max_class_idx != blank_index_
- // && !(merge_repeated_&& max_class_idx == prev_class_idx))
- if (max_class_idx < C_-1 && !(1 && max_class_idx == prev_class_idx))
+ if (max_class_idx < C-1 && max_class_idx != prev_class_idx)
{
- output_sequences[output_index] = (half)max_class_idx;
+ output_sequences[b*T + output_index] = (half)max_class_idx;
output_index++;
}
prev_class_idx = max_class_idx;
-
- // Assume sequence_indicators is always 1
- if (seq_ind[t + 1] == 0)
- {
- break;
- }
}
}
}
+
+#endif
-<!-- Should be enabled if support to detect required kernel based on tensor format is added -->
-<!-- <CustomLayer name="ReorgYolo" type="MVCL" version="1">
- <Kernel entry="reorg">
+<CustomLayer name="ReorgYolo" type="MVCL" version="1">
+ <Kernel entry="reorg_hwc_naive">
<Source filename="reorg_hwc.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BYXF"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="stride" type="int" source="stride"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="0"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="0"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="F,1,1" local="stride*stride,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BYXF"/>
- <Tensor arg-name="out" type="output" port-index="0" format="BYXF"/>
- <Scalar arg-name="w" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="h" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="stride" type="int" source="stride" />
- </Parameters>
- <WorkSizes dim="input,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
-</CustomLayer> -->
+</CustomLayer>
<CustomLayer name="ReorgYolo" type="MVCL" version="1">
- <Kernel entry="reorg_NCHW">
+ <Where stride="2"/>
+ <Kernel entry="reorg_chw">
<Source filename="reorg_chw.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="stride" type="int" source="stride"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*2*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*2*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="Y*F/(stride*stride),stride*stride,1" local="1,stride,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="stride" type="int" source="stride" />
- </Parameters>
- <WorkSizes dim="input,0" global="((Y+7)/8)*8,F,1" local="8,1,1"/>
</CustomLayer>
-<!-- The latest version of the code is not backward compatible with Yolo-v2, it is to be checked by benchmark-->
-<!-- Add work group config understand parameter from IR -->
-<!-- <WorkSizes global="Y*X,num,1" local="X,num,1" dim="input,0"/> -->
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where do_softmax="1" num="5"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="1"/>
+ <Kernel entry="region_chw">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,num,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="1" />
- </Parameters>
- <WorkSizes global="Y*X,5,1" local="X,5,1" dim="input,0"/>
</CustomLayer>
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where mask="0,1,2" do_softmax="0"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="0" mask="0,1,2"/>
+ <Kernel entry="region_chw">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,3,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="0" />
- </Parameters>
- <WorkSizes global="Y*X,3,1" local="X,3,1" dim="input,0"/>
</CustomLayer>
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where mask="3,4,5" do_softmax="0"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="1"/>
+ <Kernel entry="region_hwc">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BYXF"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,num,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="0" />
- </Parameters>
- <WorkSizes global="Y*X,3,1" local="X,3,1" dim="input,0"/>
</CustomLayer>
-<CustomLayer name="RegionYolo" type="MVCL" version="1" max-shaves="3">
- <Where mask="6,7,8" do_softmax="0"/>
- <Kernel entry="region_ocl">
- <Source filename="region_chw.bin"/>
+<CustomLayer name="RegionYolo" type="MVCL" version="1">
+ <Where do_softmax="0" mask="0,1,2"/>
+ <Kernel entry="region_hwc">
+ <Source filename="region.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BYXF"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*(coords+1+classes)*2"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="classes" type="int" source="classes"/>
+ <Scalar arg-name="coords" type="int" source="coords"/>
+ <Scalar arg-name="num" type="int" source="num"/>
+ <Scalar arg-name="maskSize" type="int" source="3"/>
+ <Scalar arg-name="doSoftmax" type="int" source="do_softmax"/>
+ </Parameters>
+ <WorkSizes global="((X+7)/8)*8*Y,3,1" local="((X+7)/8)*8,1,1" dim="input,0"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="ANY"/>
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="classes" type="int" source="classes" />
- <Scalar arg-name="coords" type="int" source="coords" />
- <Scalar arg-name="num" type="int" source="num" />
- <Scalar arg-name="maskSize" type="int" source="3" />
- <Scalar arg-name="doSoftmax" type="int" source="0" />
- </Parameters>
- <WorkSizes global="Y*X,3,1" local="X,3,1" dim="input,0"/>
</CustomLayer>
<!-- Pixel-wise kernel binding, local work group config is per line in the input tensor -->
<CustomLayer name="GRN" type="MVCL" version="1">
<Kernel entry="grn_NCHW">
<Source filename="grn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*F*2"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="bias" type="float" source="bias"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="X,Y,1" local="X,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Data arg-name="src" type="local_data" dim="input,0" size="X*F*2" />
- <Data arg-name="dst" type="local_data" dim="input,0" size="X*F*2" />
- <Scalar arg-name="C" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="bias" type="float" source="bias" />
- </Parameters>
- <WorkSizes dim="input,0" global="X,Y,1" local="X,1,1"/>
</CustomLayer>
<!-- Two stage layer binding, first kernel computes mean and variance, the second one normalizes input tensor-->
-<CustomLayer name="MVN" stage="0" type="MVCL" version="1">
- <Kernel entry="reduction_mean">
+<CustomLayer name="MVN" type="MVCL" version="1">
+ <Kernel entry="reduction_mean" stage="0">
<Source filename="mvn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="mean" type="output_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
+ <Tensor arg-name="variance" type="output_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="across_channels" type="int" source="across_channels"/>
+ <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="mean" type="output_buffer" port-index="0" dim="output,0" size="Y*F*4" />
- <Tensor arg-name="variance" type="output_buffer" port-index="1" dim="output,0" size="Y*F*4" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="across_channels" type="int" source="across_channels" />
- <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2" />
- </Parameters>
- <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
-</CustomLayer>
-<CustomLayer name="MVN" stage="1" type="MVCL" version="1">
- <Kernel entry="mvn_scale">
+ <Kernel entry="mvn_scale" stage="1">
<Source filename="mvn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Tensor arg-name="mean_part" type="input_buffer" port-index="0" dim="output,0" size="Y*F*4"/>
+ <Tensor arg-name="power_mean" type="input_buffer" port-index="1" dim="output,0" size="Y*F*4"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H1" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="across_channels" type="int" source="across_channels"/>
+ <Scalar arg-name="normalize_variance" type="int" source="normalize_variance"/>
+ <Scalar arg-name="nparts" type="int" port-index="0" source="I.Y"/>
+ <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2"/>
+ <Data arg-name="dst_line" type="local_data" dim="input,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
- <Tensor arg-name="mean_part" type="input_buffer" port-index="0" dim="output,0" size="Y*F*4" />
- <Tensor arg-name="power_mean" type="input_buffer" port-index="1" dim="output,0" size="Y*F*4" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H1" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="across_channels" type="int" source="across_channels" />
- <Scalar arg-name="normalize_variance" type="int" source="normalize_variance" />
- <Scalar arg-name="nparts" type="int" port-index="0" source="I.Y" />
- <Data arg-name="src_line" type="local_data" dim="input,0" size="X*2" />
- <Data arg-name="dst_line" type="local_data" dim="input,0" size="X*2" />
- </Parameters>
- <WorkSizes dim="output,0" global="1,Y,F" local="1,1,1"/>
</CustomLayer>
<!-- Single work group kernel for not embarrassingly-parallel use-case -->
<CustomLayer name="CTCGreedyDecoder" type="MVCL" version="1" max-shaves="1">
- <Kernel entry="ctc_ref_fp16">
+ <Kernel entry="CTCDecoder">
<Source filename="ctc.bin"/>
+ <Parameters>
+ <Tensor arg-name="probabilities" type="input" port-index="0" format="FYX"/>
+ <Tensor arg-name="sequence_indicators" type="input" port-index="1" format="BF"/>
+ <Tensor arg-name="output_sequences" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="width" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="height" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="channels" type="int" port-index="0" source="I.F"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="F*Y*X*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="F*Y*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="1,1,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="probabilities" type="input" port-index="0" format="FYX" />
- <Tensor arg-name="seq_ind" type="input" port-index="1" format="ANY" />
- <Tensor arg-name="output_sequences" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="C" type="int" port-index="0" source="I.F" />
- </Parameters>
- <WorkSizes dim="output,0" global="1,1,1" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="ShuffleChannel" type="MVCL" version="1">
<!-- <Where group="2"/> -->
<Kernel entry="ShuffleChannel">
<Source filename="shuffle_channels.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="G" type="int" source="group"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="F,1,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="C" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="G" type="int" source="group" />
- </Parameters>
- <WorkSizes dim="input,0" global="F,1,1" local="1,1,1"/>
</CustomLayer>
<!-- Reference version of generic quantize layer, should be changed to FakeQuantize-->
<!-- <Where levels="2"/> -->
 <CustomLayer name="Quantize" type="MVCL" version="1">
 <Kernel entry="quantize">
<Source filename="quantize.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_low" type="input" port-index="1" format="ANY"/>
+ <Tensor arg-name="input_high" type="input" port-index="2" format="ANY"/>
+ <Tensor arg-name="output_low" type="input" port-index="3" format="ANY"/>
+ <Tensor arg-name="output_high" type="input" port-index="4" format="ANY"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="levels" type="int" source="levels"/>
+ <Scalar arg-name="input_low_size" type="int" port-index="1" source="I.F"/>
+ <Scalar arg-name="input_high_size" type="int" port-index="2" source="I.F"/>
+ <Scalar arg-name="output_low_size" type="int" port-index="3" source="I.F"/>
+ <Scalar arg-name="output_high_size" type="int" port-index="4" source="I.F"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*F*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="1,Y,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="input_low" type="input" port-index="1" format="ANY" />
- <Tensor arg-name="input_high" type="input" port-index="2" format="ANY" />
- <Tensor arg-name="output_low" type="input" port-index="3" format="ANY" />
- <Tensor arg-name="output_high" type="input" port-index="4" format="ANY" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="levels" type="int" source="levels" />
- <Scalar arg-name="input_low_size" type="int" port-index="1" source="I.F" />
- <Scalar arg-name="input_high_size" type="int" port-index="2" source="I.F" />
- <Scalar arg-name="output_low_size" type="int" port-index="3" source="I.F" />
- <Scalar arg-name="output_high_size" type="int" port-index="4" source="I.F" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2" />
- <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2" />
- </Parameters>
- <WorkSizes dim="input,0" global="F,1,1" local="1,1,1"/>
</CustomLayer>
<!-- Reference version of generic quantize layer, should be changed to FakeQuantize-->
 <CustomLayer name="Quantize" type="MVCL" version="1">
 <Where levels="256"/>
<Kernel entry="quantize">
<Source filename="binary_layers.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_low" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX"/>
+ <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX"/>
+ <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="levels" type="int" source="levels"/>
+ <Scalar arg-name="input_low_size" type="int" source="input_low_size"/>
+ <Scalar arg-name="input_high_size" type="int" source="input_high_size"/>
+ <Scalar arg-name="output_low_size" type="int" source="output_low_size"/>
+ <Scalar arg-name="output_high_size" type="int" source="output_high_size"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="input_low" type="input" port-index="1" format="BFYX" />
- <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX" />
- <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX" />
- <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="levels" type="int" port-index="0" source="levels" />
- <Scalar arg-name="input_low_size" type="int" port-index="0" source="input_low_size" />
- <Scalar arg-name="input_high_size" type="int" port-index="0" source="input_high_size" />
- <Scalar arg-name="output_low_size" type="int" port-index="0" source="output_low_size" />
- <Scalar arg-name="output_high_size" type="int" port-index="0" source="output_high_size" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2" />
- <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2" />
- </Parameters>
- <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="QuantizeTemporaryType" type="MVCL" version="1">
<Where levels="2"/>
<Kernel entry="binarization">
<Source filename="binary_layers.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_low_high" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="switch_out" type="int" source="switch_out"/>
+ <Scalar arg-name="input_low_high_size" type="int" source="input_low_size"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="H" type="int" port-index="0" source="I.Y"/>
+ <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX"/>
+ <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX"/>
+ <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX"/>
+ <Scalar arg-name="input_high_size" type="int" source="input_high_size"/>
+ <Scalar arg-name="output_low_size" type="int" source="output_low_size"/>
+ <Scalar arg-name="output_high_size" type="int" source="output_high_size"/>
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="input_low_high" type="input" port-index="1" format="BFYX" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="switch_out" type="int" port-index="0" source="switch_out" />
- <Scalar arg-name="input_low_high_size" type="int" port-index="0" source="input_low_size" />
- <Scalar arg-name="W" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="H" type="int" port-index="0" source="I.Y" />
- <Tensor arg-name="input_high" type="input" port-index="2" format="BFYX" />
- <Tensor arg-name="output_low" type="input" port-index="3" format="BFYX" />
- <Tensor arg-name="output_high" type="input" port-index="4" format="BFYX" />
- <Scalar arg-name="input_high_size" type="int" port-index="0" source="input_high_size" />
- <Scalar arg-name="output_low_size" type="int" port-index="0" source="output_low_size" />
- <Scalar arg-name="output_high_size" type="int" port-index="0" source="output_high_size" />
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*Y*2" />
- <Data arg-name="dst_local" type="local_data" dim="input,0" size="X*Y*2" />
- </Parameters>
- <WorkSizes dim="input,0" global="1,1,F" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
- <Where kernel="3,3" />
+ <Where kernel="3,3"/>
<Kernel entry="binary_convolution">
<Source filename="binary_convolution3x3.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="pad_value" type="float" source="pad_value"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="DW" type="int" port-index="0" source="dilations"/>
+ <Scalar arg-name="DH" type="int" port-index="1" source="dilations"/>
+ <Scalar arg-name="GC" type="int" source="group"/>
+ <Scalar arg-name="KW" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KH" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="SW" type="int" port-index="0" source="strides"/>
+ <Scalar arg-name="SH" type="int" port-index="1" source="strides"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*3*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Data arg-name="weights_data" type="data" source="weights" format="ANY" />
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="pad_value" type="float" source="pad_value" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="DW" type="int" port-index="0" source="dilations" />
- <Scalar arg-name="DH" type="int" port-index="1" source="dilations" />
- <Scalar arg-name="GC" type="int" source="group" />
- <Scalar arg-name="KW" type="int" port-index="0" source="kernel" />
- <Scalar arg-name="KH" type="int" port-index="1" source="kernel" />
- <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin" />
- <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin" />
- <Scalar arg-name="SW" type="int" port-index="0" source="strides" />
- <Scalar arg-name="SH" type="int" port-index="1" source="strides" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
-
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*3*2"/>
- <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
- </Parameters>
- <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
- <Where kernel="1,1" />
+ <Where kernel="1,1"/>
<Kernel entry="binary_convolution">
<Source filename="binary_convolution1x1.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="pad_value" type="float" source="pad_value"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="DW" type="int" port-index="0" source="dilations"/>
+ <Scalar arg-name="DH" type="int" port-index="1" source="dilations"/>
+ <Scalar arg-name="GC" type="int" source="group"/>
+ <Scalar arg-name="KW" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KH" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="SW" type="int" port-index="0" source="strides"/>
+ <Scalar arg-name="SH" type="int" port-index="1" source="strides"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+
+ <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Data arg-name="weights_data" type="data" source="weights" format="ANY" />
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="pad_value" type="float" source="pad_value" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="DW" type="int" port-index="0" source="dilations" />
- <Scalar arg-name="DH" type="int" port-index="1" source="dilations" />
- <Scalar arg-name="GC" type="int" source="group" />
- <Scalar arg-name="KW" type="int" port-index="0" source="kernel" />
- <Scalar arg-name="KH" type="int" port-index="1" source="kernel" />
- <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin" />
- <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin" />
- <Scalar arg-name="SW" type="int" port-index="0" source="strides" />
- <Scalar arg-name="SH" type="int" port-index="1" source="strides" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
-
- <Data arg-name="src_local" type="local_data" dim="input,0" size="X*F*2"/>
- <Data arg-name="dst_local" type="local_data" dim="output,0" size="X*2"/>
- </Parameters>
- <WorkSizes dim="output,0" global="Y,F,1" local="1,1,1"/>
</CustomLayer>
<!-- Reference version of generic quantize binary convolution -->
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
<Kernel entry="binary_convolution">
<Source filename="binary_layers.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Data arg-name="weights_data" type="data" source="weights" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="pad_value" type="float" source="pad_value"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="DW" type="int" port-index="0" source="dilations"/>
+ <Scalar arg-name="DH" type="int" port-index="1" source="dilations"/>
+ <Scalar arg-name="GC" type="int" source="group"/>
+ <Scalar arg-name="KW" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KH" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="SW" type="int" port-index="0" source="strides"/>
+ <Scalar arg-name="SH" type="int" port-index="1" source="strides"/>
+ </Parameters>
+ <WorkSizes dim="output,0" global="X,Y,F" local="1,1,1"/>
</Kernel>
- <Parameters>
- <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
- <Data arg-name="weights_data" type="data" source="weights" format="ANY" />
- <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="pad_value" type="float" source="pad_value" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="DW" type="int" port-index="0" source="dilations" />
- <Scalar arg-name="DH" type="int" port-index="1" source="dilations" />
- <Scalar arg-name="GC" type="int" source="group" />
- <Scalar arg-name="KW" type="int" port-index="0" source="kernel" />
- <Scalar arg-name="KH" type="int" port-index="1" source="kernel" />
- <Scalar arg-name="PW" type="int" port-index="0" source="pads_begin" />
- <Scalar arg-name="PH" type="int" port-index="1" source="pads_begin" />
- <Scalar arg-name="SW" type="int" port-index="0" source="strides" />
- <Scalar arg-name="SH" type="int" port-index="1" source="strides" />
- </Parameters>
- <WorkSizes dim="output,0" global="X,Y,F" local="1,1,1"/>
</CustomLayer>
<CustomLayer name="Resample" type="MVCL" version="1">
- <Where antialias="0" />
- <Kernel entry="resample_nearest">
- <Source filename="resample_nn.bin" />
- </Kernel>
- <Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX" />
- <Scalar arg-name="iw" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="ih" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="factor" type="float" port-index="0" source="factor" />
- <Scalar arg-name="ow" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="oh" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="channels" type="int" port-index="0" source="I.F" />
- </Parameters>
- <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where antialias="0"/>
+ <Kernel entry="resample_nearest">
+ <Source filename="resample_nn.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*ceil(1/factor)*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="output,0" size="X*F*2"/>
+ <Scalar arg-name="iw" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="ih" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="factor" type="float" source="factor"/>
+ <Scalar arg-name="ow" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="oh" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="channels" type="int" port-index="0" source="I.F"/>
+ </Parameters>
+ <WorkSizes global="1,Y,1" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Resample" type="MVCL" version="1">
- <Where antialias="1"/>
-<Kernel entry="resample_with_antialias">
- <Source filename="resample_with_antialias.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
- <Scalar arg-name="iw" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="ih" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="factor" type="float" port-index="0" source="factor" />
- <Scalar arg-name="ow" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="oh" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="channels" type="int" port-index="0" source="I.F" />
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where antialias="1"/>
+ <Kernel entry="resample_with_antialias">
+ <Source filename="resample_with_antialias.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*5*F*2"/>
+ <Data arg-name="local_dst" type="local_data" dim="output,0" size="X*F*2"/>
+ <Scalar arg-name="iw" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="ih" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="factor" type="float" source="factor"/>
+ <Scalar arg-name="ow" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="oh" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="channels" type="int" port-index="0" source="I.F"/>
+ </Parameters>
+ <WorkSizes global="1,round(Y*factor),F" local="1,1,F" dim="input,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Convolution" type="MVCL" version="1">
- <Where isHWC="0"/>
-<Kernel entry="Convolution1x1_NCHW">
- <Source filename="convolution1x1.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="in" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX" />
- <Data arg-name="w" type="data" source="weights" format="ANY"/>
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="OH" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="OC" type="int" port-index="0" source="O.F" />
-
- <Scalar arg-name="stride-x" type="int" port-index="0" source="stride-x"/>
- <Scalar arg-name="stride-y" type="int" port-index="0" source="stride-y"/>
- <Scalar arg-name="pad-x" type="int" port-index="0" source="pad-x" />
- <Scalar arg-name="pad-y" type="int" port-index="0" source="pad-y" />
- <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel-x"/>
- <Scalar arg-name="kernel-y" type="int" port-index="0" source="kernel-y"/>
- <Scalar arg-name="output" type="int" port-index="0" source="output" />
- <Scalar arg-name="group" type="int" port-index="0" source="group" />
-
- <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
- <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
-
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where kernel="1,1" dilation="1,1"/>
+ <Kernel entry="Convolution1x1_NCHW">
+ <Source filename="convolution1x1.bin"/>
+ <Parameters>
+ <Tensor arg-name="in" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="w" type="data" source="weights" format="ANY"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="OH" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="OC" type="int" port-index="0" source="O.F"/>
+
+ <Scalar arg-name="stride-x" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride-y" type="int" port-index="1" source="stride"/>
+ <Scalar arg-name="pad-x" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="pad-y" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="kernel-y" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="output" type="int" port-index="0" source="output"/>
+ <Scalar arg-name="group" type="int" port-index="0" source="group"/>
+
+ <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
+
+ </Parameters>
+ <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Convolution" type="MVCL" version="1">
- <Where isHWC="1"/>
-<Kernel entry="Convolution1x1_NHWC">
- <Source filename="convolution1x1.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="in" type="input" port-index="0" format="BYXF" />
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX" />
- <Data arg-name="w" type="data" source="weights" format="ANY"/>
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="OH" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="OC" type="int" port-index="0" source="O.F" />
-
- <Scalar arg-name="stride-x" type="int" port-index="0" source="stride-x"/>
- <Scalar arg-name="stride-y" type="int" port-index="0" source="stride-y"/>
- <Scalar arg-name="pad-x" type="int" port-index="0" source="pad-x" />
- <Scalar arg-name="pad-y" type="int" port-index="0" source="pad-y" />
- <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel-x"/>
- <Scalar arg-name="kernel-y" type="int" port-index="0" source="kernel-y"/>
- <Scalar arg-name="output" type="int" port-index="0" source="output" />
- <Scalar arg-name="group" type="int" port-index="0" source="group" />
-
- <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
- <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where kernel="1,1" dilation="1,1"/>
+ <Kernel entry="Convolution1x1_NHWC">
+ <Source filename="convolution1x1.bin"/>
+ <Parameters>
+ <Tensor arg-name="in" type="input" port-index="0" format="BYXF"/>
+ <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="w" type="data" source="weights" format="ANY"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="OH" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="OC" type="int" port-index="0" source="O.F"/>
+
+ <Scalar arg-name="stride-x" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride-y" type="int" port-index="1" source="stride"/>
+ <Scalar arg-name="pad-x" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="pad-y" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="kernel-y" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="output" type="int" port-index="0" source="output"/>
+ <Scalar arg-name="group" type="int" port-index="0" source="group"/>
+
+ <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*2"/>
+ <Data arg-name="out_local" type="local_data" dim="output,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="Convolution" type="MVCL" version="1">
-<Where kernel-x="3"/>
-<Kernel entry="Convolution3x3">
- <Source filename="convolution3x3.bin" />
-</Kernel>
-<Parameters>
- <Tensor arg-name="in_param" type="input" port-index="0" format="BFYX" />
- <Tensor arg-name="out" type="output" port-index="0" format="BFYX" />
- <Data arg-name="w" type="data" source="weights" format="BFYX" />
- <Scalar arg-name="IW" type="int" port-index="0" source="I.X" />
- <Scalar arg-name="IH" type="int" port-index="0" source="I.Y" />
- <Scalar arg-name="IC" type="int" port-index="0" source="I.F" />
- <Scalar arg-name="OW" type="int" port-index="0" source="O.X" />
- <Scalar arg-name="OH" type="int" port-index="0" source="O.Y" />
- <Scalar arg-name="OC" type="int" port-index="0" source="O.F" />
- <Scalar arg-name="KX" type="int" port-index="0" source="kernel-x" />
- <Scalar arg-name="KY" type="int" port-index="0" source="kernel-y" />
- <Scalar arg-name="stride_x" type="int" port-index="0" source="stride-x" />
- <Scalar arg-name="stride_y" type="int" port-index="0" source="stride-y" />
- <Scalar arg-name="pad_x" type="int" port-index="0" source="pad-x" />
- <Scalar arg-name="pad_y" type="int" port-index="0" source="pad-y" />
- <Scalar arg-name="dilation_x" type="int" port-index="0" source="dilation-x"/>
- <Scalar arg-name="dilation_y" type="int" port-index="0" source="dilation-y"/>
-
- <Scalar arg-name="stride-x" type="int" port-index="1" source="stride-x" />
- <Scalar arg-name="stride-y" type="int" port-index="1" source="stride-y" />
- <Scalar arg-name="pad-x" type="int" port-index="1" source="pad-x" />
- <Scalar arg-name="pad-y" type="int" port-index="1" source="pad-y" />
- <Scalar arg-name="kernel-x" type="int" port-index="0" source="kernel-x" />
- <Scalar arg-name="kernel-y" type="int" port-index="0" source="kernel-y" />
- <Scalar arg-name="output" type="int" port-index="0" source="output" />
-
- <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*3*2"/>
- <Data arg-name="out_local" type="local_data" dim="output,0" size="X*F*2"/>
- <Data arg-name="w_local" type="local_data" dim="input,0" size="3*3*F*2"/>
-</Parameters>
-<WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ <Where kernel="3,3" dilation="1,1"/>
+ <Kernel entry="Convolution3x3">
+ <Source filename="convolution3x3.bin"/>
+ <Parameters>
+ <Tensor arg-name="in_param" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="out" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="w" type="data" source="weights" format="BFYX"/>
+ <Scalar arg-name="IW" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="IH" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="IC" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="OW" type="int" port-index="0" source="O.X"/>
+ <Scalar arg-name="OH" type="int" port-index="0" source="O.Y"/>
+ <Scalar arg-name="OC" type="int" port-index="0" source="O.F"/>
+ <Scalar arg-name="KX" type="int" port-index="0" source="kernel"/>
+ <Scalar arg-name="KY" type="int" port-index="1" source="kernel"/>
+ <Scalar arg-name="stride_x" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride_y" type="int" port-index="1" source="stride"/>
+ <Scalar arg-name="pad_x" type="int" port-index="0" source="pads_begin"/>
+ <Scalar arg-name="pad_y" type="int" port-index="1" source="pads_begin"/>
+ <Scalar arg-name="dilation_x" type="int" port-index="0" source="dilation"/>
+ <Scalar arg-name="dilation_y" type="int" port-index="1" source="dilation"/>
+ <Scalar arg-name="output" type="int" port-index="0" source="output"/>
+
+ <Data arg-name="in_local" type="local_data" dim="input,0" size="X*F*3*2"/>
+ <Data arg-name="out_local" type="local_data" dim="output,0" size="X*F*2"/>
+ <Data arg-name="w_local" type="local_data" dim="input,0" size="3*3*F*2"/>
+ </Parameters>
+ <WorkSizes global="Y,F,B" local="1,1,1" dim="output,0"/>
+ </Kernel>
</CustomLayer>
<CustomLayer name="ExperimentalDetectronPriorGridGenerator" type="MVCL" version="1">
-<Kernel entry="experimental_detectron_prior_grid_generator">
- <Source filename="detectron_prior_grid_gen.bin"/>
-</Kernel>
-<Parameters>
- <Tensor arg-name="input_priors" type="input" port-index="0" format="BFYX"/>
- <Tensor arg-name="input_feature_map" type="input" port-index="1" format="BFYX"/>
- <Tensor arg-name="input_rois" type="input" port-index="2" format="BFYX"/>
- <Tensor arg-name="output" type="output" port-index="0" format="BFYX"/>
- <Data arg-name="local_input_priors" type="local_data" dim="input,1" size="X*2" />
- <Data arg-name="local_output" type="local_data" dim="input,1" size="((X+7)/8)*12*2"/>
- <Scalar arg-name="grid_h" type="int" port-index="1" source="I.Y"/>
- <Scalar arg-name="grid_w" type="int" port-index="1" source="I.X"/>
- <Scalar arg-name="stride_h" type="float" port-index="0" source="stride_h"/>
- <Scalar arg-name="stride_w" type="float" port-index="0" source="stride_w"/>
- <Scalar arg-name="num_priors" type="int" port-index="0" source="I.Y"/>
- <Scalar arg-name="num_anchors_per_prior" type="int" port-index="0" source="I.X"/>
-</Parameters>
-<WorkSizes dim="input,1" global="((X+31)/32)*32,Y,1" local="32,1,1"/>
+ <Kernel entry="experimental_detectron_prior_grid_generator">
+ <Source filename="detectron_prior_grid_gen.bin"/>
+ <Parameters>
+ <Tensor arg-name="input_priors" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="input_feature_map" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="input_rois" type="input" port-index="2" format="BFYX"/>
+ <Tensor arg-name="output" type="output" port-index="0" format="BFYX"/>
+ <Data arg-name="local_input_priors" type="local_data" dim="input,1" size="X*2"/>
+ <Data arg-name="local_output" type="local_data" dim="input,1" size="((X+7)/8)*12*2"/>
+ <Scalar arg-name="grid_h" type="int" port-index="1" source="I.Y"/>
+ <Scalar arg-name="grid_w" type="int" port-index="1" source="I.X"/>
+ <Scalar arg-name="stride_h" type="float" source="stride_h"/>
+ <Scalar arg-name="stride_w" type="float" source="stride_w"/>
+ <Scalar arg-name="num_priors" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="num_anchors_per_prior" type="int" port-index="0" source="I.X"/>
+ </Parameters>
+ <WorkSizes dim="input,1" global="((X+31)/32)*32,Y,1" local="32,1,1"/>
+ </Kernel>
+</CustomLayer>
+
+<CustomLayer name="Convert" type="MVCL" version="1">
+ <Kernel entry="cvtu8f16">
+ <Source filename="cvtu8f16.bin"/>
+ <Parameters>
+ <Tensor arg-name="src" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="dst" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="scale" type="float" source="scale"/>
+ <Scalar arg-name="bias" type="float" source="bias"/>
+ <Data arg-name="local_src" type="local_data" dim="input,0" size="X*1"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="X,Y,F" local="X,1,1"/>
+ </Kernel>
+</CustomLayer>
+
+<CustomLayer name="Correlate" type="MVCL" version="1">
+ <Kernel entry="correlate2_half">
+ <Source filename="correlate.bin"/>
+ <Parameters>
+ <Tensor arg-name="bottom0" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="bottom1" type="input" port-index="1" format="BFYX"/>
+ <Tensor arg-name="top" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="topwidth" type="int" source="top_width"/>
+ <Scalar arg-name="topheight" type="int" source="top_height"/>
+ <Scalar arg-name="bottomwidth" type="int" port-index="0" source="I.X"/>
+ <Scalar arg-name="bottomheight" type="int" port-index="0" source="I.Y"/>
+ <Scalar arg-name="bottomchannels" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="max_displacement" type="int" source="displacement"/>
+ <Scalar arg-name="padding" type="int" source="pad"/>
+ <Scalar arg-name="neighborhood_grid_radius" type="int" source="neighborhood_grid_radius"/>
+ <Scalar arg-name="neighborhood_grid_width" type="int" source="neighborhood_grid_width"/>
+ <Scalar arg-name="kernel_size" type="int" source="kernel_size"/>
+ <Scalar arg-name="stride1" type="int" port-index="0" source="stride"/>
+ <Scalar arg-name="stride2" type="int" port-index="1" source="stride"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="top_height,1,1" local="1,1,1"/>
+ </Kernel>
+</CustomLayer>
+
+<CustomLayer name="SpatialTransform" type="MVCL" version="1">
+ <Kernel entry="ocl_st">
+ <Source filename="st.bin"/>
+ <Parameters>
+ <Tensor arg-name="src_data" type="input" port-index="0" format="BFYX"/>
+ <Tensor arg-name="theta" type="input" port-index="1" format="ANY"/>
+ <Tensor arg-name="dst_data" type="output" port-index="0" format="BFYX"/>
+ <Scalar arg-name="C" type="int" port-index="0" source="I.F"/>
+ <Scalar arg-name="W" type="int" port-index="0" source="I.X"/>
+ <Data arg-name="local_dst" type="local_data" dim="input,0" size="X*F*2"/>
+ </Parameters>
+ <WorkSizes dim="input,0" global="(X+511)/512,Y,1" local="1,1,1"/>
+ </Kernel>
</CustomLayer>
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void cvtu8f16(const __global uchar* restrict src,
- __global half* restrict dst,
- float scale,
- float bais)
+#define USE_MANUAL_DMA 1
+
+#if defined (USE_MANUAL_DMA)
+
+__kernel void __dma_preload_cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src,
+ __local half* restrict local_dst)
{
- int idx = get_global_id(0)
- + get_global_id(1) * get_global_size(0)
- + get_global_id(2) * get_global_size(0) * get_global_size(1);
+ WorkGroupDmaCreate3DTransaction(
+ src + get_group_id(0)*get_local_size(0)
+ + get_group_id(1)*get_local_size(1)*get_global_size(0)
+ + get_group_id(2)*get_local_size(2)*get_global_size(0)*get_global_size(1), // src
+ local_src, // dst
+ get_local_size(0) * sizeof(uchar), // src width
+ get_local_size(0) * sizeof(uchar), // dst width
+ get_global_size(0) * sizeof(uchar), // src stride
+ get_local_size(0) * sizeof(uchar), // dst stride
+ get_local_size(2), // num planes
+ get_global_size(0) * get_global_size(1) * sizeof(uchar), // src plane stride
+ get_local_size(0) * get_local_size(1) * sizeof(uchar), // dst plane stride
+ get_local_size(0) * get_local_size(1) * sizeof(uchar), // plane size
+ 0);
+}
- dst[idx] = convert_half(src[idx])*(half)scale+(half)bais;
+__kernel void __dma_postwrite_cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src,
+ __local half* restrict local_dst)
+{
+ WorkGroupDmaCreate3DTransaction(
+ local_dst, // src
+ dst + get_group_id(0)*get_local_size(0)
+ + get_group_id(1)*get_local_size(1)*get_global_size(0)
+ + get_group_id(2)*get_local_size(2)*get_global_size(0)*get_global_size(1), // dst
+ get_local_size(0) * sizeof(half), // src width
+ get_local_size(0) * sizeof(half), // dst width
+ get_local_size(0) * sizeof(half), // src stride
+ get_global_size(0) * sizeof(half), // dst stride
+ get_local_size(2), // num planes
+ get_local_size(0) * get_local_size(1) * sizeof(half), // src plane stride
+ get_global_size(0) * get_global_size(1) * sizeof(half), // dst plane stride
+ get_local_size(0) * get_local_size(1) * sizeof(half), // plane size
+ 0);
}
+
+__kernel void cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src,
+ __local half* restrict local_dst)
+{
+ size_t idx = get_local_id(0) +
+ get_local_id(1)*get_local_size(0) +
+ get_local_id(2)*get_local_size(0)*get_local_size(1);
+ local_dst[idx] = convert_half(local_src[idx])*(half)scale+(half)bias;
+}
+
+#else // defined (USE_MANUAL_DMA)
+
+__kernel void cvtu8f16(
+ __global uchar* restrict src,
+ __global half* restrict dst,
+ float scale,
+ float bias,
+ __local uchar* restrict local_src, // unused, added for compatibility with DMA variant
+ __local half* restrict local_dst) // unused, added for compatibility with DMA variant
+{
+ int idx = get_global_id(0) +
+ get_global_id(1) * get_global_size(0) +
+ get_global_id(2) * get_global_size(0) * get_global_size(1);
+ dst[idx] = convert_half(src[idx])*(half)scale+(half)bias;
+}
+
+#endif // defined (USE_MANUAL_DMA)
+
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
#define USE_MANUAL_DMA 1
#if defined (USE_MANUAL_DMA)
}
__kernel void grn_NCHW(
- __global const half* restrict src_data,
- __global half* restrict dst_data,
- __local half* restrict src,
- __local half* restrict dst,
+ __global const half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
int C,
float bias)
{
#pragma unroll 8
for (int c = 0; c < C; c++)
{
- float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
+ float val = (float) local_src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
variance += val*val;
}
#pragma unroll 8
for (int c = 0; c < C; c++)
{
- dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
- = src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
+ local_dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
+ = local_src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
}
}
#else // defined (USE_MANUAL_DMA)
__kernel void grn_NCHW(
- __global const half* restrict src_data,
- __global half* restrict dst_data,
- __local half* restrict src, // unused, added for compatibility with DMA variant
- __local half* restrict dst, // unused, added for compatibility with DMA variant
+ __global const half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict local_src, // unused, added for compatibility with DMA variant
+ __local half* restrict local_dst, // unused, added for compatibility with DMA variant
int C,
float bias)
{
#pragma unroll 4
for (int c = 0; c < C; c++)
{
- float val = (float) src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
+ float val = (float) src[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
variance += val*val;
}
#pragma unroll 4
for (int c = 0; c < C; c++)
{
- dst_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
- = src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
+ dst[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
+ = src[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
}
}
#endif // defined (USE_MANUAL_DMA)
-
-// doesn't work right now due to compiler limitation
-// ToDo: fix compiler
-#if defined(IN_KERNEL_DMA)
-
-#define MAX_LOCAL_W 224
-#define MAX_LOCAL_H 2
-#define MAX_LOCAL_C 24
-
-__kernel void grn_NCHW(__global const half* restrict src_data,
- __global half* restrict dst_data,
- int C,
- float bias)
-{
- __local half src[MAX_LOCAL_W*MAX_LOCAL_H*MAX_LOCAL_C]; // get_local_size(0)*get_local_size(1)*C
- __local half dst[MAX_LOCAL_W*MAX_LOCAL_H*MAX_LOCAL_C]; // get_local_size(0)*get_local_size(1)*C
-
- const size_t index = get_group_id(0)*get_local_size(0) + get_group_id(1)*get_local_size(1)*get_global_size(0);
-
- event_t e1 = async_work_group_copy_3D3D(
- src, // dst
- src_data + index, // src
- get_local_size(0), // num_elements_per_line,
- get_local_size(1), // num_lines,
- get_global_size(0) - get_local_size(0), // src_line_stride,
- 0, // dst_line_stride,
- C, // num_planes,
- get_global_size(0)*get_global_size(1) - get_local_size(0) * get_local_size(1), // src_plane_stride
- 0, // dst_plane_stride
- 0); // event
- wait_group_events(1, &e1);
-
- ////////////////////////
-
- float variance = bias + 1e-9f;
-
- #pragma unroll 8
- for (int c = 0; c < C; c++)
- {
- float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)];
- variance += val*val;
- }
-
- half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
-
- #pragma unroll 8
- for (int c = 0; c < C; c++)
- {
- dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]
- = src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance;
- }
-
- ////////////////////
-
- event_t e2 = async_work_group_copy_3D3D(
- dst_data + index, // src
- dst, // dst
- get_local_size(0), // num_elements_per_line,
- get_local_size(1), // num_lines,
- 0, // src_line_stride,
- get_global_size(0) - get_local_size(0), // dst_line_stride,
- C, // num_planes,
- 0, // src_plane_stride
- get_global_size(0)*get_global_size(1) - get_local_size(0) * get_local_size(1), // dst_plane_stride
- 0); // event
- wait_group_events(1, &e2);
-}
-#endif
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void __dma_preload_quantize(const __global half* __restrict src,
- const __global half* __restrict input_low,
- const __global half* __restrict input_high,
- const __global half* __restrict output_low,
- const __global half* __restrict output_high,
- const __global half* __restrict dst,
- int levels,
- int input_low_size,
- int input_high_size,
- int output_low_size,
- int output_high_size,
- int W,
- int H,
- __local half* __restrict src_local,
- const __local half* __restrict dst_local)
+__kernel void __dma_preload_quantize(__global half const *const restrict src,
+ __global half const *const restrict input_low,
+ __global half const *const restrict input_high,
+ __global half const *const restrict output_low,
+ __global half const *const restrict output_high,
+ __global half *const restrict dst,
+ int levels,
+ int input_low_size,
+ int input_high_size,
+ int output_low_size,
+ int output_high_size,
+ int W,
+ int C,
+ __local half *const restrict local_src,
+ __local half const *const restrict local_dst)
{
- const int sizePlane = W*H;
- async_work_group_copy(src_local ,src + get_group_id(0)*sizePlane, sizePlane, 0);
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(1) * get_local_size(1) * W, // src
+ local_src, // dst
+ W * sizeof(half), // src_width,
+ W * sizeof(half), // dst_width,
+ get_global_size(1) * W * sizeof(half), // src_stride,
+ W * sizeof(half), // dst_stride,
+ W * C * sizeof(half), // size
+ 0);
}
-__kernel void __dma_postwrite_quantize( const __global half* __restrict src,
- const __global half* __restrict input_low,
- const __global half* __restrict input_high,
- const __global half* __restrict output_low,
- const __global half* __restrict output_high,
- __global half* __restrict dst,
- int levels,
- int input_low_size,
- int input_high_size,
- int output_low_size,
- int output_high_size,
- int W,
- int H,
- const __local half* __restrict src_local,
- const __local half* __restrict dst_local)
+__kernel void __dma_postwrite_quantize(__global half const *const restrict src,
+ __global half const *const restrict input_low,
+ __global half const *const restrict input_high,
+ __global half const *const restrict output_low,
+ __global half const *const restrict output_high,
+ __global half *const restrict dst,
+ int levels,
+ int input_low_size,
+ int input_high_size,
+ int output_low_size,
+ int output_high_size,
+ int W,
+ int C,
+ __local half const *const restrict local_src,
+ __local half const *const restrict local_dst)
{
- const int sizePlane = W*H;
- async_work_group_copy(dst + get_group_id(0)*sizePlane ,dst_local, sizePlane, 0);
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + get_group_id(1) * get_local_size(1) * W, // dst
+ W * sizeof(half), // src_width,
+ W * sizeof(half), // dst_width,
+ W * sizeof(half), // src_stride,
+ get_global_size(1) * W * sizeof(half), // dst_stride,
+ W * C * sizeof(half), // size
+ 0);
}
-__kernel void quantize(const __global half* __restrict src,
- const __global half* __restrict input_low,
- const __global half* __restrict input_high,
- const __global half* __restrict output_low,
- const __global half* __restrict output_high,
- const __global half* __restrict dst,
- int levels,
- int input_low_size,
- int input_high_size,
- int output_low_size,
- int output_high_size,
- int W,
- int H,
- const __local half* __restrict src_local,
- __local half* __restrict dst_local)
+__kernel void quantize(__global half const *const restrict src,
+ __global half const *const restrict input_low,
+ __global half const *const restrict input_high,
+ __global half const *const restrict output_low,
+ __global half const *const restrict output_high,
+ __global half const *const restrict dst,
+ int levels,
+ int input_low_size,
+ int input_high_size,
+ int output_low_size,
+ int output_high_size,
+ int W,
+ int C,
+ __local half const *const restrict local_src,
+ __local half *const restrict local_dst)
{
-
- int c = get_global_id(0);
-
- int C = get_global_size(0);
-
- half h_ilow = (input_low_size == 1 ? input_low[0] : input_low[c]);
- half h_ihigh = (input_high_size == 1 ? input_high[0] : input_high[c]);
- half h_olow = (output_low_size == 1 ? output_low[0] : output_low[c]);
- half h_ohigh = (output_high_size == 1 ? output_high[0] : output_high[c]);
-
- half8 h_ilow8 = h_ilow;
- half8 h_ihigh8 = h_ihigh;
- half8 h_olow8 = h_olow;
- half8 h_ohigh8 = h_ohigh;
-
- float f_ilow = convert_float(h_ilow);
- float f_ihigh = convert_float(h_ihigh);
- float f_olow = convert_float(h_olow);
- float f_ohigh = convert_float(h_ohigh);
-
- float8 f_ilow8 = f_ilow;
- float8 f_ihigh8 = f_ihigh;
- float8 f_olow8 = f_olow;
- float8 f_ohigh8 = f_ohigh;
-
- float const1 = !(f_ihigh - f_ilow) ? 0.0f : convert_float(levels - 1) / (f_ihigh - f_ilow);
- float const2 = !(levels - 1) ? 0.0f : (f_ohigh - f_olow) / convert_float(levels - 1);
-
- for (int h = 0; h < H; h++) {
- int idx = h*W;
-
- __local half* addr_src = (__local half*)src_local + idx;
- __local half* addr_dst = (__local half*)dst_local + idx;
-
- for (int w = 0; w < W / 8; w++) {
- half8 h_src_val8 = (*((__local half8*)addr_src + w));
- float8 f_src_val8 = convert_float8(h_src_val8);
-
- short8 aux_cond1;
- aux_cond1.s0 = (h_src_val8.s0 <= h_ilow);
- aux_cond1.s1 = (h_src_val8.s1 <= h_ilow);
- aux_cond1.s2 = (h_src_val8.s2 <= h_ilow);
- aux_cond1.s3 = (h_src_val8.s3 <= h_ilow);
- aux_cond1.s4 = (h_src_val8.s4 <= h_ilow);
- aux_cond1.s5 = (h_src_val8.s5 <= h_ilow);
- aux_cond1.s6 = (h_src_val8.s6 <= h_ilow);
- aux_cond1.s7 = (h_src_val8.s7 <= h_ilow);
- aux_cond1 *= aux_cond1;
-
- short8 aux_cond2;
- aux_cond2.s0 = (h_src_val8.s0 > h_ihigh);
- aux_cond2.s1 = (h_src_val8.s1 > h_ihigh);
- aux_cond2.s2 = (h_src_val8.s2 > h_ihigh);
- aux_cond2.s3 = (h_src_val8.s3 > h_ihigh);
- aux_cond2.s4 = (h_src_val8.s4 > h_ihigh);
- aux_cond2.s5 = (h_src_val8.s5 > h_ihigh);
- aux_cond2.s6 = (h_src_val8.s6 > h_ihigh);
- aux_cond2.s7 = (h_src_val8.s7 > h_ihigh);
- aux_cond2 *= aux_cond2;
-
- short8 aux_cond3 = (!aux_cond1 & aux_cond2);
- short8 aux_cond4 = (!aux_cond1 & !aux_cond2);
- aux_cond3 *= aux_cond3;
- aux_cond4 *= aux_cond4;
-
- half8 cond1 = convert_half8(aux_cond1);
- half8 cond2 = convert_half8(aux_cond2);
- half8 cond3 = convert_half8(aux_cond3);
- half8 cond4 = convert_half8(aux_cond4);
-
- half8 aux;
- aux = convert_half8(round(((f_src_val8 - f_ilow8) * (float8)const1)) * (float8)const2 + f_olow8);
- half8 dst_val = (
- (h_olow8 * cond1) +
- (h_ohigh8 * cond3) +
- (aux * cond4)
- );
- *((__local half8*)addr_dst + w) = dst_val;
+ int h = get_global_id(1);
+ int H = get_global_size(1);
+
+ for (int c = 0; c < C; c++)
+ {
+ half h_ilow = (input_low_size == 1 ? input_low[0] : input_low[c]);
+ half h_ihigh = (input_high_size == 1 ? input_high[0] : input_high[c]);
+ half h_olow = (output_low_size == 1 ? output_low[0] : output_low[c]);
+ half h_ohigh = (output_high_size == 1 ? output_high[0] : output_high[c]);
+
+ half const1 = (half)(!(h_ihigh - h_ilow) ? 0.0f : convert_float(levels - 1) / (convert_float(h_ihigh) - convert_float(h_ilow)));
+ half const2 = (half)(!(levels - 1) ? 0.0f : (convert_float(h_ohigh) - convert_float(h_olow)) / convert_float(levels - 1));
+
+ __local const half* restrict addr_src = local_src + c*W;
+ __local half* restrict addr_dst = local_dst + c*W;
+
+ for (int w = 0; w < W / 8; w++)
+ {
+ half8 val = *((__local half8*)addr_src + w);
+#if 1
+ // round is too slow =( 902 b of code
+ //half8 aux = round((val - (half8)h_ilow) * (half8)const1);
+
+ half8 aux = (val - (half8)h_ilow) * (half8)const1 + (half8)0.5h;
+
+ aux = (half8){
+ (half)(short)(aux.s0),
+ (half)(short)(aux.s1),
+ (half)(short)(aux.s2),
+ (half)(short)(aux.s3),
+ (half)(short)(aux.s4),
+ (half)(short)(aux.s5),
+ (half)(short)(aux.s6),
+ (half)(short)(aux.s7)
+ };
+
+ aux = aux * (half8)const2 + (half8)h_olow;
+
+ // vector comparison add 756 b of assembly, so do in manually
+ // short8 a = val <= (half8)h_olow;
+ // short8 b = val > (half8)h_ohigh;
+
+ short8 a;
+ short8 b;
+ a.s0 = (val.s0 <= h_ilow);
+ a.s1 = (val.s1 <= h_ilow);
+ a.s2 = (val.s2 <= h_ilow);
+ a.s3 = (val.s3 <= h_ilow);
+ a.s4 = (val.s4 <= h_ilow);
+ a.s5 = (val.s5 <= h_ilow);
+ a.s6 = (val.s6 <= h_ilow);
+ a.s7 = (val.s7 <= h_ilow);
+
+ b.s0 = (val.s0 > h_ihigh);
+ b.s1 = (val.s1 > h_ihigh);
+ b.s2 = (val.s2 > h_ihigh);
+ b.s3 = (val.s3 > h_ihigh);
+ b.s4 = (val.s4 > h_ihigh);
+ b.s5 = (val.s5 > h_ihigh);
+ b.s6 = (val.s6 > h_ihigh);
+ b.s7 = (val.s7 > h_ihigh);
+
+ a = ~(a-(short8)1);
+ b = ~(b-(short8)1);
+
+ short8 c1 = (~a & b);
+ short8 c2 = (~a & ~b);
+
+ short8 res = a & as_short8((half8)h_olow)
+ | c1 & as_short8((half8)h_ohigh)
+ | c2 & as_short8(aux);
+
+ *((__local half8*)addr_dst + w) = as_half8(res);
+#else
+ *((__local half8*)addr_dst + w) = val;
+#endif
}
- for (int w = W & (~0x7); w < W; w++) {
- half h_src_val = addr_src[w];
- float f_src_val = convert_float(h_src_val);
- half dst_val;
-
- if (h_src_val <= h_ilow) {
- dst_val = h_olow;
- } else if (h_src_val > h_ihigh) {
- dst_val = h_ohigh;
- } else {
- dst_val = convert_half(round((f_src_val - f_ilow) * const1) * const2 + f_olow);
- }
- addr_dst[w] = dst_val;
+ for (int w = W & (~0x7); w < W; w++)
+ //for (int w = 0 ; w < W; w++)
+ {
+ half val = addr_src[w];
+#if 1
+ short a = val <= h_ilow; a = ~(a-1);
+ short b = val > h_ihigh; b = ~(b-1);
+
+ short c1 = (~a & b);
+ short c2 = (~a & ~b);
+
+ short res = a & as_short(h_olow)
+ | c1 & as_short(h_ohigh)
+ | c2 & as_short(((half)(round( (val - h_ilow) * const1) * const2) + h_olow));
+
+ addr_dst[w] = as_half(res);
+#else
+ addr_dst[w] = val;
+#endif
}
}
}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+__constant static half log_2_e = (half)1.442695040888963; // log2(exp(1.0))
+
+#define ALLOW_EARLY_RETURN 1
+
+#define USE_MANUAL_DMA 1
+
+#if USE_MANUAL_DMA
+
+// Logistic (sigmoid) activation of a single element:
+//   dst[offset] = 1 / (1 + e^(-src[offset]))
+// e^(-x) is computed as exp2(-x * log2(e)) because exp2 maps to hardware.
+static void inline logistic_activate(__local const half* restrict src,
+ __local half* restrict dst,
+ int offset)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset] = val;
+}
+
+// DMA preload companion of region_chw: copies the local_C channel rows of
+// output row h for one anchor box (work-group (h, box)) from global CHW
+// memory into local_src as a dense W-wide row per channel.
+// NOTE(review): WorkGroupDmaCreateStrideTransaction is a Myriad/SHAVE DMA
+// intrinsic; argument meanings are taken from the inline comments below.
+__kernel void __dma_preload_region_chw(
+ __global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ // Channels per anchor box: coords + objectness + class scores.
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + c*H*W + h*W, // src
+ local_src, // dst
+ W*sizeof(half), // src_width,
+ W*sizeof(half), // dst_width,
+ W*H*sizeof(half), // src_stride,
+ W*sizeof(half), // dst_stride,
+ W*local_C*sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite companion of region_chw: mirror of the preload — flushes the
+// local_C processed rows from local_dst back to the same CHW location in dst.
+__kernel void __dma_postwrite_region_chw(
+ __global half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local const half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + c*H*W + h*W, // dst
+ W*sizeof(half), // src_width,
+ W*sizeof(half), // dst_width,
+ W*sizeof(half), // src_stride,
+ W*H*sizeof(half), // dst_stride,
+ W*local_C*sizeof(half), // size
+ 0);
+}
+
+// Region layer (CHW layout), DMA variant: works on local memory filled by
+// __dma_preload_region_chw and flushed by __dma_postwrite_region_chw.
+// One work-item per column w. Per anchor box the channel planes are:
+// 0,1 = x,y (sigmoid), 2,3 = w,h (copied), 4 = objectness (sigmoid),
+// then `classes` class scores (softmax or sigmoid, per doSoftmax).
+__kernel void region_chw(
+ __global half* restrict src_data,
+ __global half* restrict dst_data,
+ __local const half* restrict local_src,
+ __local half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ // The work-group may be padded wider than W; excess lanes do nothing.
+ if (w >= W) return;
+#endif
+
+ __local const half *restrict src = local_src + w;
+ __local half *restrict dst = local_dst + w;
+
+ // The local buffer stores one dense W-wide row per channel plane.
+ const int stride = W;
+ logistic_activate(src, dst, 0*stride);
+ logistic_activate(src, dst, 1*stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2*stride];
+ dst[3*stride] = src[3*stride];
+
+ logistic_activate(src, dst, 4*stride);
+
+ // Advance past coords + objectness to the class-score planes.
+ src += (coords + 1)*stride;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ // Numerically stable softmax: subtract the max before exponentiating.
+ half max_val = src[0];
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ max_val = max(max_val, src[c*stride]);
+ }
+
+ half expSum = 0.0h;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ const half e = src[c*stride] - max_val;
+ const half tmp = exp2(e * log_2_e);
+ dst[c*stride] = tmp;
+ expSum += tmp;
+ }
+
+ // Normalize with one reciprocal instead of `classes` divisions.
+ const half invExpSum = 1.0h / expSum;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] *= invExpSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate(src, dst, c*stride);
+ }
+ }
+}
+
+// DMA preload companion of region_hwc: gathers one anchor box's local_C
+// interleaved channels from an HWC source row into a dense local buffer
+// (local_C values per pixel, W pixels).
+__kernel void __dma_preload_region_hwc(
+ __global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+ // Without softmax the effective number of anchor boxes is the mask size.
+ if (!doSoftmax) num = maskSize;
+ const int C = local_C*num;
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + h*W*C + c, // src
+ local_src, // dst
+ local_C*sizeof(half), // src_width,
+ local_C*sizeof(half), // dst_width,
+ C*sizeof(half), // src_stride,
+ local_C*sizeof(half), // dst_stride,
+ local_C*W*sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite companion of region_hwc.
+__kernel void __dma_postwrite_region_hwc(
+ __global half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local const half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ // Region always outputs in CHW layout; same as postwrite_chw
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + c*H*W + h*W, // dst
+ W*sizeof(half), // src_width,
+ W*sizeof(half), // dst_width,
+ W*sizeof(half), // src_stride,
+ W*H*sizeof(half), // dst_stride,
+ W*local_C*sizeof(half), // size
+ 0);
+}
+
+// Sigmoid of src[offset], stored at dst[offset*stride]: reads an HWC-packed
+// element (channels contiguous) and writes it into a CHW-packed plane.
+static void inline logistic_activate_hwc(__local const half* restrict src,
+ __local half* restrict dst,
+ int offset,
+ int stride)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset*stride] = val;
+}
+
+// Region layer for an HWC-packed local input (local_C channels contiguous
+// per pixel), producing CHW-packed local output. One work-item per column w.
+// Same per-box plane semantics as region_chw above.
+__kernel void region_hwc(
+ __global half* restrict src_data,
+ __global half* restrict dst_data,
+ __local const half* restrict local_src,
+ __local half* restrict local_dst,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ if (w >= W) return;
+#endif
+
+ const int local_C = classes + coords + 1;
+
+ // Input: channels contiguous per pixel. Output: W-strided channel planes.
+ __local const half *restrict src = local_src + w*local_C;
+ __local half *restrict dst = local_dst + w;
+
+ const int stride = W;
+ logistic_activate_hwc(src, dst, 0, stride);
+ logistic_activate_hwc(src, dst, 1, stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2];
+ dst[3*stride] = src[3];
+
+ logistic_activate_hwc(src, dst, 4, stride);
+
+ // Advance past coords + objectness to the class scores.
+ src += coords + 1;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ // Numerically stable softmax over the class scores.
+ half max_val = src[0];
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ max_val = max(max_val, src[c]);
+ }
+
+ half expSum = 0.0h;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ const half e = src[c] - max_val;
+ const half tmp = exp2(e * log_2_e);
+ dst[c*stride] = tmp;
+ expSum += tmp;
+ }
+
+ const half invExpSum = 1.0h / expSum;
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] *= invExpSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate_hwc(src, dst, c, stride);
+ }
+ }
+}
+
+#else // defined (USE_MANUAL_DMA)
+
+#define NUM_CLASSES 80
+
+// Non-DMA variant of logistic_activate: identical sigmoid, but reads and
+// writes global memory directly instead of a local staging buffer.
+static void inline logistic_activate(__global const half* restrict src,
+ __global half* restrict dst,
+ int offset)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset] = val;
+}
+
+// Region layer (CHW), non-DMA variant: addresses global memory directly;
+// the __local arguments exist only to keep the kernel signature uniform.
+// Softmax stages class scores in a private buffer instead of re-reading
+// global memory; assumes classes <= NUM_CLASSES (80) — no bounds check.
+__kernel void region_chw(
+ __global const half* restrict global_src,
+ __global half* restrict global_dst,
+ __local half* restrict _0,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ if (w >= W) return;
+#endif
+
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+
+ __global const half *restrict src = global_src + c*H*W + h*W + w;
+ __global half *restrict dst = global_dst + c*H*W + h*W + w;
+
+ // Full-tensor plane stride, since we address global CHW memory directly.
+ const int stride = H*W;
+ logistic_activate(src, dst, 0*stride);
+ logistic_activate(src, dst, 1*stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2*stride];
+ dst[3*stride] = src[3*stride];
+
+ logistic_activate(src, dst, 4*stride);
+
+ src += (coords + 1)*stride;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ __private half data[NUM_CLASSES];
+
+ // Stable softmax: cache values and find the max in one pass.
+ half max_val = src[0];
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = src[c*stride];
+ data[c] = tmp;
+ max_val = max(max_val, tmp);
+ }
+
+ // NOTE(review): half_exp appears to be a platform intrinsic — confirm.
+ half expSum = 0.0h;
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = half_exp(data[c] - max_val);
+ data[c] = tmp;
+ expSum += tmp;
+ }
+
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] = data[c] / expSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate(src, dst, c*stride);
+ }
+ }
+}
+
+// Non-DMA variant of logistic_activate_hwc: sigmoid of an HWC-packed global
+// element written to a CHW-packed global plane at dst[offset*stride].
+static void inline logistic_activate_hwc(__global const half* restrict src,
+ __global half* restrict dst,
+ int offset,
+ int stride)
+{
+ half val = src[offset];
+ val = 1.0h / (1.0h + exp2(val * -log_2_e));
+ dst[offset*stride] = val;
+}
+
+
+// Region layer, non-DMA variant: HWC global input, CHW global output.
+// Same plane semantics as region_chw; softmax stages class scores in a
+// private buffer (assumes classes <= NUM_CLASSES, unchecked).
+__kernel void region_hwc(
+ __global const half* restrict global_src,
+ __global half* restrict global_dst,
+ __local half* restrict _0,
+ __local half* restrict _1,
+ int W, /* 13 */
+ int H, /* 13 */
+ int classes, /* 20 */
+ int coords, /* 4 */
+ int num, /* 5 */
+ int maskSize,
+ int doSoftmax
+ )
+{
+ const int w = get_local_id(0);
+
+#if ALLOW_EARLY_RETURN
+ if (w >= W) return;
+#endif
+
+ const int local_C = classes + coords + 1;
+ const int c = get_group_id(1)*local_C;
+ const int h = get_group_id(0);
+ const int C = num*local_C;
+
+ // src walks interleaved HWC channels; dst walks CHW channel planes.
+ __global const half *restrict src = global_src + h*W*C + w*C + c;
+ __global half *restrict dst = global_dst + c*H*W + h*W + w;
+
+ const int stride = H*W;
+ logistic_activate_hwc(src, dst, 0, stride);
+ logistic_activate_hwc(src, dst, 1, stride);
+
+ //copy plane 2 and 3 (box w, h pass through unchanged)
+ dst[2*stride] = src[2];
+ dst[3*stride] = src[3];
+
+ logistic_activate_hwc(src, dst, 4, stride);
+
+ src += coords + 1;
+ dst += (coords + 1)*stride;
+
+ if (doSoftmax)
+ {
+ __private half data[NUM_CLASSES];
+
+ // Stable softmax: cache values and find the max in one pass.
+ half max_val = src[0];
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = src[c];
+ data[c] = tmp;
+ max_val = max(max_val, tmp);
+ }
+
+ half expSum = 0.0h;
+ for (int c = 0; c < classes; c++)
+ {
+ half tmp = half_exp(data[c] - max_val);
+ data[c] = tmp;
+ expSum += tmp;
+ }
+
+ for (int c = 0; c < classes; c++)
+ {
+ dst[c*stride] = data[c] / expSum;
+ }
+ }
+ else
+ {
+ #pragma unroll 4
+ for (int c = 0; c < classes; c++)
+ {
+ logistic_activate_hwc(src, dst, c, stride);
+ }
+ }
+}
+
+#endif // defined (USE_MANUAL_DMA)
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void reorg_NCHW(__global const half* restrict src,
- __global half* restrict out,
- int H,
- int W,
- int stride)
+#define USE_MANUAL_DMA
+
+#if defined (USE_MANUAL_DMA)
+
+// DMA preload for reorg_chw: each work-group (channel block, stride row)
+// copies get_local_size(0) rows of W*stride elements from global memory into
+// local_src, collapsing the global src stride into a dense local layout.
+__kernel void __dma_preload_reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
+{
+ const int stride_y = get_group_id(1);
+
+ const int srcIdx = stride_y*W*stride + W*stride*stride*get_group_id(0);
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + srcIdx, // src
+ local_src, // dst
+ W * stride * sizeof(half), // src width
+ W * stride * sizeof(half), // dst width
+ W * stride * stride * get_num_groups(0) * sizeof(half), // src stride
+ W * stride * sizeof(half), // dst stride
+ W * stride * get_local_size(0) * sizeof(half), //total size
+ 0);
+}
+
+// DMA postwrite for reorg_chw: scatters the dense local_dst rows back to
+// global memory with a W*get_num_groups(0) destination stride.
+__kernel void __dma_postwrite_reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half const *restrict local_dst
+ )
+{
+ const int stride_y = get_group_id(1);
+
+ const int dstIdx = stride_y*W*stride*get_global_size(0) + get_group_id(0)*W;
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + dstIdx, // dst
+ W * sizeof(half), // src width
+ W * sizeof(half), // dst width
+ W * sizeof(half), // src stride
+ W * get_num_groups(0) * sizeof(half), // dst stride
+ get_local_size(0) * W * stride * sizeof(half), //total size
+ 0);
+}
+
+__kernel void reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
{
- int h = min((int)get_global_id(0), H-1);
+ const int c = get_local_id(0);
+ const int stride_x = get_local_id(1);
- int c = get_global_id(1);
- int C = get_global_size(1);
- int C2 = C/(stride*stride);
+ const int srcIdx = stride_x + c*W*stride;
+ const int dstIdx = stride_x*W*get_local_size(0) + c*W;
- int offset = c / C2;
+ int x = 0;
+ for (; x <= W - 8; x += 8) {
+ half8 data = (half8) {
+ local_src[srcIdx + (x + 0)*stride], local_src[srcIdx + (x + 1)*stride],
+ local_src[srcIdx + (x + 2)*stride], local_src[srcIdx + (x + 3)*stride],
+ local_src[srcIdx + (x + 4)*stride], local_src[srcIdx + (x + 5)*stride],
+ local_src[srcIdx + (x + 6)*stride], local_src[srcIdx + (x + 7)*stride]
+ };
- int c2 = c - C2 * offset;
+ *((__local half8*)(&local_dst[dstIdx + x])) = data;
+ }
- int H2 = H*stride;
- int W2 = W*stride;
+ for (; x < W; x++) {
+ local_dst[dstIdx + x] = local_src[srcIdx + x*stride];
+ }
+}
- for (int w = 0; w < W; ++w)
- {
- int h2 = h*stride + offset / stride;
- int w2 = w*stride + offset - stride * (offset / stride);
+#else
- out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
+__kernel void reorg_chw(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half const *restrict _0,
+ __local half *restrict _1
+ )
+{
+ const int stride_x = get_local_id(1);
+ const int stride_y = get_group_id(1);
+ const int N = get_global_size(0);
+ const int c = get_local_id(0)*get_num_groups(0) + get_group_id(0);
+
+ const int srcIdx = c*W*stride*stride + stride_x + stride_y*W*stride;
+ const int dstIdx = c*W + stride_x*W*N + stride_y*W*N*stride;
+
+ #pragma unroll 8
+ for (int x = 0; x < W; x++) {
+ dst[dstIdx + x] = src[srcIdx + x*stride];
}
}
+
+#endif
+
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#define MIN(v1, v2) ((v1) < (v2) ? (v1) : (v2))
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-__kernel void reorg(__global half* restrict src,
- __global half* restrict out,
- int h,
- int w,
- int stride)
+__kernel void __dma_preload_reorg_hwc(__global half const *restrict src,
+ __global half *restrict _0,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict _1
+ )
{
- int j = MIN(get_global_id(0), h-1);
+ const int stride_x = get_group_id(1);
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(0) * stride + stride_x * C, // src
+ local_src, // dst
+ stride * sizeof(half), // src_width,
+ stride * sizeof(half), // dst_width,
+ C * stride * sizeof(half), // src_stride,
+ stride * sizeof(half), // dst_stride,
+ H * W * sizeof(half), // size
+ 0);
+}
- int k = get_global_id(1);
- int c = get_global_size(1);
+__kernel void __dma_postwrite_reorg_hwc(__global half const *restrict _0,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict _1,
+ __local half *restrict local_dst
+ )
+{
+ const int stride_x = get_group_id(1);
- int out_c = c / (stride * stride);
- int oc = c * (stride * stride);
- int oh = h / stride;
- int ow = w / stride;
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + stride_x * C + get_group_id(0) * stride, // dst
+ stride * sizeof(half), // src_width,
+ stride * sizeof(half), // dst_width,
+ stride * sizeof(half), // src_stride,
+ C * stride * sizeof(half), // dst_stride,
+ W * H * sizeof(half), // size
+ 0);
+}
- int in_index = w * (j + h*k);
+__kernel void reorg_hwc(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
+{
+ const int stride_y = get_local_id(1);
+ const int blocks = get_local_size(0);
+ const int b = get_local_id(0);
- int new_z = in_index / (oh*ow);
- int new_y = (in_index %(oh*ow)) / ow;
- int new_x = (in_index %(oh*ow)) % ow;
- int new_index = new_z + new_x * oc + new_y * oc * ow;
+ const int OC = stride * stride;
+ const int OH = H / stride;
+ const int OW = W / stride;
+ const int IC = stride;
+ const int IH = H;
+ const int IW = W / stride;
- in_index++;
+ for (int block_h = 0; block_h < stride; block_h++) {
+ const int src_line = b * stride * stride + stride_y * stride + block_h;
+ const int c = src_line / IH;
+ const int h = src_line % IH;
- int c2 = k % out_c;
- int offset = k / out_c;
- int w2 = 0 * stride + offset % stride;
- int h2 = j * stride + offset / stride;
- int out_index = w2 + w * stride * (h2 + h * stride * c2);
+ const int dst_line = b * stride + stride_y * blocks * stride + block_h;
+ const int oc = dst_line / OH;
+ const int oh = dst_line % OH;
- for (int i = 0; i < w; ++i, out_index+=stride, in_index++)
+ for (int w = 0; w < W / stride; w++) {
+ local_dst[oh*OW*OC + w*OC + oc] = local_src[h*IW*IC + w*IC + c];
+ }
+ }
+}
+
+__kernel void reorg_hwc_naive(__global half const *restrict src,
+ __global half *restrict dst,
+ int W,
+ int H,
+ int C,
+ int stride,
+ __local half *restrict local_src,
+ __local half *restrict local_dst
+ )
+{
+ const int out_c = C / (stride * stride);
+ const int oc = C * (stride * stride);
+ const int oh = H / stride;
+ const int ow = W / stride;
+
+ const int c = get_global_id(0);
+
+ for (int h = 0; h < H; ++h)
{
- // repacking coordinates
- int k0 = out_index / (h*w);
- int j0 = (out_index % (h*w)) / w;
- int i0 = (out_index % (h*w)) % w;
- int out_index_repack = k0 + c * i0 + c * w * j0;
- out[new_index] = src[out_index_repack];
-
- int new_z = in_index / (oh*ow);
+ int in_index = W * (h + H*c) + (0);
+ int new_z = in_index / (oh*ow);
int new_y = (in_index %(oh*ow)) / ow;
int new_x = (in_index %(oh*ow)) % ow;
- new_index = new_z + new_x * oc + new_y * oc * ow;
+ int new_index = new_z + new_x * oc + new_y * oc * ow;
+
+ in_index++;
+
+ int c2 = c % out_c;
+ int offset = c / out_c;
+ int w2 = 0 * stride + offset % stride;
+ int h2 = h * stride + offset / stride;
+ int out_index = w2 + W * stride * (h2 + H * stride * c2);
+
+ #pragma unroll 2
+ for(int i = 0; i < W; ++i, out_index+=stride, in_index++)
+ {
+ // repacking coordinates
+ int k0 = out_index / (H*W);
+ int j0 = (out_index % (H*W)) / W;
+ int i0 = (out_index % (H*W)) % W;
+ int out_index_repack = k0 + C * i0 + C * W * j0;
+
+ dst[new_index] = src[out_index_repack];
+
+ int new_z = in_index / (oh*ow);
+ int new_y = (in_index %(oh*ow)) / ow;
+ int new_x = (in_index %(oh*ow)) % ow;
+ new_index = new_z + new_x * oc + new_y * oc * ow;
+ }
}
}
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define ROUND(x) (int)(round(x))
#endif
+// Map an output coordinate to its source input coordinate for nearest-
+// neighbor resampling; f is the input/output scale (1/factor at call sites).
+// Truncating (ox + 0.5)*f picks the input pixel covering the output center.
+inline int out_to_in(float ox, float f) {
+ return (int)((ox + 0.5f) * f);
+}
+
+#define USE_MANUAL_DMA
+
+#if defined (USE_MANUAL_DMA)
+
+// Nearest-neighbor horizontal resample of C rows (CHW layout, local memory):
+// pdst[c][w] = psrc[c][nearest(w)], vectorized 8 output pixels at a time.
+// __builtin_shave_cmu_min_i32_rr_int is presumably a SHAVE min intrinsic
+// clamping the sampled index to IW-1 — TODO confirm.
+// NOTE(review): alpha is derived from rh but applied along w; the sole
+// caller passes rw == rh so this is equivalent — confirm before reusing.
+void interpolationCHW_nn(__local half* psrc, __local half* pdst, int OW, int IW, int C, float rw, float rh)
+{
+ float alpha = rh / 2.0f - 0.5f;
+
+ for (int w = 0; w < OW/8; w++)
+ {
+ // Source x positions for 8 consecutive output pixels.
+ float fw0 = rw*(w*8+0) + alpha;
+ float fw1 = rw*(w*8+1) + alpha;
+ float fw2 = rw*(w*8+2) + alpha;
+ float fw3 = rw*(w*8+3) + alpha;
+
+ float fw4 = rw*(w*8+4) + alpha;
+ float fw5 = rw*(w*8+5) + alpha;
+ float fw6 = rw*(w*8+6) + alpha;
+ float fw7 = rw*(w*8+7) + alpha;
+
+ // Round to the nearest input column, clamped to the last column.
+ int iw0 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw0), IW-1);
+ int iw1 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw1), IW-1);
+ int iw2 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw2), IW-1);
+ int iw3 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw3), IW-1);
+
+ int iw4 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw4), IW-1);
+ int iw5 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw5), IW-1);
+ int iw6 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw6), IW-1);
+ int iw7 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw7), IW-1);
+
+ // Indices are shared across channels: gather and store per channel row.
+ for (int c = 0; c < C; c++)
+ {
+ half8 val = {
+ *((__local half*)(psrc + c * IW + iw0)),
+ *((__local half*)(psrc + c * IW + iw1)),
+
+ *((__local half*)(psrc + c * IW + iw2)),
+ *((__local half*)(psrc + c * IW + iw3)),
+
+ *((__local half*)(psrc + c * IW + iw4)),
+ *((__local half*)(psrc + c * IW + iw5)),
+
+ *((__local half*)(psrc + c * IW + iw6)),
+ *((__local half*)(psrc + c * IW + iw7)),
+ };
+ *((__local half8*)(pdst + c * OW + w*8)) = val;
+ }
+ }
+
+ // Scalar tail for OW not divisible by 8.
+ for (int w = OW/8*8; w < OW; w++)
+ {
+ float fw = rw*w + alpha;
+ int iw0 = __builtin_shave_cmu_min_i32_rr_int((int)ROUND(fw), IW-1);
+
+ for (int c = 0; c < C; c++)
+ {
+ *((__local half*)(pdst + c * OW + w)) = *((__local half*)(psrc + c * IW + iw0));
+ }
+ }
+}
+
+// DMA preload for resample_nearest: computes the input-row span [iy_first,
+// iy_last] needed by this work-group's output rows and copies it for all
+// channels into local_src.
+__kernel void __dma_preload_resample_nearest(__global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ const int oy_first = get_group_id(1) * get_local_size(1);
+ const int oy_last = (get_group_id(1) + 1) * get_local_size(1) - 1;
+ const int iy_first = out_to_in(oy_first, 1.0 / factor);
+ const int iy_last = out_to_in(oy_last, 1.0 /factor);
+ const int iy_size = iy_last - iy_first + 1;
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(2)*channels*ih*iw + iy_first*iw, // src
+ local_src, // dst
+ iy_size * iw * sizeof(half), // src_width,
+ iy_size * iw * sizeof(half), // dst_width,
+ ih * iw * sizeof(half), // src_stride,
+ iy_size * iw * sizeof(half), // dst_stride,
+ channels * iy_size * iw * sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite for resample_nearest: flushes this work-group's block of
+// output rows for all channels from local_dst back to global dst.
+__kernel void __dma_postwrite_resample_nearest(__global const half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local half* restrict local_dst,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+
+ WorkGroupDmaCreateStrideTransaction(
+ local_dst, // src
+ dst + get_group_id(2)*channels*get_global_size(1)*ow + get_group_id(1)*get_local_size(1)*ow, // dst
+ get_local_size(1) * ow * sizeof(half), // src_width,
+ get_local_size(1) * ow * sizeof(half), // dst_width,
+ get_local_size(1) * ow * sizeof(half), // src_stride,
+ get_global_size(1) * ow * sizeof(half), // dst_stride,
+ channels * get_local_size(1) * ow * sizeof(half), // size
+ 0);
+}
+
kernel void resample_nearest(__global const half* restrict src,
__global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
int iw,
int ih,
float factor,
int oh,
int channels)
{
- int oy = min((int)get_global_id(0), oh-1);
- int c = get_global_id(1);
- int b = get_global_id(2);
-
- float fx = 1.f / factor;
- float fy = 1.f / factor;
-
- __global const half* start_src = src + b * iw * ih * channels + iw * ih * c;
- __global half* start_dst = dst + b * ow * oh * channels + ow * oh * c;
-
- for (int ox = 0; ox < ow; ox++)
- {
- float ix_r0 = ox*fx + fx / 2.0f - 0.5f;
- float iy_r0 = oy*fy + fy / 2.0f - 0.5f;
+ interpolationCHW_nn(local_src, local_dst, ow, iw, channels, 1.0 / factor, 1.0 / factor);
+}
- int ix_r1 = ROUND(ix_r0);
- int iy_r1 = ROUND(iy_r0);
+#else // defined (USE_MANUAL_DMA)
- ix_r1 = max(ix_r1, 0);
- ix_r1 = min(ix_r1, iw - 1);
+kernel void resample_nearest(__global const half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ const float inv_factor = 1.0f / factor;
+ const int iy = out_to_in(get_global_id(1), inv_factor);
- iy_r1 = max(iy_r1, 0);
- iy_r1 = min(iy_r1, ih - 1);
+ __global half* dst_data = dst + get_global_id(1)*ow;
+ __global half* src_data = src + iy*iw;
- start_dst[oy * ow + ox] = start_src[iy_r1 * iw + ix_r1];
+ for (int ox = 0; ox < ow; ++ox)
+ {
+ const int ix = out_to_in(ox, inv_factor);
+ for (int c = 0; c < channels; c++) {
+ dst_data[c*oh*ow + ox] = src_data[c*ih*iw + ix];
+ }
}
}
+
+#endif // defined (USE_MANUAL_DMA)
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#define USE_OPTIMIZED_ROUND
+
+#ifdef USE_OPTIMIZED_ROUND
+ #define ROUND(x) ((int)((x) + 0.5f))
+#else
+ #define ROUND(x) (int)(round(x))
+#endif
+
+
+// Map an output coordinate to its input coordinate; here f is the resize
+// factor itself (output/input), so the position is divided by f.
+// The optimized path folds round(x - 0.5) into a plain truncation.
+inline int out_to_in(float ox, float f) {
+#ifdef USE_OPTIMIZED_ROUND
+ return (int)((ox + 0.5f) / f);
+#else
+ return ROUND((ox + 0.5f) / f - 0.5f);
+#endif
+}
+
static inline float triangleCoeff(float x)
{
- return 1.0f - fabs(x);//fmax(0.0f, 1 - fabs(x));
+ return 1.0f - fabs(x);
}
static inline float4 triangleCoeff4(float4 x)
{
- return 1.0f - fabs(x);//fmax(0.0f, 1 - fabs(x));
+ return 1.0f - fabs(x);
+}
+
+// Triangle (tent) filter weight, half precision: 1 - |x|.
+// NOTE(review): like the float version above, values for |x| > 1 are not
+// clamped to 0 — callers must keep x within the filter support.
+static inline half triangleCoeffHalf(half x)
+{
+ return 1.0h - fabs(x);
+}
+
+// 4-wide vector form of triangleCoeffHalf (no clamping for |x| > 1).
+static inline half4 triangleCoeffHalf4(half4 x)
+{
+ return 1.0h - fabs(x);
+}
+
+// 8-wide vector form of triangleCoeffHalf (no clamping for |x| > 1).
+static inline half8 triangleCoeffHalf8(half8 x)
+{
+ return 1.0h - fabs(x);
+}
+
+#define USE_MANUAL_DMA
+
+#if defined (USE_MANUAL_DMA)
+
+// DMA preload for resample_with_antialias: r is the filter support radius
+// (2 when upscaling, ceil(1/factor) when downscaling). Copies the padded
+// input-row span [iy_first, iy_last] for this group's channels and rows.
+__kernel void __dma_preload_resample_with_antialias(__global const half* restrict src,
+ __global half* restrict _0,
+ __local half* restrict local_src,
+ __local half* restrict _1,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ const int r = (factor > 1.0f) ? 2 : ceil(1.0f / factor);
+ const int oy_first = get_group_id(1) * get_local_size(1);
+ const int oy_last = (get_group_id(1) + 1) * get_local_size(1) - 1;
+ const int iy_first = max(out_to_in(oy_first, factor) - r, 0);
+ const int iy_last = min(out_to_in(oy_last, factor) + r, ih - 1);
+ const int iy_size = iy_last - iy_first + 1;
+
+ WorkGroupDmaCreateStrideTransaction(
+ src + get_group_id(2)*get_local_size(2)*ih*iw + iy_first*iw, // src
+ local_src, // dst
+ iy_size * iw * sizeof(half), // src_width,
+ iy_size * iw * sizeof(half), // dst_width,
+ ih * iw * sizeof(half), // src_stride,
+ iy_size * iw * sizeof(half), // dst_stride,
+ get_local_size(2) * iy_size * iw * sizeof(half), // size
+ 0);
+}
+
+// DMA postwrite for resample_with_antialias: flushes this group's block of
+// filtered output rows for its channels from dst_local back to global dst.
+__kernel void __dma_postwrite_resample_with_antialias(__global const half* restrict _0,
+ __global half* restrict dst,
+ __local half* restrict _1,
+ __local half* restrict dst_local,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ WorkGroupDmaCreateStrideTransaction(
+ dst_local, // src
+ dst + get_group_id(2)*get_local_size(2)*get_global_size(1)*ow + get_group_id(1)*get_local_size(1)*ow, // dst
+ get_local_size(1) * ow * sizeof(half), // src_width,
+ get_local_size(1) * ow * sizeof(half), // dst_width,
+ get_local_size(1) * ow * sizeof(half), // src_stride,
+ get_global_size(1) * ow * sizeof(half), // dst_stride,
+ get_local_size(2) * get_local_size(1) * ow * sizeof(half), // size
+ 0);
+}
+// Compute kernel (manual-DMA build): one work-item produces one output row
+// (oy = global id 1) using the preloaded local_src window; results go to
+// local_dst and are flushed by the postwrite kernel.
+// NOTE(review): `ow` is used below but does not appear in this hunk's visible
+// parameter list — verify the unchanged context lines carry it.
__kernel void resample_with_antialias(const __global half* restrict src,
__global half* restrict dst,
+ __local half* restrict local_src,
+ __local half* restrict local_dst,
int iw,
int ih,
float factor,
int oh,
int channels)
{
- int oy = min((int)get_global_id(0), oh-1);
- int c = get_global_id(1);
- int b = get_global_id(2);
+ const int r = (factor > 1.0f) ? 2 : ceil(1.0f / factor);
+ const int oy_first = get_group_id(1) * get_local_size(1);
+ const int oy_last = (get_group_id(1) + 1) * get_local_size(1) - 1;
+ const int iy_first = max(out_to_in(oy_first, factor) - r, 0);
+ const int iy_last = min(out_to_in(oy_last, factor) + r, ih - 1);
+ const int iy_size = iy_last - iy_first + 1;
+ const int oy = get_global_id(1);
+ // Source row center, expressed relative to the preloaded window origin iy_first.
+ const float iy_f = ((oy + 0.5f) / factor - 0.5f) - iy_first;
+ const int iy = ROUND(iy_f);
+
+ // NOTE(review): start_src/start_dst already add a get_local_id(1) row offset
+ // here, yet the store below indexes start_dst[get_local_id(1)*ow + ox] again —
+ // that looks like a double row offset relative to the postwrite DMA layout
+ // (dense local_size(1)*ow planes). Confirm against the postwrite kernel.
+ __local half const *restrict start_src = local_src + iw * get_local_id(1) + iw * iy_size * get_local_id(2);
+ __local half *restrict start_dst = local_dst + ow * get_local_id(1) + ow * get_local_size(1) * get_local_id(2);
+
+ for (int ox = 0; ox < ow; ox++)
+ {
+ const float ix_f = (float)((ox + 0.5f) / factor) - 0.5f;
+ const int ix_i = ROUND(ix_f);
+
+ float4 v_sum = 0.f;
+ float4 v_wsum = 0.f;
+ for (int y = 0; y < iy_size; y++)
+ {
+ float dy = iy_f - y;
+ int x = max(ix_i - r, 0);
+ int end_x = min(ix_i + r, iw - 1);
+
+ float4 dx;
+ for (int i = 0; i < 4; i++)
+ dx[i] = ix_f - x - i;
- float fx = 1.f / factor;
- float fy = 1.f / factor;
+ // NOTE(review): the non-DMA variant uses `x <= end_x - 3` here; `<` skips
+ // one vector iteration (the scalar tail still covers it, so results match,
+ // just slightly slower). Consider aligning the two variants.
+ for (; x < end_x - 3; x += 4, dx -= 4)
+ {
+ float4 w = factor*triangleCoeff4(factor*dx) * factor*triangleCoeff(factor*dy);
+ float4 src_vec = { start_src[y*iw + x + 0],
+ start_src[y*iw + x + 1],
+ start_src[y*iw + x + 2],
+ start_src[y*iw + x + 3] };
+
+ v_sum += w * src_vec;
+ v_wsum += w;
+ }
+
+ // Scalar tail for the remaining (< 4) taps.
+ for (; x <= end_x; x++)
+ {
+ float dx = ix_f - x;
+ float w = factor*triangleCoeff(factor*dx) * factor*triangleCoeff(factor*dy);
+
+ v_sum[0] += w * start_src[y*iw + x];
+ v_wsum[0] += w;
+ }
+ }
- float ax = 1.0f / fx;
- float ay = 1.0f / fy;
+ // Horizontal reduction of the 4 accumulator lanes.
+ v_sum[0] = v_sum[0] + v_sum[1] + v_sum[2] + v_sum[3];
+ v_wsum[0] = v_wsum[0] + v_wsum[1] + v_wsum[2] + v_wsum[3];
- int rx = (fx < 1.0f) ? 2 : ceil((1.0f)/ax);
- int ry = (fy < 1.0f) ? 2 : ceil((1.0f)/ay);
+ start_dst[get_local_id(1)*ow + ox] = (!v_wsum[0]) ? 0.0f : (half)(v_sum[0] / v_wsum[0]);
+ }
+}
- const __global half* restrict start_src = src + b * iw * ih * channels + iw * ih * c;
- __global half* restrict start_dst = dst + b * ow * oh * channels + ow * oh * c;
+#else
- float iy_r0 = oy*fy + fy / 2.0f - 0.5f;
- int iy_r1 = (int)(round(iy_r0));
+// Reference (non-DMA) variant: reads straight from global memory. `_0`/`_1`
+// keep the signature identical to the manual-DMA build so the host-side
+// dispatch does not change.
+__kernel void resample_with_antialias(const __global half* restrict src,
+ __global half* restrict dst,
+ __local half* restrict _0,
+ __local half* restrict _1,
+ int iw,
+ int ih,
+ float factor,
+ int ow,
+ int oh,
+ int channels)
+{
+ int oy = get_global_id(1);
+ int c = get_global_id(2);
+
+ // Filter radius: 2 taps when upscaling, ceil(1/factor) when downscaling.
+ int r = (factor > 1.0f) ? 2 : ceil((1.0f)/factor);
+
+ const __global half* restrict start_src = src + iw * ih * c;
+ __global half* restrict start_dst = dst + ow * oh * c;
+
+ // Map output row center to input coordinates (pixel-center convention).
+ float iy_f = (oy + 0.5) / factor - 0.5f;
+ int iy_i = ROUND(iy_f);
for (int ox = 0; ox < ow; ox++)
{
- float ix_r0 = ox*fx + fx / 2.0f - 0.5f;
- int ix_r1 = (int)(round(ix_r0));
+ float ix_f = (ox + 0.5) / factor - 0.5f;
+ int ix_i = ROUND(ix_f);
float4 v_sum = 0.f;
float4 v_wsum = 0.f;
- for (int y = max(iy_r1 - ry, 0);
- y <= min(iy_r1 + ry, (int)ih - 1); y++)
+ for (int y = max(iy_i - r, 0); y <= min(iy_i + r, (int)ih - 1); y++)
{
- float dy = iy_r0 - y;
- int x = max(ix_r1 - rx, 0);
- int end_x = min(ix_r1 + rx, (int)iw - 1);
+ float dy = iy_f - y;
+ int x = max(ix_i - r, 0);
+ int end_x = min(ix_i + r, (int)iw - 1);
float4 dx;
for (int i = 0; i < 4; i++)
- dx[i] = ix_r0 - x - i;
+ dx[i] = ix_f - x - i;
for (; x <= end_x - 3; x += 4, dx -= 4)
{
- float4 w = ax*triangleCoeff4(ax*dx) * ay*triangleCoeff(ay*dy);
+ float4 w = factor*triangleCoeff4(factor*dx) * factor*triangleCoeff(factor*dy);
float4 src_vec = { start_src[y*iw + x + 0],
start_src[y*iw + x + 1],
start_src[y*iw + x + 2],
for (; x <= end_x; x++)
{
- float dx = ix_r0 - x;
- float w = ax*triangleCoeff(ax*dx) * ay*triangleCoeff(ay*dy);
+ float dx = ix_f - x;
+ float w = factor*triangleCoeff(factor*dx) * factor*triangleCoeff(factor*dy);
v_sum[0] += w * start_src[y*iw + x];
v_wsum[0] += w;
start_dst[oy*ow + ox] = (!v_wsum[0]) ? (half)0.0f : (half)(v_sum[0] / v_wsum[0]);
}
}
+
+#endif
// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// Column tile width processed per work-group; also sizes the per-work-item
+// index/weight scratch arrays in ocl_st.
+#define MAX_WIDTH 512
+// Fully parenthesized and without a trailing semicolon so MIN() is safe inside
+// larger expressions; callers already supply their own `;`.
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+// DMA postwrite for ocl_st: copies the per-work-group `local_dst` tile
+// (C planes of get_local_size(1) rows x `length` columns) back into the
+// strided global `dst_data`. `src_data`/`theta` mirror the compute kernel's
+// argument list and are unused here.
+__kernel void __dma_postwrite_ocl_st(__global half const *const restrict src_data,
+ __global half const *const restrict theta,
+ __global half *const restrict dst_data,
+ int C,
+ int W,
+ __local half const *const restrict local_dst)
+{
+ // This group's column tile: [x0, x1) clamped to the row width W.
+ const int x0 = get_global_id(0) * MAX_WIDTH;
+ const int x1 = MIN(x0 + MAX_WIDTH, W);
+ const int length = x1 - x0;
+
+ WorkGroupDmaCreate3DTransaction(
+ local_dst, // src
+ dst_data + get_global_id(1) * W + x0, // dst
+ length * sizeof(half), // src width
+ length * sizeof(half), // dst width
+ length * sizeof(half), // src stride
+ W * sizeof(half), // dst stride
+ C, // num planes
+ get_local_size(1) * length * sizeof(half), // src plane stride
+ get_global_size(1) * W * sizeof(half), // dst plane stride
+ get_local_size(1) * length * sizeof(half), // plane size
+ 0);
+}
+
+// Computes bilinear sampling indices and weights for one output row of the
+// spatial transformer. theta[0..5] is read as a 2x3 affine transform
+// (presumably row-major; confirm against the host-side layout). For each of
+// the `length` columns starting at x0 it writes four corner weights into
+// `weight` and four flat source indices into `ind`, each plane `step` apart.
+// Out-of-range corners get index 0 and weight 0, so the gather stays in bounds.
+__attribute__((noinline))
+void calcInd(__global half const *const restrict theta,
+ half *const restrict weight,
+ int *const restrict ind,
+ int y, int H, int x0, int length, int step, int W)
+{
+ // Normalized row coordinate in [-1, 1).
+ float a = (float)y * 1.0f / H * 2 - 1;
+
+ int x = 0;
+
+ float8 va = (float8) {a, a, a, a, a, a, a, a};
+ float8 vxy = (float8) {x0 + 0, x0 + 1, x0 + 2, x0 + 3,
+ x0 + 4, x0 + 5, x0 + 6, x0 + 7};
+
+ // 8-wide main loop; the scalar loop below handles the tail.
+ for (; x <= length - 8; x += 8, vxy += 8)
+ {
+ float8 va1 = vxy * 1.0f / W * 2 - 1.f;
+
+ float8 vx = (va * theta[0] + va1 * theta[1] + theta[2] + 1.f) / 2.f * H;
+ float8 vy = (va * theta[3] + va1 * theta[4] + theta[5] + 1.f) / 2.f * W;
+
+ // floor() via truncation corrected for negatives (vector compares yield -1
+ // for true, hence the `& 1`).
+ const int8 ix = convert_int8(vx) - ((vx < 0) & 1);
+ const int8 iy = convert_int8(vy) - ((vy < 0) & 1);
+
+ float8 ax = vx - convert_float8(ix);
+ float8 ay = vy - convert_float8(iy);
+ float8 bx = 1.f - ax;
+ float8 by = 1.f - ay;
+
+ // Unsigned-compare trick: (uint)v < bound fuses the 0 <= v && v < bound
+ // check in one comparison (matches the two scalar tests in the tail loop).
+ union {int8 d; uint8 i; } check_x;
+
+ check_x.d = ix;
+ int8 b01 = check_x.i < (uint8)H;
+
+ check_x.d = ix + 1;
+ int8 b45 = check_x.i < (uint8)H;
+
+ union {int8 d; uint8 i; } check_y;
+
+ check_y.d = iy;
+ int8 b23 = check_y.i < (uint8)W;
+
+ check_y.d = iy + 1;
+ int8 b67 = check_y.i < (uint8)W;
+
+ int8 b0123 = b01 & b23;
+ int8 b0167 = b01 & b67;
+ int8 b4523 = b45 & b23;
+ int8 b4567 = b45 & b67;
+
+ // Flat indices of the four bilinear corners, forced to 0 when out of range.
+ int8 TL_id = ((ix + 0) * W + (iy + 0)) * (b0123 & 1);
+ int8 BL_id = ((ix + 1) * W + (iy + 0)) * (b4523 & 1);
+ int8 TR_id = ((ix + 0) * W + (iy + 1)) * (b0167 & 1);
+ int8 BR_id = ((ix + 1) * W + (iy + 1)) * (b4567 & 1);
+
+ // Masking the float bit pattern with the all-ones/all-zeros compare result
+ // zeroes the weight of any out-of-range corner.
+ union {float8 f; int8 i;} w0; w0.f = bx * by;
+ union {float8 f; int8 i;} w1; w1.f = ax * by;
+ union {float8 f; int8 i;} w2; w2.f = bx * ay;
+ union {float8 f; int8 i;} w3; w3.f = ax * ay;
+
+ w0.i = w0.i & b0123;
+ w1.i = w1.i & b4523;
+ w2.i = w2.i & b0167;
+ w3.i = w3.i & b4567;
+
+ *((half8*)(weight + x + 0*step)) = convert_half8(w0.f);
+ *((half8*)(weight + x + 1*step)) = convert_half8(w1.f);
+ *((half8*)(weight + x + 2*step)) = convert_half8(w2.f);
+ *((half8*)(weight + x + 3*step)) = convert_half8(w3.f);
+
+ *((int8*)(ind + x + 0*step)) = TL_id;
+ *((int8*)(ind + x + 1*step)) = BL_id;
+ *((int8*)(ind + x + 2*step)) = TR_id;
+ *((int8*)(ind + x + 3*step)) = BR_id;
+ }
+
+ // Scalar tail: same math as above, one column at a time.
+ for (; x < length; x++)
+ {
+ float a1 = (float)(x0 + x) * 1.0f / W * 2 - 1;
+
+ float fx = (a * theta[0] + a1 * theta[1] + theta[2] + 1)/2 * H;
+ float fy = (a * theta[3] + a1 * theta[4] + theta[5] + 1)/2 * W;
+
+ const int ix = (int)(fx) - (fx < 0);
+ const int iy = (int)(fy) - (fy < 0);
+
+ float ax = fx - ix;
+ float ay = fy - iy;
+ float bx = 1 - ax;
+ float by = 1 - ay;
+
+ int b0 = ix >= 0;
+ int b4 = ix >= -1;
+ int b1 = ix < H;
+ int b5 = ix < H-1;
+
+ int b2 = iy >= 0;
+ int b6 = iy >= -1;
+ int b3 = iy < W;
+ int b7 = iy < W-1;
+
+ int b01 = b0 & b1;
+ int b23 = b2 & b3;
+ int b45 = b4 & b5;
+ int b67 = b6 & b7;
+
+ int b0123 = b01 & b23;
+ int b0167 = b01 & b67;
+ int b4523 = b45 & b23;
+ int b4567 = b45 & b67;
+
+ int TL_id = ((ix + 0) * W + (iy + 0)) * b0123;
+ int BL_id = ((ix + 1) * W + (iy + 0)) * b4523;
+ int TR_id = ((ix + 0) * W + (iy + 1)) * b0167;
+ int BR_id = ((ix + 1) * W + (iy + 1)) * b4567;
+
+ half w0 = bx*by*b0123;
+ half w1 = ax*by*b4523;
+ half w2 = bx*ay*b0167;
+ half w3 = ax*ay*b4567;
+
+ weight[x + 0*step] = w0;
+ weight[x + 1*step] = w1;
+ weight[x + 2*step] = w2;
+ weight[x + 3*step] = w3;
+
+ ind[x + 0*step] = TL_id;
+ ind[x + 1*step] = BL_id;
+ ind[x + 2*step] = TR_id;
+ ind[x + 3*step] = BR_id;
+ }
+}
+
+// Gathers the four corner samples for each of `length` output pixels using the
+// indices/weights precomputed by calcInd (planes `step` apart) and writes the
+// bilinear blend to local memory. Out-of-range corners were given index 0 and
+// weight 0 upstream, so src[0] reads are harmless.
+__attribute__((noinline))
+void apply(__global half const *const restrict src,
+ half const *const restrict weight,
+ int const *const restrict ind,
+ __local half *const restrict dst,
+ int length,
+ int step)
+{
+ int x = 0;
+ // 8-wide gather + blend; scalar tail below.
+ for(; x <= length - 8; x += 8)
+ {
+ int8 TL_id = *((int8*)(ind + x + 0*step));
+ int8 BL_id = *((int8*)(ind + x + 1*step));
+ int8 TR_id = *((int8*)(ind + x + 2*step));
+ int8 BR_id = *((int8*)(ind + x + 3*step));
+
+ half8 w00 = *((half8*)(weight + x + 0*step));
+ half8 w01 = *((half8*)(weight + x + 1*step));
+ half8 w02 = *((half8*)(weight + x + 2*step));
+ half8 w03 = *((half8*)(weight + x + 3*step));
+
+ half8 TL = (half8){src[TL_id[0]], src[TL_id[1]], src[TL_id[2]], src[TL_id[3]],
+ src[TL_id[4]], src[TL_id[5]], src[TL_id[6]], src[TL_id[7]]};
+ half8 TR = (half8){src[TR_id[0]], src[TR_id[1]], src[TR_id[2]], src[TR_id[3]],
+ src[TR_id[4]], src[TR_id[5]], src[TR_id[6]], src[TR_id[7]]};
+ half8 BL = (half8){src[BL_id[0]], src[BL_id[1]], src[BL_id[2]], src[BL_id[3]],
+ src[BL_id[4]], src[BL_id[5]], src[BL_id[6]], src[BL_id[7]]};
+ half8 BR = (half8){src[BR_id[0]], src[BR_id[1]], src[BR_id[2]], src[BR_id[3]],
+ src[BR_id[4]], src[BR_id[5]], src[BR_id[6]], src[BR_id[7]]};
+
+ half8 res = w00 * TL + w01 * BL + w02 * TR + w03 * BR;
+
+ *((__local half8*)(dst + x)) = res;
+ }
+
+ for (; x < length; x++)
+ {
+ int TL_id = ind[x + 0*step];
+ int BL_id = ind[x + 1*step];
+ int TR_id = ind[x + 2*step];
+ int BR_id = ind[x + 3*step];
+
+ half w00 = weight[x + 0*step];
+ half w01 = weight[x + 1*step];
+ half w02 = weight[x + 2*step];
+ half w03 = weight[x + 3*step];
+
+ half TL = src[TL_id];
+ half TR = src[TR_id];
+ half BL = src[BL_id];
+ half BR = src[BR_id];
+
+ half res = w00 * TL + w01 * BL + w02 * TR + w03 * BR;
+ dst[x] = res;
+ }
+}
+
+// Spatial-transformer compute kernel: each work-item handles one output row
+// (y) of one column tile (group id 0), computing sampling indices/weights once
+// and reusing them across all C channels. Results land in local_dst; the
+// postwrite kernel flushes them to dst_data.
+// NOTE(review): dst_data is const-qualified here although it is the kernel's
+// output (it is only written via the postwrite kernel) — confirm intentional.
+__kernel void ocl_st(__global half const *const restrict src_data,
+ __global half const *const restrict theta,
+ __global half const *const restrict dst_data,
+ int C,
+ int W,
+ __local half *const restrict local_dst)
+{
+ int w = get_group_id(0);
+
+ int y = get_global_id(1);
+ int H = get_global_size(1);
+
+ // Per-work-item scratch: 4 corner planes of MAX_WIDTH entries each.
+ // NOTE(review): these 2D arrays decay to `int(*)[MAX_WIDTH]` when passed to
+ // calcInd/apply, which take flat pointers — works because the rows are
+ // contiguous with stride MAX_WIDTH, but expect a compiler warning.
+ __private int ind[4][MAX_WIDTH] __attribute__((aligned(16)));
+ __private half weight[4][MAX_WIDTH] __attribute__((aligned(16)));
+
+ const int x0 = w * MAX_WIDTH;
+ const int x1 = MIN(x0 + MAX_WIDTH, W);
+ const int length = x1 - x0;
+
+ calcInd(theta, weight, ind, y, H, x0, length, MAX_WIDTH, W);
+
+ for (int c = 0; c < C; c++)
+ {
+ __global half const *const restrict src = src_data + c*H*W;
+ __local half *const restrict dst = local_dst + c*get_local_size(1)*length + get_local_id(1)*length;
+
+ apply(src, weight, ind, dst, length, MAX_WIDTH);
+ }
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <pugixml.hpp>
+#include <ie_common.h>
+
+#include <vpu/utils/enums.hpp>
+#include <vpu/utils/small_vector.hpp>
+
+namespace vpu {
+
+namespace ie = InferenceEngine;
+
+// Kinds of arguments a custom OpenCL kernel can bind (parsed from the layer's
+// XML config).
+VPU_DECLARE_ENUM(CustomParamType,
+ Input,
+ Output,
+ Data,
+ LocalData,
+ InputBuffer,
+ OutputBuffer,
+ Int,
+ Float)
+
+// Tensor layouts a kernel argument can request. Note the numbering differs
+// from the legacy enum in custom_layer.hpp (Any/None shifted by the new
+// YXF/FYX/BF entries).
+VPU_DECLARE_ENUM(CustomDataFormat,
+ BYXF = 0, // NHWC used in most software layers
+ BFYX = 1, // NCHW used if HW module is enabled
+ YXF = 2, // HWC used in most software layers
+ FYX = 3, // CHW used if HW module is enabled
+ BF = 4, // NC layout
+ Any = 5, // doesn't really matter
+ None = 6)
+
+// Which tensor (input or output) supplies work-group dimensions.
+VPU_DECLARE_ENUM(CustomDimSource, Input, Output)
+
+// One compiled OpenCL kernel of a custom layer: its binary, argument bindings
+// and work-size rules, all loaded from the layer's XML configuration.
+struct CustomKernel final {
+ // Binding of a single kernel argument as declared in the XML <Parameters>
+ // node.
+ struct KernelParam final {
+ CustomParamType type = CustomParamType::Input;
+ CustomDataFormat format = CustomDataFormat::Any;
+ std::string argName; // argument name in the OpenCL source
+ int portIndex = -1; // layer port this argument maps to, if any
+ std::string irSource; // IR attribute the value comes from, if any
+ std::string bufferSizeRule; // size expression for buffer arguments
+ CustomDimSource dimSource;
+ int dimIdx = -1;
+ };
+
+private:
+ std::string _configDir;
+ int _maxShaves = 0;
+ std::string _kernelBinary; // concatenated contents of all <Source> files
+ SmallVector<KernelParam> _kernelParams;
+ SmallVector<std::string> _globalGridSizeRules;
+ SmallVector<std::string> _localGridSizeRules;
+ SmallVector<std::string> _parameters; // argument names from the ELF metadata
+ int _kernelId = 0;
+
+ CustomDimSource _wgDimSource = CustomDimSource::Input;
+ int _wgDimIdx = -1;
+
+ int _inputDataCount = 0;
+
+public:
+ explicit CustomKernel(const pugi::xml_node& node, std::string configDir);
+
+ void processParametersNode(const pugi::xml_node& node);
+ void processWorkSizesNode(const pugi::xml_node& node);
+
+ // Accessors return by value; cheap for ints/strings, but note the
+ // SmallVector getters copy their contents on every call.
+ int maxShaves() const { return _maxShaves; }
+ const std::string& kernelBinary() const { return _kernelBinary; }
+ SmallVector<KernelParam> bindings() const { return _kernelParams; }
+ SmallVector<std::string> globalGridSizeRules() const { return _globalGridSizeRules; }
+ SmallVector<std::string> localGridSizeRules() const { return _localGridSizeRules; }
+ SmallVector<std::string> parameters() const { return _parameters; }
+ int kernelId() const { return _kernelId; }
+ CustomDimSource dimSource() const { return _wgDimSource; }
+ int dimSourceIndex() const { return _wgDimIdx; }
+ int inputDataCount() const { return _inputDataCount; }
+};
+
+} // namespace vpu
#include <functional>
#include <details/caseless.hpp>
-
#include <pugixml.hpp>
#include <vpu/utils/enums.hpp>
#include <vpu/utils/small_vector.hpp>
+#include <vpu/frontend/custom_kernel.hpp>
+
+#include <ie_common.h>
namespace vpu {
namespace ie = InferenceEngine;
-VPU_DECLARE_ENUM(CustomDataFormat,
- BYXF = 0, // NHWC used in most software layers
- BFYX = 1, // NCHW used if HW module is enabled
- YXF = 2, // HWC used in most software layers
- FYX = 3, // CHW used if HW module is enabled
- Any = 4, // doesn't really matter
- None = 5
-)
-
-VPU_DECLARE_ENUM(CustomParamType,
- Input,
- Output,
- Data,
- LocalData,
- InputBuffer,
- OutputBuffer,
- Int,
- Float
-)
-
-VPU_DECLARE_ENUM(CustomDimSource,
- Input,
- Output
-)
-
class CustomLayer final {
public:
using Ptr = std::shared_ptr<CustomLayer>;
+ // Refactor: per-kernel state (binary, bindings, work sizes) moved out into
+ // CustomKernel; a CustomLayer now owns a sequence of kernels plus the
+ // layer-level "where" restrictions and port-format requirements.
+ explicit CustomLayer(std::string configDir, const pugi::xml_node& customLayer);
- struct KernelParam final {
- CustomParamType type = CustomParamType::Input;
- CustomDataFormat format = CustomDataFormat::Any;
- std::string argName;
- int portIndex = -1;
- std::string irSource;
- SmallVector<std::string> bufferSizeRules;
- CustomDimSource dimSource;
- int dimIdx = -1;
- };
+ std::vector<CustomKernel> kernels() const { return _kernels; }
+ std::string layerName() const { return _layerName; }
+ std::map<int, CustomDataFormat> inputs() { return _inputs; }
+ std::map<int, CustomDataFormat> outputs() { return _outputs; }
static ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> loadFromFile(
const std::string& configFile,
bool canBeMissed = false);
- const std::string& kernelBinary() const { return _kernelBinary; }
-
- void setStageNumInputs(int id);
- int stageNumInputs() const;
- uint32_t kernelAddress(int idx = 1) const;
- int kernelId() const;
- int maxShaves() const;
- const std::map<std::string, std::string>& whereParams() const { return _whereParams; }
-
- const SmallVector<KernelParam>& bindings() const { return _kernelParams; }
- const SmallVector<std::string>& parameters() const { return _parameters; }
-
- const SmallVector<std::string>& globalSizeRules() const { return _globalSizeRules; }
- const SmallVector<std::string>& localSizeRules() const { return _localSizeRules; }
-
- CustomDimSource dimSource() const { return _wgDimSource; }
- int dimSourceIndex() const { return _wgDimIdx; }
-
-private:
- explicit CustomLayer(const std::string& dirname) : _configDir(dirname) {}
-
- void loadSingleLayer(const pugi::xml_node& node);
- void processWhere(const pugi::xml_node& node);
- void processKernelNode(const pugi::xml_node& node);
- void processParametersNode(const pugi::xml_node& node);
- void processWorkSizesNode(const pugi::xml_node& node);
-
- static bool isLegalSizeRule(const std::string& rule);
- static CustomDataFormat formatFromString(const std::string& str);
+ // True when the layer's IR params satisfy this definition's <Where> clause.
+ bool meetsWhereRestrictions(const std::map<std::string, std::string>& params) const;
+ static bool isLegalSizeRule(const std::string& rule, std::map<std::string, std::string> layerParams);
+ static CustomDataFormat formatFromLayout(const InferenceEngine::Layout& layout);
private:
std::string _configDir;
std::string _layerName;
- std::string _kernelEntry;
- std::string _kernelBinary;
std::map<std::string, std::string> _whereParams;
- int _maxShaves = 0;
- int _stageNumInputs = -1;
-
- SmallVector<KernelParam> _kernelParams;
- SmallVector<std::string> _globalSizeRules;
- SmallVector<std::string> _localSizeRules;
- SmallVector<std::string> _parameters;
-
- std::map<uint32_t, uint32_t, std::greater<uint32_t>> _kernelAddress;
+ std::vector<CustomKernel> _kernels;
- CustomDimSource _wgDimSource = CustomDimSource::Input;
- int _wgDimIdx = -1;
+ std::map<int, CustomDataFormat> _inputs; // required format per input port
+ std::map<int, CustomDataFormat> _outputs; // required format per output port
};
}; // namespace vpu
void parseExpTopKROIs(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
void parseNonZero(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
void parseROIAlign(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+ void parseOutShapeOfReshape(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+ void parseBroadcast(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
//
// Special layers
void parseLSTMCell(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
void parseTensorIterator(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs);
-//
-// Utility
-//
+ //
+ // Utility
+ //
+
+ static CustomLayer::Ptr getSuitableCustomLayer(const std::vector<CustomLayer::Ptr>& customLayers, const ie::CNNLayerPtr&cnnLayer);
private:
Data getVpuData(const ie::DataPtr& ieData) const;
bool dumpAllPasses;
bool disableReorder = false; // TODO: rename to enableReorder and switch logic.
+ bool disableConvertStages = false;
bool enablePermuteMerging = true;
bool enableReplWithSCRelu = false;
bool enableReplaceWithReduceMean = true;
Pass::Ptr splitHwDepthConv();
Pass::Ptr splitHwConvAndPool();
Pass::Ptr hwPadding();
+ Pass::Ptr splitLargeKernelConv();
//
// Batch support
void serializeDescImpl(
BlobSerializer& serializer,
const DataDesc& storedDesc,
- const DimValues& storedStrides) const;
+ const ShapeLocation& shapeLocation) const;
private:
inline DataNode() :
friend ModelObj;
};
- inline DataToDataEdgeHelper connectDataWithData() {
- return DataToDataEdgeHelper(this);
- }
-
DataToShapeAllocation connectDataWithShape(
const Data& parent,
const Data& child);
- void replaceParentData(
+ void replaceDataToShapeParent(
+ const DataToShapeAllocation& edge,
+ const Data& newParent);
+ void replaceDataToShapeChild(
+ const DataToShapeAllocation& edge,
+ const Data& newChild);
+
+ inline DataToDataEdgeHelper connectDataWithData() {
+ return DataToDataEdgeHelper(this);
+ }
+
+ void replaceDataToDataParent(
const DataToDataAllocation& edge,
const Data& newParent);
- void replaceChildData(
+ void replaceDataToDataChild(
const DataToDataAllocation& edge,
const Data& newChild);
StubPriorBox,
StubPriorBoxClustered,
- Concat,
+ StubConcat,
Split,
Reshape,
Expand,
Pad = 71,
Resample = 72,
Upsampling = 73,
- ArgMax = 74,
Div = 75,
Min = 76,
Squared_diff = 77,
ExpGenerateProposals = 124,
ExpTopKROIs = 125,
ScatterElementsUpdate = 126,
+ OutShapeOfReshape = 127,
+ Concat = 128,
+ Broadcast = 129,
)
//
IndexOnly = 2)
//
+// ConcatInferRequirement
+//
+
+// Requirement whether to infer Concat stage on the device side
+VPU_DECLARE_ENUM(ConcatInferRequirement,
+ NeedToInfer = 0,
+ CanBeReplaced = 1)
+
+//
+// BroadcastMode
+//
+
+// Modes for Broadcast operation according to specification
+VPU_DECLARE_ENUM(BroadcastMode,
+ NUMPY = 0,
+ EXPLICIT = 1)
+
+//
// StageDataInfo
//
DECLARE_VPU_CONFIG_KEY(DISABLE_REORDER);
/**
+ * @brief Used to disable convert stages in tests to be able to insert
+ * convert layer with desired precision.
+ */
+DECLARE_VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES);
+
+/**
* @brief Used to disable permute merging pass (with setting "NO") in tests to check it preserves behaviour. Default = "YES"
*/
DECLARE_VPU_CONFIG_KEY(ENABLE_PERMUTE_MERGING);
const ie::CNNLayerPtr& layer,
Dim axis,
const DataVector& inputs,
- const Data& output);
+ const Data& output,
+ ConcatInferRequirement inferRequirement = ConcatInferRequirement::CanBeReplaced);
Stage addConcatStage(
const Model& model,
+// Serializes each data object's dims/strides (in stored-permutation order)
+// into the blob's const-data section. New behavior: when the shape lives
+// outside the blob but the data is a network Output, its upper-bound
+// dims/strides are still written at the offsets recorded in the data's attrs.
void BackEnd::serializeConstShapes(const Model& model, const mv_blob_header& blobHdr, std::vector<char>& blob) {
for (const auto& data : model->datas()) {
- const auto serializeToBlob = [&data, &blob, &blobHdr](const BlobSerializer& serializer, int offset) {
- std::copy_n(serializer.data(), data->desc().numDims() * sizeof(uint32_t), blob.data() + blobHdr.const_data_section_offset + offset);
- };
-
const auto dimsOrder = data->desc().dimsOrder();
const auto storedPerm = dimsOrder.toPermutation();
- const auto shapeLocation = data->shapeLocation();
-
- if (shapeLocation.dimsLocation == Location::Blob) {
- BlobSerializer dimsSerializer;
- const auto dims = data->desc().dims();
+ // The lambda now takes the raw DimValues and does the permuted append
+ // itself, so dims and strides share one serialization path.
+ const auto serializeToBlob = [&data, &blob, &blobHdr, &storedPerm](const DimValues& values, int offset) {
+ BlobSerializer serializer;
for (const auto& d : storedPerm) {
- dimsSerializer.append(checked_cast<uint32_t>(dims[d]));
+ serializer.append(checked_cast<uint32_t>(values[d]));
}
- serializeToBlob(dimsSerializer, shapeLocation.dimsOffset);
+
+ std::copy_n(serializer.data(), data->desc().numDims() * sizeof(uint32_t), blob.data() + blobHdr.const_data_section_offset + offset);
+ };
+
+ const auto shapeLocation = data->shapeLocation();
+
+ if (shapeLocation.dimsLocation == Location::Blob) {
+ serializeToBlob(data->desc().dims(), shapeLocation.dimsOffset);
+ } else if (data->usage() == DataUsage::Output) {
+ // NOTE(review): attrs().get<int> presumably throws/asserts when the
+ // attribute is absent — every dynamic Output must carry these offsets.
+ auto ioDimsUpperBoundOffset = data->attrs().get<int>("ioDimsUpperBoundOffset");
+ serializeToBlob(data->desc().dims(), ioDimsUpperBoundOffset);
}
if (shapeLocation.stridesLocation == Location::Blob) {
- BlobSerializer stridesSerializer;
- const auto strides = data->strides();
-
- for (const auto& d : storedPerm) {
- stridesSerializer.append(checked_cast<uint32_t>(strides[d]));
- }
- serializeToBlob(stridesSerializer, shapeLocation.stridesOffset);
+ serializeToBlob(data->strides(), shapeLocation.stridesOffset);
+ } else if (data->usage() == DataUsage::Output) {
+ auto ioStridesUpperBoundOffset = data->attrs().get<int>("ioStridesUpperBoundOffset");
+ serializeToBlob(data->strides(), ioStridesUpperBoundOffset);
}
}
}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/custom_kernel.hpp>
+#include <xml_parse_utils.h>
+#include <details/caseless.hpp>
+#include <vpu/utils/extra.hpp>
+
+namespace vpu {
+
+// Minimal packed views of the ELF32 object format, just enough to walk the
+// section/symbol tables of the compiled kernel binary. The offsN fields skip
+// header members this parser does not read.
+VPU_PACKED(Elf32Ehdr {
+ uint8_t offs1[28];
+ uint32_t ePhoff; // Program header offset
+ uint32_t eShoff; // Section header offset
+ uint8_t offs2[12];
+ uint16_t eShnum; // Number of sections
+ uint16_t offs3;
+};)
+
+VPU_PACKED(Elf32Section {
+ uint32_t shName;
+ uint32_t shType;
+ uint32_t shFlags;
+ uint32_t shAddr;
+ uint32_t shOffset;
+ uint32_t shSize;
+ uint32_t shLink;
+ uint32_t shInfo;
+ uint32_t shAddralign;
+ uint32_t shEntsize;
+};)
+
+VPU_PACKED(Elf32Phdr {
+ uint32_t pType; // Identifies program segment type
+ uint32_t pOffset; // Segment file offset
+ uint32_t pVaddr; // Segment virtual address
+ uint32_t pPaddr; // Segment physical address
+ uint32_t pFilesz; // Segment size in file
+ uint32_t pMemsz; // Segment size in memory
+ uint32_t pFlags; // Flags position from ELF standard spec
+ uint32_t pAlign; // Segment alignment, file & memory
+};)
+
+VPU_PACKED(Elf32Sym {
+ uint32_t stName;
+ uint32_t stValue;
+ uint32_t stSize;
+ uint8_t stInfo;
+ uint8_t stOther;
+ uint16_t stShndx;
+};)
+
+// Tool-specific metadata emitted by the kernel compiler (not standard ELF).
+VPU_PACKED(KernelHdr {
+ uint32_t address; // Kernel address
+ uint32_t flags; // Should be 0 for now
+ uint32_t sectionSize; // Section size, offset to the next kernel
+ uint32_t argOffset; // offset to arguments
+ uint32_t stackSize; // Size of the stack required for kernel
+ uint32_t stackSizeWI; // Size of the stack required for kernel per WI
+};)
+
+VPU_PACKED(KernelArgHdr {
+ uint32_t stringOffset;
+ uint32_t addressSpace;
+ uint32_t typeOffset;
+ uint32_t size;
+ uint32_t laneSize;
+};)
+
+// Locates the first string table and first symbol table in the ELF image.
+// Returns {string section, symbol section}; asserts if either is missing.
+// NOTE(review): assumes the first STRTAB encountered is the one SYMTAB links
+// to, rather than following symShdr->shLink — holds for this compiler's
+// output, but is not general ELF handling.
+std::pair<const Elf32Section*, const Elf32Section*> findSymbolTable(
+ const char* ELFData) {
+ const uint32_t SYMTAB = 2; // Link editing symbol table
+ const uint32_t STRTAB = 3; // A string table
+
+ IE_ASSERT(ELFData != nullptr);
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ for (size_t i = 0; i < ehdr->eShnum; i++) {
+ if (shdr[i].shType == STRTAB && strShdr == nullptr) {
+ strShdr = &shdr[i];
+ } else if (shdr[i].shType == SYMTAB && symShdr == nullptr) {
+ symShdr = &shdr[i];
+ }
+
+ if (symShdr != nullptr && strShdr != nullptr)
+ break;
+ }
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ return std::make_pair(strShdr, symShdr);
+}
+
+// Reads the compiler-emitted "opencl.kernelArgs" metadata sections and returns
+// the argument-name strings of the kernel whose entry point (relative to the
+// first program segment's virtual address) equals kernelAddress. Empty result
+// if no kernel record matches.
+// NOTE(review): the symbol-scan boilerplate here is duplicated almost verbatim
+// in getKernelId below — a shared helper returning (kernel index, KernelHdr*)
+// would remove the copy, but cannot be introduced inside this patch hunk.
+SmallVector<std::string> deduceKernelParameters(
+ const char* ELFData,
+ uint32_t kernelAddress) {
+ IE_ASSERT(ELFData != nullptr);
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
+ auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
+ auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
+ auto firstStr = ELFData + strShdr->shOffset;
+
+ // Section holding the NUL-separated argument name/type strings.
+ const char* kernelArgStrings = nullptr;
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
+ kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
+ break;
+ }
+ }
+ IE_ASSERT(kernelArgStrings != nullptr);
+
+ SmallVector<std::string> parameters;
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
+ // Layout: int kernel count, then one variable-size record per kernel.
+ auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
+ auto numKernels = *reinterpret_cast<const int*>(ptr);
+
+ auto metaOffset = sizeof(int);
+ for (int k = 0; k < numKernels; k++) {
+ auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
+
+ if (kHdr->address-phdr->pVaddr == kernelAddress) {
+ // argOffset is relative to its own field; the int immediately
+ // before the first KernelArgHdr is the argument count.
+ auto aHdr = reinterpret_cast<const KernelArgHdr*>(
+ reinterpret_cast<const char*>(&(kHdr->argOffset)) + sizeof(kHdr->argOffset) + kHdr->argOffset);
+
+ auto numArgs = reinterpret_cast<const int*>(aHdr)[-1];
+ for (int n = 0; n < numArgs; n++, aHdr++) {
+ parameters.push_back(kernelArgStrings + aHdr->stringOffset);
+ }
+
+ break;
+ }
+
+ metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
+ }
+ }
+ }
+
+ return parameters;
+}
+
+// Returns the zero-based index of the kernel whose entry address (relative to
+// the first program segment's pVaddr) equals kernelAddress, or -1 when no
+// metadata record matches. Walks the same "opencl.kernelArgs" sections as
+// deduceKernelParameters (see the duplication note there).
+int32_t getKernelId(
+ const char* ELFData,
+ uint32_t kernelAddress) {
+ IE_ASSERT(ELFData != nullptr);
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
+ auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
+ auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
+ auto firstStr = ELFData + strShdr->shOffset;
+
+ // NOTE(review): kernelArgStrings is located but never read in this
+ // function — the lookup (and its assert) could be dropped here.
+ const char* kernelArgStrings = nullptr;
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
+ kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
+ break;
+ }
+ }
+ IE_ASSERT(kernelArgStrings != nullptr);
+
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
+ auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
+ auto numKernels = *reinterpret_cast<const int*>(ptr);
+
+ auto metaOffset = sizeof(int);
+ for (int k = 0; k < numKernels; k++) {
+ auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
+
+ if (kHdr->address-phdr->pVaddr == kernelAddress) {
+ return k;
+ }
+
+ metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
+ }
+ }
+ }
+
+ return -1;
+}
+
+// Resolves a kernel's entry-point offset (symbol value minus the first
+// program segment's pVaddr) by symbol name; throws when the symbol is absent.
+// NOTE(review): symbol names are matched case-insensitively (CaselessEq),
+// which is unusual for ELF symbols — confirm this is intended rather than a
+// convenience carried over from the XML parsing code.
+uint32_t getKernelEntry(const char* ELFData, const std::string& kernelName) {
+ IE_ASSERT(ELFData != nullptr);
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
+ auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
+
+ const Elf32Section* strShdr = nullptr;
+ const Elf32Section* symShdr = nullptr;
+ std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
+ IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
+
+ auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
+ auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
+ auto firstStr = ELFData + strShdr->shOffset;
+
+ for (size_t i = 0; i < numSymEntries; i++) {
+ if (cmp(firstStr + sym[i].stName, kernelName)) {
+ return sym[i].stValue - phdr->pVaddr;
+ }
+ }
+
+ THROW_IE_EXCEPTION << "Cannot find kernel entry point for custom kernel " << kernelName;
+}
+
+// Builds a CustomKernel from its XML <Kernel> node: concatenates all <Source>
+// binaries, resolves the entry point named by the "entry" attribute in the
+// resulting ELF, then parses parameter bindings and work-size rules.
+CustomKernel::CustomKernel(const pugi::xml_node& kernel, std::string configDir): _configDir {std::move(configDir)} {
+ _maxShaves = XMLParseUtils::GetIntAttr(kernel, "max-shaves", 0);
+
+ for (auto source = kernel.child("Source"); !source.empty(); source = source.next_sibling("Source")) {
+ auto fileName = _configDir + "/" + XMLParseUtils::GetStrAttr(source, "filename", "");
+
+ std::ifstream inputFile(fileName, std::ios::binary);
+ if (!inputFile.is_open()) {
+ THROW_IE_EXCEPTION << "Couldn't open kernel file " << fileName;
+ }
+
+ std::ostringstream contentStream;
+ contentStream << inputFile.rdbuf();
+ _kernelBinary.append(contentStream.str());
+ }
+
+ // NOTE(review): &_kernelBinary[0] is undefined for an empty string —
+ // a <Kernel> with no <Source> children would reach here; consider guarding.
+ const auto kernelEntryName = XMLParseUtils::GetStrAttr(kernel, "entry");
+ const auto kernelEntry = getKernelEntry(&_kernelBinary[0], kernelEntryName);
+ _parameters = deduceKernelParameters(&_kernelBinary[0], kernelEntry);
+ _kernelId = getKernelId(&_kernelBinary[0], kernelEntry);
+
+ processParametersNode(kernel);
+ processWorkSizesNode(kernel);
+
+ // Count arguments fed by network inputs/data so stages know how many
+ // inputs precede the outputs in the binding list.
+ const auto isInputData = [&](const CustomKernel::KernelParam& param) {
+ return param.type == CustomParamType::Input || param.type == CustomParamType::InputBuffer ||
+ param.type == CustomParamType::Data;
+ };
+
+ _inputDataCount = std::count_if(begin(_kernelParams), end(_kernelParams), isInputData);
+}
+
+std::pair<CustomDimSource, int> parseDimSource(const std::string& dims) {
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+ const auto pos = dims.find_first_of(',');
+ const auto source = dims.substr(0, pos);
+ const auto dimSource = [&] {
+ if (cmp(source, "input")) {
+ return CustomDimSource::Input;
+ } else if (cmp(source, "output")) {
+ return CustomDimSource::Output;
+ } else {
+ THROW_IE_EXCEPTION << "Invalid dim source argument" << source;
+ }
+ }();
+
+ const auto idx = [&] {
+ if (pos == std::string::npos) {
+ return -1;
+ }
+ const auto idxString = dims.substr(pos + 1, std::string::npos);
+ return std::stoi(idxString);
+ }();
+
+ return std::make_pair(dimSource, idx);
+}
+
+
+CustomDataFormat formatFromString(const std::string& str) {
+ static const ie::details::caseless_map<std::string, CustomDataFormat> FormatNameToType = {
+ { "BFYX" , CustomDataFormat::BFYX },
+ { "BYXF" , CustomDataFormat::BYXF },
+ { "FYX" , CustomDataFormat::FYX },
+ { "YXF" , CustomDataFormat::YXF },
+ { "BF" , CustomDataFormat::BF },
+ { "ANY" , CustomDataFormat::Any }
+ };
+
+ auto it = FormatNameToType.find(str);
+ if (it != FormatNameToType.end()) {
+ return it->second;
+ }
+
+ THROW_IE_EXCEPTION << "Tensor node has an invalid format '" << str << "'";
+}
+
+SmallVector<std::string> parseSizeRule(const std::string& size) {
+ auto result = SmallVector<std::string>();
+ result.reserve(std::count(begin(size), end(size), ',') + 1);
+ std::stringstream sizeRules{size};
+ std::string bufferSize;
+
+ while (std::getline(sizeRules, bufferSize, ',')) {
+ result.push_back(bufferSize);
+ }
+
+ return result;
+}
+
+void CustomKernel::processParametersNode(const pugi::xml_node& node) {
+ const auto cmp = ie::details::CaselessEq<std::string> {};
+ const auto parameters = node.child("Parameters");
+
+ for (auto tensor = parameters.child("Tensor"); !tensor.empty(); tensor = tensor.next_sibling("Tensor")) {
+ KernelParam kp;
+
+ auto typeStr = XMLParseUtils::GetStrAttr(tensor, "type");
+ if (cmp(typeStr, "input")) {
+ kp.type = CustomParamType::Input;
+ } else if (cmp(typeStr, "output")) {
+ kp.type = CustomParamType::Output;
+ } else if (cmp(typeStr, "input_buffer")) {
+ kp.type = CustomParamType::InputBuffer;
+ } else if (cmp(typeStr, "output_buffer")) {
+ kp.type = CustomParamType::OutputBuffer;
+ } else if (cmp(typeStr, "data")) {
+ kp.type = CustomParamType::Data;
+ } else {
+ THROW_IE_EXCEPTION << "Tensor node has an invalid type '" << typeStr << "'";
+ }
+
+ if (kp.type == CustomParamType::InputBuffer || kp.type == CustomParamType::OutputBuffer) {
+ const auto sizeRule = XMLParseUtils::GetStrAttr(tensor, "size");
+ kp.bufferSizeRule = parseSizeRule(sizeRule)[0];
+
+ const auto dimString = XMLParseUtils::GetStrAttr(tensor, "dim");
+ std::tie(kp.dimSource, kp.dimIdx) = parseDimSource(dimString);
+ }
+
+ kp.format = formatFromString(XMLParseUtils::GetStrAttr(tensor, "format", "BFYX"));
+ kp.argName = XMLParseUtils::GetStrAttr(tensor, "arg-name");
+ kp.portIndex = XMLParseUtils::GetIntAttr(tensor, "port-index");
+
+ _kernelParams.push_back(std::move(kp));
+ }
+
+ for (auto data = parameters.child("Data"); !data.empty(); data = data.next_sibling("Data")) {
+ KernelParam kp;
+
+ auto typeStr = XMLParseUtils::GetStrAttr(data, "type");
+ if (cmp(typeStr, "data")) {
+ kp.type = CustomParamType::Data;
+ } else if (cmp(typeStr, "local_data")) {
+ kp.type = CustomParamType::LocalData;
+ } else {
+ THROW_IE_EXCEPTION << "Data node has an invalid type '" << typeStr << "'";
+ }
+
+ kp.argName = XMLParseUtils::GetStrAttr(data, "arg-name");
+
+ kp.irSource = XMLParseUtils::GetStrAttr(data, "source", "");
+ const auto dimString = XMLParseUtils::GetStrAttr(data, "dim", "");
+
+ if (kp.irSource.empty() && dimString.empty()) {
+ THROW_IE_EXCEPTION << "Data node has no source or dim";
+ }
+
+ if (!kp.irSource.empty() && !dimString.empty()) {
+ THROW_IE_EXCEPTION << "Data node can only have source or dim";
+ }
+
+ if (kp.type == CustomParamType::LocalData) {
+ const auto bufferSize = XMLParseUtils::GetStrAttr(data, "size", "");
+ kp.bufferSizeRule = bufferSize;
+
+ if (!dimString.empty()) {
+ std::tie(kp.dimSource, kp.dimIdx) = parseDimSource(dimString);
+ }
+ }
+
+ _kernelParams.push_back(std::move(kp));
+ }
+
+ for (auto scalar = parameters.child("Scalar"); !scalar.empty(); scalar = scalar.next_sibling("Scalar")) {
+ KernelParam kp;
+
+ const auto type = XMLParseUtils::GetStrAttr(scalar, "type");
+ if (cmp(type, "int")) {
+ kp.type = CustomParamType::Int;
+ } else if (cmp(type, "float")) {
+ kp.type = CustomParamType::Float;
+ } else {
+ THROW_IE_EXCEPTION << "Scalar node has an invalid type " << type;
+ }
+
+ kp.argName = XMLParseUtils::GetStrAttr(scalar, "arg-name");
+ kp.portIndex = XMLParseUtils::GetIntAttr(scalar, "port-index", -1);
+ kp.irSource = XMLParseUtils::GetStrAttr(scalar, "source", "");
+
+ _kernelParams.push_back(std::move(kp));
+ }
+}
+
+void CustomKernel::processWorkSizesNode(const pugi::xml_node& node) {
+ const auto workSizes = node.child("WorkSizes");
+
+ const auto dims = XMLParseUtils::GetStrAttr(workSizes, "dim");
+ std::tie(_wgDimSource, _wgDimIdx) = parseDimSource(dims);
+
+ const auto gwgs = XMLParseUtils::GetStrAttr(workSizes, "global");
+ _globalGridSizeRules = parseSizeRule(gwgs);
+
+ const auto lwgs = XMLParseUtils::GetStrAttr(workSizes, "local");
+ _localGridSizeRules = parseSizeRule(lwgs);
+}
+
+} // namespace vpu
//
#include <vpu/frontend/custom_layer.hpp>
+#include <vpu/utils/numeric.hpp>
#include <climits>
#include <streambuf>
#include <tuple>
#include <utility>
-#include <memory>
#include <string>
#include <vector>
#include <vpu/utils/simple_math.hpp>
#include <vpu/utils/error.hpp>
-#include <vpu/utils/extra.hpp>
+#include <cstring>
namespace vpu {
namespace {
-VPU_PACKED(Elf32Ehdr {
- uint8_t offs1[28];
- uint32_t ePhoff; // Program header offset
- uint32_t eShoff; // Section header offset
- uint8_t offs2[12];
- uint16_t eShnum; // Number of sections
- uint16_t offs3;
-};)
-
-VPU_PACKED(Elf32Section {
- uint32_t shName;
- uint32_t shType;
- uint32_t shFlags;
- uint32_t shAddr;
- uint32_t shOffset;
- uint32_t shSize;
- uint32_t shLink;
- uint32_t shInfo;
- uint32_t shAddralign;
- uint32_t shEntsize;
-};)
-
-VPU_PACKED(Elf32Phdr {
- uint32_t pType; // Identifies program segment type
- uint32_t pOffset; // Segment file offset
- uint32_t pVaddr; // Segment virtual address
- uint32_t pPaddr; // Segment physical address
- uint32_t pFilesz; // Segment size in file
- uint32_t pMemsz; // Segment size in memory
- uint32_t pFlags; // Flags position from ELF standard spec
- uint32_t pAlign; // Segment alignment, file & memory
-};)
-
-VPU_PACKED(Elf32Sym {
- uint32_t stName;
- uint32_t stValue;
- uint32_t stSize;
- uint8_t stInfo;
- uint8_t stOther;
- uint16_t stShndx;
-};)
-
-VPU_PACKED(KernelHdr {
- uint32_t address; // Kernel address
- uint32_t flags; // Should be 0 for now
- uint32_t sectionSize; // Section size, offset to the next kernel
- uint32_t argOffset; // offset to arguments
- uint32_t stackSize; // Size of the stack required for kernel
- uint32_t stackSizeWI; // Size of the stack required for kernel per WI
-};)
-
-VPU_PACKED(KernelArgHdr {
- uint32_t stringOffset;
- uint32_t addressSpace;
- uint32_t typeOffset;
- uint32_t size;
- uint32_t laneSize;
-};)
-
-enum Flags {
- CL_Vecz = 0x01,
- CL_Unrolled = 0x02,
- CL_Predicated = 0x04,
- CL_Dma = 0x08,
- CL_VeczDma = 0x10
-};
-
-std::pair<const Elf32Section*, const Elf32Section*> findSymbolTable(
- const char* ELFData) {
- const uint32_t SYMTAB = 2; // Link editing symbol table
- const uint32_t STRTAB = 3; // A string table
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- for (size_t i = 0; i < ehdr->eShnum; i++) {
- if (shdr[i].shType == STRTAB && strShdr == nullptr) {
- strShdr = &shdr[i];
- } else if (shdr[i].shType == SYMTAB && symShdr == nullptr) {
- symShdr = &shdr[i];
- }
-
- if (symShdr != nullptr && strShdr != nullptr)
- break;
- }
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- return std::make_pair(strShdr, symShdr);
-}
-
-uint32_t getKernelEntry(const char* ELFData, const std::string& kernelName) {
- ie::details::CaselessEq<std::string> cmp;
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, kernelName)) {
- return sym[i].stValue - phdr->pVaddr;
- }
+void assertExactlyOneOccurrence(const pugi::xml_node &node, const SmallVector<std::string>& childs) {
+ for (const auto &name : childs) {
+ const auto& child = node.child(name.c_str());
+ VPU_THROW_UNLESS(!child.empty(), "Required parameter %s is not found", name);
+ VPU_THROW_UNLESS(child.next_sibling(name.c_str()).empty(),
+ "Found several definitions of the parameter %s", name);
}
-
- VPU_THROW_EXCEPTION << "Cannot find kernel entry point for custom kernel " << kernelName;
}
-SmallVector<std::string> deduceKernelParameters(
- const char* ELFData,
- uint32_t kernelAddress) {
- ie::details::CaselessEq<std::string> cmp;
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- const char* kernelArgStrings = nullptr;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
- kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
- break;
- }
- }
- IE_ASSERT(kernelArgStrings != nullptr);
-
- SmallVector<std::string> parameters;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
- auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
- auto numKernels = *reinterpret_cast<const int*>(ptr);
-
- auto metaOffset = sizeof(int);
- for (int k = 0; k < numKernels; k++) {
- auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
-
- if (kHdr->address-phdr->pVaddr == kernelAddress) {
- auto aHdr = reinterpret_cast<const KernelArgHdr*>(
- reinterpret_cast<const char*>(&(kHdr->argOffset)) + sizeof(kHdr->argOffset) + kHdr->argOffset);
-
- auto numArgs = reinterpret_cast<const int*>(aHdr)[-1];
- for (int n = 0; n < numArgs; n++, aHdr++) {
- parameters.push_back(kernelArgStrings + aHdr->stringOffset);
- }
-
- break;
- }
-
- metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
- }
- }
+void assertOneOrMoreOccurrence(const pugi::xml_node &node, const SmallVector<std::string>& childs) {
+ for (const auto& name : childs) {
+ const auto& child = node.child(name.c_str());
+ VPU_THROW_UNLESS(!child.empty(),
+ "Required parameter %s is not found", name);
}
-
- return parameters;
}
-std::pair<uint32_t, uint32_t> deduceVectorized(
- const char* ELFData,
- uint32_t kernelAddress) {
- ie::details::CaselessEq<std::string> cmp;
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- const char* kernelArgStrings = nullptr;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
- kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
- break;
- }
- }
- IE_ASSERT(kernelArgStrings != nullptr);
-
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
- auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
- auto numKernels = *reinterpret_cast<const int*>(ptr);
-
- auto metaOffset = sizeof(int);
- for (int k = 0; k < numKernels; k++) {
- auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
-
- if (kHdr->address-phdr->pVaddr == kernelAddress && kHdr->flags == 1) {
- auto vecInfo = reinterpret_cast<const uint32_t*>(kHdr + 1);
- return std::make_pair(vecInfo[1], vecInfo[0]-phdr->pVaddr);
- }
-
- metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
- }
- }
+void assertZeroOrOneOccurrence(const pugi::xml_node& node, const SmallVector<std::string>& childNames) {
+ for (const auto& name : childNames) {
+ const auto& child = node.child(name.c_str());
+ VPU_THROW_UNLESS(!child.empty() || child.next_sibling(name.c_str()).empty(),
+ "Found several definitions of the parameter %s", name);
}
-
- return std::make_pair(0, 0);
}
-int32_t getKernelId(
- const char* ELFData,
- uint32_t kernelAddress) {
- ie::details::CaselessEq<std::string> cmp;
-
- IE_ASSERT(ELFData != nullptr);
-
- auto ehdr = reinterpret_cast<const Elf32Ehdr*>(ELFData);
- auto phdr = reinterpret_cast<const Elf32Phdr*>(ELFData + ehdr->ePhoff);
- auto shdr = reinterpret_cast<const Elf32Section*>(ELFData + ehdr->eShoff);
-
- const Elf32Section* strShdr = nullptr;
- const Elf32Section* symShdr = nullptr;
- std::tie(strShdr, symShdr) = findSymbolTable(ELFData);
- IE_ASSERT(symShdr != nullptr && strShdr != nullptr);
-
- auto numSymEntries = symShdr->shSize / symShdr->shEntsize;
- auto sym = reinterpret_cast<const Elf32Sym*>(ELFData + symShdr->shOffset);
- auto firstStr = ELFData + strShdr->shOffset;
-
- const char* kernelArgStrings = nullptr;
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.strings")) {
- kernelArgStrings = ELFData + shdr[sym[i].stShndx].shOffset;
- break;
+void assertNoEmptyAttributes(const pugi::xml_node& customLayer) {
+ const auto checkAttributes = [&customLayer](const pugi::xml_node& node) {
+ for (const auto& attr : node.attributes()) {
+ VPU_THROW_UNLESS(strlen(attr.value()) != 0,
+ "Wrong custom layer XML: Custom layer %s has node <%s> with an empty attribute %s",
+ customLayer.attribute("name").value(), node.name(), attr.name());
}
- }
- IE_ASSERT(kernelArgStrings != nullptr);
-
- for (size_t i = 0; i < numSymEntries; i++) {
- if (cmp(firstStr + sym[i].stName, "opencl.kernelArgs.info")) {
- auto ptr = ELFData + shdr[sym[i].stShndx].shOffset;
- auto numKernels = *reinterpret_cast<const int*>(ptr);
-
- auto metaOffset = sizeof(int);
- for (int k = 0; k < numKernels; k++) {
- auto kHdr = reinterpret_cast<const KernelHdr*>(ptr + metaOffset);
+ };
- if (kHdr->address-phdr->pVaddr == kernelAddress) {
- return k;
- }
+ checkAttributes(customLayer);
- metaOffset += kHdr->sectionSize + sizeof(kHdr->address) + sizeof(kHdr->flags);
- }
- }
+ for (const auto& child : customLayer.children()) {
+ assertNoEmptyAttributes(child);
}
-
- return -1;
}
} // namespace
ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> CustomLayer::loadFromFile(
const std::string& configFile,
bool canBeMissed) {
- ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> out;
-
pugi::xml_document xmlDoc;
pugi::xml_parse_result res = xmlDoc.load_file(configFile.c_str());
if (res.status != pugi::status_ok) {
if (canBeMissed) {
// Config file might not exist - like global config, for example.
- return out;
+ return {};
} else {
- VPU_THROW_EXCEPTION
- << "Failed to load custom layer configuration file " << configFile
- << " : " << res.description()
- << " at offset " << res.offset;
+ VPU_THROW_FORMAT("Failed to load custom layer configuration file %s : %s at offset %s",
+ configFile, res.description(), res.offset);
}
}
auto abs_path_ptr = realpath(configFile.c_str(), path);
#endif
- if (abs_path_ptr == nullptr) {
- VPU_THROW_EXCEPTION
- << "Failed to load custom layer configuration file " << configFile
- << " : can't get canonicalized absolute path";
- }
+ VPU_THROW_UNLESS(abs_path_ptr != nullptr,
+ "Failed to load custom layer configuration file %s : can't get canonicalized absolute path", configFile);
std::string abs_file_name(path);
// Try extracting directory from config path.
auto dir_split_pos = abs_file_name.find_last_of("/\\");
- auto colon_pos = abs_file_name.find_first_of(":");
- auto first_slash_pos = abs_file_name.find_first_of("/");
+ auto colon_pos = abs_file_name.find_first_of(':');
+ auto first_slash_pos = abs_file_name.find_first_of('/');
// If path is absolute.
std::string dir_path;
<< " : path is not valid";
}
+ auto out = ie::details::caseless_map<std::string, std::vector<CustomLayer::Ptr>> {};
for (auto r = xmlDoc.document_element(); r; r = r.next_sibling()) {
- CustomLayer::Ptr layer(new CustomLayer(dir_path));
-
- layer->loadSingleLayer(r);
-
- out[layer->_layerName].push_back(layer);
+ auto layerPtr = std::make_shared<CustomLayer>(dir_path, r);
+ out[layerPtr->_layerName].push_back(std::move(layerPtr));
}
return out;
}
-int CustomLayer::maxShaves() const {
- return _maxShaves;
-}
-
-void CustomLayer::setStageNumInputs(int id) {
- _stageNumInputs = id;
-}
-
-int CustomLayer::stageNumInputs() const {
- return _stageNumInputs;
-}
-
-uint32_t CustomLayer::kernelAddress(int idx) const {
- for (const auto& x : _kernelAddress) {
- if ((checked_cast<uint32_t>(idx) % x.first) == 0) {
- return x.second;
- }
- }
-
- auto it = _kernelAddress.find(1);
- IE_ASSERT(it != _kernelAddress.end());
-
- return it->second;
-}
-
-int CustomLayer::kernelId() const {
- uint32_t kernelAddress = getKernelEntry(&_kernelBinary[0], _kernelEntry);
- return getKernelId(&_kernelBinary[0], kernelAddress);
-}
-
-void CustomLayer::loadSingleLayer(const pugi::xml_node& node) {
- ie::details::CaselessEq<std::string> cmp;
-
- std::string nodeName(node.name());
- if (!cmp(nodeName, "CustomLayer")) {
- VPU_THROW_EXCEPTION << "Wrong custom layer XML : Node is not CustomLayer, but " << nodeName;
- }
-
- auto nodeType = XMLParseUtils::GetStrAttr(node, "type", "");
- if (!cmp(nodeType, "MVCL")) {
- VPU_THROW_EXCEPTION << "Wrong custom layer XML : Type is not MVCL, but " << nodeType;
- }
-
- auto version = XMLParseUtils::GetIntAttr(node, "version", -1);
- IE_ASSERT(version == 1);
-
- auto layerStage = XMLParseUtils::GetStrAttr(node, "stage", "");
- auto layerName = XMLParseUtils::GetStrAttr(node, "name", "");
- if (layerName.empty()) {
- VPU_THROW_EXCEPTION << "Missing Layer name in CustomLayer";
- }
- _layerName = layerStage.empty() ? layerName : layerName + "@stage_" + layerStage;
-
- _maxShaves = XMLParseUtils::GetIntAttr(node, "max-shaves", 0);
-
- processWhere(node.child("Where"));
-
- processKernelNode(node.child("Kernel"));
-
- processParametersNode(node.child("Parameters"));
-
- processWorkSizesNode(node.child("WorkSizes"));
-}
-
-void CustomLayer::processWhere(const pugi::xml_node& node) {
- for (auto child : node.attributes()) {
- _whereParams[child.name()] = child.value();
- }
-}
-
-void CustomLayer::processKernelNode(const pugi::xml_node& node) {
- ie::details::CaselessEq<std::string> cmp;
-
- std::string nodeName(node.name());
- if (!cmp(nodeName, "Kernel")) {
- VPU_THROW_EXCEPTION << "Wrong node, expected Kernel found " << nodeName;
- }
-
- if (!_kernelBinary.empty()) {
- VPU_THROW_EXCEPTION << "Multiple definition of Kernel";
- }
-
- _kernelEntry = XMLParseUtils::GetStrAttr(node, "entry", "");
- if (_kernelEntry.empty()) {
- VPU_THROW_EXCEPTION << "No Kernel entry in custom layer";
- }
-
- _kernelBinary.clear();
- for (auto sourceNode = node.child("Source"); !sourceNode.empty(); sourceNode = sourceNode.next_sibling("Source")) {
- auto fileName = _configDir + "/" + XMLParseUtils::GetStrAttr(sourceNode, "filename", "");
-
- std::ifstream inputFile(fileName, std::ios::binary);
- if (!inputFile.is_open()) {
- VPU_THROW_EXCEPTION << "Couldn't open kernel file " << fileName;
- }
- std::ostringstream contentStream;
- contentStream << inputFile.rdbuf();
- _kernelBinary.append(contentStream.str());
- }
+CustomLayer::CustomLayer(std::string configDir, const pugi::xml_node& customLayer) : _configDir(std::move(configDir)) {
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+ const auto nodeName = customLayer.name();
+ VPU_THROW_UNLESS(cmp(nodeName, "CustomLayer"),
+ "Wrong custom layer XML : Node is not CustomLayer, but %s", nodeName);
- _kernelAddress[1] = getKernelEntry(&_kernelBinary[0], _kernelEntry);
+ const auto nodeType = XMLParseUtils::GetStrAttr(customLayer, "type");
+ VPU_THROW_UNLESS(cmp(nodeType, "MVCL"),
+ "Wrong custom layer XML : Type is not MVCL, but %s", nodeType);
- _parameters = deduceKernelParameters(&_kernelBinary[0], _kernelAddress[1]);
+ const auto version = XMLParseUtils::GetIntAttr(customLayer, "version");
+ VPU_THROW_UNLESS(version == 1, "Wrong custom layer XML : only version 1 is supported");
- auto vecInfo = deduceVectorized(&_kernelBinary[0], _kernelAddress[1]);
- if (vecInfo.first != 0) {
- _kernelAddress[vecInfo.first] = vecInfo.second;
- }
-}
+ _layerName = XMLParseUtils::GetStrAttr(customLayer, "name");
-void CustomLayer::processParametersNode(const pugi::xml_node& node) {
- ie::details::CaselessEq<std::string> cmp;
+ assertNoEmptyAttributes(customLayer);
- std::string nodeName(node.name());
- if (!cmp(nodeName, "Parameters")) {
- VPU_THROW_EXCEPTION << "Wrong node, expected Parameters found " << nodeName;
+ assertZeroOrOneOccurrence(customLayer, {"Where"});
+ const auto whereNode = customLayer.child("Where");
+ for (auto where : whereNode.attributes()) {
+ _whereParams[where.name()] = where.value();
}
- for (auto tensorNode = node.child("Tensor"); !tensorNode.empty(); tensorNode = tensorNode.next_sibling("Tensor")) {
- KernelParam kp;
-
- auto typeStr = XMLParseUtils::GetStrAttr(tensorNode, "type");
- if (cmp(typeStr, "input")) {
- kp.type = CustomParamType::Input;
- } else if (cmp(typeStr, "output")) {
- kp.type = CustomParamType::Output;
- } else if (cmp(typeStr, "input_buffer")) {
- kp.type = CustomParamType::InputBuffer;
- } else if (cmp(typeStr, "output_buffer")) {
- kp.type = CustomParamType::OutputBuffer;
- } else if (cmp(typeStr, "data")) {
- kp.type = CustomParamType::Data;
- } else {
- VPU_THROW_EXCEPTION << "Tensor node has an invalid type " << typeStr;
+ assertOneOrMoreOccurrence(customLayer, {"Kernel"});
+ auto kernelNodes = [&] {
+ auto nodes = SmallVector<pugi::xml_node>{};
+ for (auto kernel = customLayer.child("Kernel"); !kernel.empty(); kernel = kernel.next_sibling("Kernel")) {
+ assertExactlyOneOccurrence(kernel, {"Parameters", "WorkSizes"});
+ assertOneOrMoreOccurrence(kernel, {"Source"});
+ nodes.push_back(kernel);
}
+ return nodes;
+ }();
- kp.format = formatFromString(XMLParseUtils::GetStrAttr(tensorNode, "format", "BFYX"));
- if (kp.format == CustomDataFormat::None) {
- VPU_THROW_EXCEPTION << "Tensor node has an invalid format " << kp.format;
- }
+ if (kernelNodes.size() == 1) {
+ _kernels.emplace_back(kernelNodes.front(), _configDir);
+ } else {
+ auto stageOrder = std::map<int, CustomKernel>{};
+ for (auto& kernel : kernelNodes) {
+ const auto stageAttr = kernel.attribute("stage");
+ VPU_THROW_UNLESS(stageAttr, "Error while binding %s custom layer: for multi-kernel binding, "
+ "each kernel should be provided with 'stage' attribute.", _layerName);
- kp.argName = XMLParseUtils::GetStrAttr(tensorNode, "arg-name");
- if (kp.argName.empty()) {
- VPU_THROW_EXCEPTION << "Tensor node has no arg-name";
- }
+ const auto stageNum = std::stod(stageAttr.value());
+ VPU_THROW_UNLESS(stageOrder.find(stageNum) == stageOrder.end(),
+ "Error while binding %s custom layer: found duplicating stage id.", _layerName);
- kp.portIndex = XMLParseUtils::GetIntAttr(tensorNode, "port-index", -1);
- if (kp.portIndex == -1) {
- VPU_THROW_EXCEPTION << "Tensor node has no port-index";
+ stageOrder.emplace(stageNum, CustomKernel{kernel, _configDir});
}
- if (kp.type == CustomParamType::InputBuffer || kp.type == CustomParamType::OutputBuffer) {
- std::string bufferSize(XMLParseUtils::GetStrAttr(tensorNode, "size", ""));
- while (!bufferSize.empty()) {
- auto pos = bufferSize.find_first_of(',');
- auto rule = bufferSize.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid BufferSize " << rule;
- }
-
- kp.bufferSizeRules.emplace_back(std::move(rule));
-
- if (pos == std::string::npos) {
- bufferSize.clear();
- } else {
- bufferSize = bufferSize.substr(pos + 1, std::string::npos);
- }
- }
-
- kp.dimIdx = -1;
- std::string dim_src_string(XMLParseUtils::GetStrAttr(tensorNode, "dim", ""));
- if (!dim_src_string.empty()) {
- // Try to locate index separator.
- auto pos = dim_src_string.find_first_of(',');
- auto flag = dim_src_string.substr(0, pos);
- if (cmp(flag, "input")) {
- kp.dimSource = CustomDimSource::Input;
- } else if (cmp(flag, "output")) {
- kp.dimSource = CustomDimSource::Output;
- } else {
- VPU_THROW_EXCEPTION << "Invalid WG dim source " << flag;
- }
-
- int idx = 0;
- if (pos != std::string::npos) {
- // User explicitly set input index in config.
- auto idx_string = dim_src_string.substr(pos + 1, std::string::npos);
- idx = std::stoi(idx_string);
- }
- if (idx < 0) {
- VPU_THROW_EXCEPTION << "Invalid tensor index " << idx;
- }
+ VPU_THROW_UNLESS(stageOrder.begin()->first == 0,
+ "Error while binding %s custom layer: Stage 0 is not found.", _layerName);
+ VPU_THROW_UNLESS(stageOrder.rbegin()->first == stageOrder.size() - 1,
+ "Error while binding %s custom layer: Kernels should have stage id from 0 to N.", _layerName);
- kp.dimIdx = idx;
- }
+ for (auto& stage : stageOrder) {
+ _kernels.push_back(std::move(stage.second));
}
-
- kp.irSource.clear();
-
- _kernelParams.emplace_back(std::move(kp));
}
- for (auto dataNode = node.child("Data"); !dataNode.empty(); dataNode = dataNode.next_sibling("Data")) {
- KernelParam kp;
-
- auto typeStr = XMLParseUtils::GetStrAttr(dataNode, "type");
- if (cmp(typeStr, "data")) {
- kp.type = CustomParamType::Data;
- } else if (cmp(typeStr, "local_data")) {
- kp.type = CustomParamType::LocalData;
- } else {
- VPU_THROW_EXCEPTION << "Data node has an invalid type " << typeStr;
- }
-
- kp.format = CustomDataFormat::Any;
-
- kp.argName = XMLParseUtils::GetStrAttr(dataNode, "arg-name");
- if (kp.argName.empty()) {
- VPU_THROW_EXCEPTION << "Data node has no arg-name";
- }
-
- kp.portIndex = -1;
-
- kp.irSource = XMLParseUtils::GetStrAttr(dataNode, "source", "");
- std::string dim_src_string(XMLParseUtils::GetStrAttr(dataNode, "dim", ""));
-
- if (kp.irSource.empty() && dim_src_string.empty()) {
- VPU_THROW_EXCEPTION << "Data node has no source or dim";
+ const auto addPorts = [](std::map<int, CustomDataFormat>& ports, const CustomKernel::KernelParam& newEdge) {
+ const auto layerInput = ports.find(newEdge.portIndex);
+ if (layerInput == ports.end()) {
+ ports.emplace(newEdge.portIndex, newEdge.format);
+ } else if (newEdge.format == CustomDataFormat::Any) {
+ return;
+ } else if (layerInput->second == CustomDataFormat::Any) {
+ layerInput->second = newEdge.format;
}
+ };
- if (!kp.irSource.empty() && !dim_src_string.empty()) {
- VPU_THROW_EXCEPTION << "Data node can only have source or dim";
- }
-
- kp.dimIdx = -1;
- if (kp.type == CustomParamType::LocalData) {
- std::string bufferSize(XMLParseUtils::GetStrAttr(dataNode, "size", ""));
- while (!bufferSize.empty()) {
- auto pos = bufferSize.find_first_of(',');
- auto rule = bufferSize.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid BufferSize " << rule;
- }
-
- kp.bufferSizeRules.emplace_back(std::move(rule));
-
- if (pos == std::string::npos) {
- bufferSize.clear();
- } else {
- bufferSize = bufferSize.substr(pos + 1, std::string::npos);
- }
+ for (const auto& kernel : _kernels) {
+ for (const auto& binding : kernel.bindings()) {
+ if (binding.type == CustomParamType::Input) {
+ addPorts(_inputs, binding);
}
-
- kp.dimIdx = -1;
- std::string dim_src_string(XMLParseUtils::GetStrAttr(dataNode, "dim", ""));
- if (!dim_src_string.empty()) {
- // Try to locate index separator.
- auto pos = dim_src_string.find_first_of(',');
- auto flag = dim_src_string.substr(0, pos);
- if (cmp(flag, "input")) {
- kp.dimSource = CustomDimSource::Input;
- } else if (cmp(flag, "output")) {
- kp.dimSource = CustomDimSource::Output;
- } else {
- VPU_THROW_EXCEPTION << "Invalid WG dim source " << flag;
- }
-
- int idx = 0;
- if (pos != std::string::npos) {
- // User explicitly set input index in config.
- auto idx_string = dim_src_string.substr(pos + 1, std::string::npos);
- idx = std::stoi(idx_string);
- }
- if (idx < 0) {
- VPU_THROW_EXCEPTION << "Invalid tensor index " << idx;
- }
-
- kp.dimIdx = idx;
+ if (binding.type == CustomParamType::Output) {
+ addPorts(_outputs, binding);
}
}
-
- _kernelParams.emplace_back(std::move(kp));
- }
-
- for (auto scalarNode = node.child("Scalar"); !scalarNode.empty(); scalarNode = scalarNode.next_sibling("Scalar")) {
- KernelParam kp;
-
- std::string typeStr = XMLParseUtils::GetStrAttr(scalarNode, "type");
- if (cmp(typeStr, "int")) {
- kp.type = CustomParamType::Int;
- } else if (cmp(typeStr, "float")) {
- kp.type = CustomParamType::Float;
- } else {
- VPU_THROW_EXCEPTION << "Scalar node has an invalid type " << typeStr;
- }
-
- kp.format = CustomDataFormat::Any;
-
- kp.argName = XMLParseUtils::GetStrAttr(scalarNode, "arg-name");
- if (kp.argName.empty()) {
- VPU_THROW_EXCEPTION << "Scalar node has no arg-name";
- }
-
- kp.portIndex = XMLParseUtils::GetIntAttr(scalarNode, "port-index", 0);
-
- kp.irSource = XMLParseUtils::GetStrAttr(scalarNode, "source", "");
- if (kp.irSource.empty()) {
- VPU_THROW_EXCEPTION << "Scalar node has no source";
- }
-
- _kernelParams.emplace_back(std::move(kp));
}
}
-void CustomLayer::processWorkSizesNode(const pugi::xml_node & node) {
- ie::details::CaselessEq<std::string> cmp;
-
- std::string nodeName(node.name());
- if (!cmp(node.name(), "WorkSizes")) {
- VPU_THROW_EXCEPTION << "Wrong node, expected WorkSizes found " << nodeName;
- }
-
- _wgDimIdx = -1;
- std::string dim_src_string(node.attribute("dim").as_string(""));
- if (!dim_src_string.empty()) {
- // Try to locate index separator.
- auto pos = dim_src_string.find_first_of(',');
- auto flag = dim_src_string.substr(0, pos);
- if (cmp(flag, "input")) {
- _wgDimSource = CustomDimSource::Input;
- } else if (cmp(flag, "output")) {
- _wgDimSource = CustomDimSource::Output;
- } else {
- VPU_THROW_EXCEPTION << "Invalid WG dim source " << flag;
- }
-
- int idx = 0;
- if (pos != std::string::npos) {
- // User explicitly set input index in config.
- auto idx_string = dim_src_string.substr(pos + 1, std::string::npos);
- idx = std::stoi(idx_string);
- }
- if (idx < 0) {
- VPU_THROW_EXCEPTION << "Invalid tensor index " << idx;
- }
-
- _wgDimIdx = idx;
- }
-
- std::string gws(node.attribute("global").as_string(""));
- while (!gws.empty()) {
- auto pos = gws.find_first_of(',');
- auto rule = gws.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid WorkSize " << rule;
- }
-
- _globalSizeRules.emplace_back(std::move(rule));
+bool CustomLayer::isLegalSizeRule(const std::string& rule, std::map<std::string, std::string> layerParams) {
+ {
+ auto sizes = SmallVector<std::pair<std::string, std::string>> {
+ { "b", "1" }, { "B", "1" },
+ { "f", "1" }, { "F", "1" },
+ { "y", "1" }, { "Y", "1" },
+ { "x", "1" }, { "X", "1" },
+ };
- if (pos == std::string::npos) {
- gws.clear();
- } else {
- gws = gws.substr(pos + 1, std::string::npos);
- }
- }
-
- std::string lws(node.attribute("local").as_string(""));
- while (!lws.empty()) {
- auto pos = lws.find_first_of(',');
- auto rule = lws.substr(0, pos);
- if (!isLegalSizeRule(rule)) {
- VPU_THROW_EXCEPTION << "Invalid WorkSize " << rule;
- }
-
- _localSizeRules.emplace_back(std::move(rule));
-
- if (pos == std::string::npos) {
- lws.clear();
- } else {
- lws = lws.substr(pos + 1, std::string::npos);
- }
+ std::move(begin(sizes), end(sizes), inserter(layerParams, end(layerParams)));
}
-}
-
-bool CustomLayer::isLegalSizeRule(const std::string& rule) {
- SimpleMathExpression expr;
- expr.setVariables({
- { 'b', 1 }, { 'B', 1 },
- { 'f', 1 }, { 'F', 1 },
- { 'y', 1 }, { 'Y', 1 },
- { 'x', 1 }, { 'X', 1 },
- });
+ MathExpression expr;
+ expr.setVariables(layerParams);
try {
expr.parse(rule);
return true;
}
-CustomDataFormat CustomLayer::formatFromString(const std::string & str) {
- static const ie::details::caseless_map<std::string, CustomDataFormat> FormatNameToType = {
- { "BFYX" , CustomDataFormat::BFYX },
- { "BYXF" , CustomDataFormat::BYXF },
- { "FYX" , CustomDataFormat::FYX },
- { "YXF" , CustomDataFormat::YXF },
- { "ANY" , CustomDataFormat::Any },
+CustomDataFormat CustomLayer::formatFromLayout(const InferenceEngine::Layout& layout) {
+ const auto layoutToFormat = std::map<ie::Layout, CustomDataFormat> {
+ { ie::NCHW , CustomDataFormat::BFYX },
+ { ie::NHWC , CustomDataFormat::BYXF },
+ { ie::CHW , CustomDataFormat::FYX },
+ { ie::NC , CustomDataFormat::BF },
+ { ie::ANY , CustomDataFormat::Any }
};
- auto it = FormatNameToType.find(str);
- if (it != FormatNameToType.end()) {
- return it->second;
- }
+ const auto it = layoutToFormat.find(layout);
+ VPU_THROW_UNLESS(it != layoutToFormat.end(), "Tensor node has an invalid format %s", layout);
+ return it->second;
+}
+
+bool CustomLayer::meetsWhereRestrictions(const std::map<std::string, std::string>& params) const {
+ const auto cmp = ie::details::CaselessEq<std::string>{};
+
+ for (const auto& where : _whereParams) {
+ const auto restrictedParam = [&](const std::pair<std::string, std::string>& param) {
+ return param.first == where.first;
+ };
+
+ const auto param = std::find_if(begin(params), end(params), restrictedParam);
+ if (param == params.end()) {
+ return false;
+ }
- return CustomDataFormat::None;
+ const auto& restriction = where.second;
+ const auto number = parseNumber<float>(param->second);
+
+ const auto meetsRestriction = [&] {
+            // Compare non-numeric restrictions verbatim (e.g. kernel="3,3")
+ if (!number.hasValue()) {
+ return cmp(param->second, restriction);
+ } else {
+ if (restriction[0] == '>' && restriction[1] == '=') {
+ const auto to_compare = std::stof(restriction.substr(2, std::string::npos));
+ return number.get() >= to_compare;
+ } else if (restriction[0] == '<' && restriction[1] == '=') {
+ const auto to_compare = std::stof(restriction.substr(2, std::string::npos));
+ return number.get() <= to_compare;
+ } else if (restriction[0] == '>') {
+ const auto to_compare = std::stof(restriction.substr(1, std::string::npos));
+ return number.get() > to_compare;
+ } else if (restriction[0] == '<') {
+ const auto to_compare = std::stof(restriction.substr(1, std::string::npos));
+ return number.get() < to_compare;
+ } else if (restriction[0] == '!' && restriction[1] == '=') {
+ const auto to_compare = std::stof(restriction.substr(2, std::string::npos));
+ return number.get() != to_compare;
+ }
+ return number.get() == std::stof(restriction);
+ }
+ }();
+
+ if (!meetsRestriction) {
+ return false;
+ }
+ }
+ return true;
}
} // namespace vpu
#include <map>
#include <vector>
#include <utility>
+#include <string>
+
+#include <convert_function_to_cnn_network.hpp>
+#include <generic_ie.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
+#include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
+#include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
namespace vpu {
{"StaticShapeNonZero", LAYER_PARSER(parseNonZero)},
{"ROIAlign", LAYER_PARSER(parseROIAlign)},
{"DynamicShapeResolver", LAYER_PARSER(parseDSR)},
+ {"OutShapeOfReshape", LAYER_PARSER(parseOutShapeOfReshape)},
+ {"StaticShapeBroadcast", LAYER_PARSER(parseBroadcast)},
}} {}
ModelPtr FrontEnd::buildInitialModel(ie::ICNNNetwork& network) {
std::atomic<int> g_counter(0);
-bool hasSuitableCustom(
- const std::vector<CustomLayer::Ptr>& customLayers,
- const ie::CNNLayerPtr& layer) {
- const auto& env = CompileEnv::get();
- ie::details::CaselessEq<std::string> cmp;
+} // namespace
- env.log->trace("Check for suitable custom implementation for layer %s:%s", layer->name, layer->type);
+CustomLayer::Ptr FrontEnd::getSuitableCustomLayer(const std::vector<CustomLayer::Ptr>& customLayers,
+ const ie::CNNLayerPtr& cnnLayer) {
+ const auto& env = CompileEnv::get();
+ env.log->trace("Check for suitable custom implementation for layer %s:%s",
+ cnnLayer->name, cnnLayer->type);
VPU_LOGGER_SECTION(env.log);
- for (const auto& customLayer : customLayers) {
- env.log->trace("Check next custom layer : %v", customLayer->whereParams());
+ const auto cnnInputs = [&] {
+ auto inputs = SmallVector<CustomDataFormat>{};
+ inputs.reserve(cnnLayer->insData.size());
+ for (const auto& input : cnnLayer->insData) {
+ const auto layout = input.lock()->getLayout();
+ const auto format = CustomLayer::formatFromLayout(layout);
+ inputs.push_back(format);
+ }
+ return inputs;
+ }();
+
+ const auto cnnOutputs = [&] {
+ auto outputs = SmallVector<CustomDataFormat>{};
+ outputs.reserve(cnnLayer->outData.size());
+ for (const auto& output : cnnLayer->outData) {
+ const auto layout = output->getLayout();
+ const auto format = CustomLayer::formatFromLayout(layout);
+ outputs.push_back(format);
+ }
+ return outputs;
+ }();
+
+ const auto isSuitableLayer = [&env, &cnnLayer](const CustomLayer::Ptr& customLayer) {
+ env.log->trace("Check next custom layer : %v", customLayer->layerName());
VPU_LOGGER_SECTION(env.log);
- bool suitable = true;
- for (const auto& whereParam : customLayer->whereParams()) {
- const auto iter = layer->params.find(whereParam.first);
- if (iter == layer->params.end() || !cmp(iter->second, whereParam.second)) {
- suitable = false;
- break;
+ if (!customLayer->meetsWhereRestrictions(cnnLayer->params)) {
+ env.log->trace("Where restrictions are not met");
+ return false;
+ }
+
+ for (const auto& kernel : customLayer->kernels()) {
+ const auto& gws = kernel.globalGridSizeRules();
+ const auto& lws = kernel.localGridSizeRules();
+
+ const auto validSizeRule = [&](const std::string& rule) {
+ return CustomLayer::isLegalSizeRule(rule, cnnLayer->params);
+ };
+
+ const auto validGridSizes = std::all_of(begin(gws), end(gws), validSizeRule) &&
+ std::all_of(begin(lws), end(lws), validSizeRule);
+
+ if (!validGridSizes) {
+ env.log->trace("Work group grid sizes are not valid");
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ auto suitableCustomLayers = SmallVector<CustomLayer::Ptr>{};
+
+ std::copy_if(begin(customLayers), end(customLayers),
+ back_inserter(suitableCustomLayers), isSuitableLayer);
+
+ if (suitableCustomLayers.empty()) {
+ return nullptr;
+ }
+
+ const auto inputsLayoutMatch = [&](const SmallVector<CustomDataFormat>& cnnEdges,
+ const std::map<int, CustomDataFormat>& clEdges) {
+ for (const auto clEdge : clEdges) {
+ const auto port = clEdge.first;
+ VPU_THROW_UNLESS(port < cnnEdges.size(),
+ "Can't bind custom layer edge with port '%s' to CNNNetwork layer", port);
+
+ const auto clFormat = clEdge.second;
+ const auto cnnFormat = cnnEdges[port];
+ if (cnnFormat != clFormat &&
+ cnnFormat != CustomDataFormat::Any &&
+ clFormat != CustomDataFormat::Any) {
+ return false;
}
}
+ return true;
+ };
+
- if (suitable) {
- env.log->trace("Matches");
- return true;
+ for (const auto& customLayer : suitableCustomLayers) {
+ const auto clInputs = customLayer->inputs();
+
+ if (inputsLayoutMatch(cnnInputs, clInputs)) {
+ env.log->trace("Found suitable '%s' custom layer", customLayer->layerName());
+ return customLayer;
}
}
- return false;
+ const auto firstGoodLayer = suitableCustomLayers.front();
+ env.log->trace("Found suitable custom layer '%s', but input layouts "
+ "have not matched with what CNNNetwork expected",
+ firstGoodLayer->layerName());
+ return firstGoodLayer;
}
-} // namespace
void FrontEnd::parseLayer(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) {
parseLayer(model, layer, inputs, outputs,
void FrontEnd::parseLayer(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
const FrontEnd::UnsupportedLayerCallback& onUnsupported, const FrontEnd::SupportedLayerCallback& onSupported) {
- const auto customLayerByType = _customLayers.find(layer->type);
- const auto customLayerAsStage = _customLayers.find(layer->type + "@stage_0");
-
- const bool isCustomLayer =
- ((customLayerByType != _customLayers.end()) && hasSuitableCustom(customLayerByType->second, layer)) ||
- ((customLayerAsStage != _customLayers.end()) && hasSuitableCustom(customLayerAsStage->second, layer));
+ const auto customLayer = _customLayers.find(layer->type);
+ const bool isCustomLayer = customLayer != _customLayers.end() && getSuitableCustomLayer(customLayer->second, layer);
const auto& type = isCustomLayer ? "Custom" : layer->type;
if (parsers.count(type) == 0) {
ModelPtr FrontEnd::runCommonPasses(ie::ICNNNetwork& network, const UnsupportedLayerCallback& unsupportedLayer, const SupportedLayerCallback& supportedLayer) {
+    // NGraph -> CNN conversion may be called at 2 different moments: at
+    // the beginning, if conversion was forced by configuration, or after
+    // network batch detection and precision conversions. The conversion utility
+    // returns std::shared_ptr. ICNNNetwork is neither copyable nor movable.
+    // As a result, it is impossible to overwrite the given "network" argument.
+    // Do not use the network parameter in this function, to avoid using the
+    // wrong network reference (e.g. original instead of converted).
+ auto* originalOrConvertNetwork = &network;
+
const auto& env = CompileEnv::get();
//
// Create new VPU model
//
- const auto model = std::make_shared<ModelObj>(network.getName());
+ const auto model = std::make_shared<ModelObj>(originalOrConvertNetwork->getName());
model->attrs().set<int>("index", g_counter.fetch_add(1));
model->attrs().set<Resources>("resources", env.resources);
if (!env.config.ignoreIRStatistic) {
ie::ICNNNetworkStats* stats = nullptr;
// V10 IRs doesn't contain stats
- if (network.getStats(&stats, nullptr) == InferenceEngine::OK && !stats->isEmpty()) {
+ if (originalOrConvertNetwork->getStats(&stats, nullptr) == InferenceEngine::OK && !stats->isEmpty()) {
env.log->trace("Use node statistics from the IR");
model->setNodesStats(stats->getNodesStats());
}
// Update IE Network
//
+ std::shared_ptr<ie::ICNNNetwork> convertedNetwork;
+
{
env.log->trace("Update IE Network");
VPU_LOGGER_SECTION(env.log);
- IE_SUPPRESS_DEPRECATED_START
- // If we have NGraph network, but CNN compatibility is enabled, enforce conversion
- if (network.getFunction() && env.config.forceDeprecatedCnnConversion)
- network.addLayer(nullptr);
- IE_SUPPRESS_DEPRECATED_END
+ auto convertNetwork = [&convertedNetwork, &originalOrConvertNetwork]() {
+ auto nGraphFunc = originalOrConvertNetwork->getFunction();
+ // Disable shape inference (WA for generic operations)
+ ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
+
+ ngraph::pass::ConvertOpSet3ToOpSet2().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet2ToOpSet1().run_on_function(nGraphFunc);
+ ngraph::pass::ConvertOpSet1ToLegacy().run_on_function(nGraphFunc);
+ convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *originalOrConvertNetwork);
+ originalOrConvertNetwork = convertedNetwork.get();
+ };
- detectNetworkBatch(network, model);
+ if (originalOrConvertNetwork->getFunction() && env.config.forceDeprecatedCnnConversion) {
+ convertNetwork();
+ }
+
+ detectNetworkBatch(*originalOrConvertNetwork, model);
- ie::NetPass::ConvertPrecision(network, ie::Precision::I64, ie::Precision::I32);
- ie::NetPass::ConvertPrecision(network, ie::Precision::U64, ie::Precision::I32);
- ie::NetPass::ConvertPrecision(network, ie::Precision::BOOL, ie::Precision::I32);
+ if (originalOrConvertNetwork->getFunction()) {
+ convertNetwork();
+ }
- IE_SUPPRESS_DEPRECATED_START
- // force conversion to CNNNetwork
- if (network.getFunction())
- network.addLayer(nullptr);
- IE_SUPPRESS_DEPRECATED_END
+ ie::NetPass::ConvertPrecision(*originalOrConvertNetwork, ie::Precision::I64, ie::Precision::I32);
+ ie::NetPass::ConvertPrecision(*originalOrConvertNetwork, ie::Precision::U64, ie::Precision::I32);
+ ie::NetPass::ConvertPrecision(*originalOrConvertNetwork, ie::Precision::BOOL, ie::Precision::I32);
- moveConstInputsToBlobs(network);
+ moveConstInputsToBlobs(*originalOrConvertNetwork);
- removeConstLayers(network);
+ removeConstLayers(*originalOrConvertNetwork);
- unrollLoops(network);
+ unrollLoops(*originalOrConvertNetwork);
}
//
// Parse IR Network
//
- _ieParsedNetwork = parseNetwork(network);
+ _ieParsedNetwork = parseNetwork(*originalOrConvertNetwork);
//
// Process internal VPU Model
parseInputAndOutputData(model);
- addDataTypeConvertStages(model);
+ if (!CompileEnv::get().config.disableConvertStages) {
+ addDataTypeConvertStages(model);
+ }
addPreProcessStages(model);
}
shapeLocation.dimsLocation = dataLocation.location;
shapeLocation.dimsOffset = dataLocation.offset;
+
+ if (data->usage() == DataUsage::Output) {
+ // We need to allocate memory for maximum dims values also
+ data->attrs().set<int>("ioDimsUpperBoundOffset", _blobMemOffset);
+ _blobMemOffset += dimsByteSize;
+ }
} else {
// Static allocation
shapeLocation.dimsLocation = Location::Blob;
}
void Allocator::freeData(const Data& data, DeallocationMode mode) {
- //
- // Release the chunk
- //
-
- if (const auto& parentDataToShapeEdge = data->parentDataToShapeEdge()) {
- auto const& parent = parentDataToShapeEdge->parent();
+ const auto getChunk = [this, &data](const Data& parent) {
+ VPU_THROW_UNLESS(_allocatedIntermData.count(parent) > 0,
+ "Allocator failed on freeData for {} with usage {}: parent data {} with usage {} is not allocated",
+ data->name(), data->usage(), parent->name(), parent->usage());
- auto it = _memChunksPerData.find(parentDataToShapeEdge->parent());
- auto chunk = it->second;
+ auto it = _memChunksPerData.find(parent);
VPU_INTERNAL_CHECK(it != _memChunksPerData.end(),
"Allocator failed on freeData for {} with usage {}: parent data {} with usage {} "
"containing shape for current data wasn't yet allocated",
data->name(), data->usage(), parent->name(), parent->usage());
+ auto chunk = it->second;
+
VPU_INTERNAL_CHECK(chunk != nullptr,
"Allocator failed on freeData for {} with usage {}: parent data {} with usage {} "
"containing shape for current data has no memory chunk",
"containing shape for this data has zero usages, but it is using at least by current data",
data->name(), data->usage(), parent->name(), parent->usage());
+ return chunk;
+ };
+
+ const auto decreaseChunkUsage = [this](allocator::MemChunk* chunk, const Data& parent) {
--chunk->inUse;
if (chunk->inUse == 0) {
_memChunksPerData.erase(parent);
_allocatedIntermData.erase(parent);
}
+ };
+
+ //
+ // Release the chunk
+ //
+
+ if (const auto& parentDataToShapeEdge = data->parentDataToShapeEdge()) {
+ auto const& parent = parentDataToShapeEdge->parent();
+
+ if (parent->usage() == DataUsage::Intermediate || parent->usage() == DataUsage::Temp) {
+ auto chunk = getChunk(parent);
+ decreaseChunkUsage(chunk, parent);
+ }
}
auto topParent = data->getTopParentData();
if (topParent->usage() == DataUsage::Intermediate ||
topParent->usage() == DataUsage::Temp) {
- IE_ASSERT(_allocatedIntermData.count(topParent) > 0);
-
- auto it = _memChunksPerData.find(topParent);
- IE_ASSERT(it != _memChunksPerData.end());
-
- auto chunk = it->second;
- IE_ASSERT(chunk != nullptr);
- IE_ASSERT(chunk->inUse > 0);
+ auto chunk = getChunk(topParent);
switch (mode) {
case DeallocationMode::JustFree: {
- --chunk->inUse;
-
- if (chunk->inUse == 0) {
- freeMem(chunk);
-
- _memChunksPerData.erase(topParent);
- _allocatedIntermData.erase(topParent);
- }
-
+ decreaseChunkUsage(chunk, topParent);
break;
}
// initial dump pass must be the first dump
ADD_DUMP_PASS("initial");
+ //
+ // Replace Global AvgPooling with ReduceMean
+ //
+
+ if (env.config.enableReplaceWithReduceMean) {
+ ADD_PASS(replaceWithReduceMean);
+ ADD_DUMP_PASS("replaceWithReduceMean");
+ }
+
+
if (!env.config.disableReorder && !env.config.hwOptimization) {
ADD_PASS(reorderInputsToChannelMinor);
ADD_DUMP_PASS("reorderInputsToChannelMinor");
ADD_DUMP_PASS("mergeParallelFC");
//
- // Replace Global AvgPooling with ReduceMean
- //
-
- if (env.config.enableReplaceWithReduceMean) {
- ADD_PASS(replaceWithReduceMean);
- ADD_DUMP_PASS("replaceWithReduceMean");
- }
-
- //
// Model common adaptation
//
ADD_PASS(hwPadding);
ADD_DUMP_PASS("hwPadding");
+ if (env.config.hwOptimization) {
+ ADD_PASS(splitLargeKernelConv);
+ ADD_DUMP_PASS("splitLargeKernelConv");
+ }
+
//
// Batch support
//
env.log->trace("Child data [%s] : mode [%v] offset [%v]", childData->name(), mode, offset);
- model->replaceParentData(childDataEdge, ddrCopy);
+ model->replaceDataToDataParent(childDataEdge, ddrCopy);
loopOverData(childData, [](const Data& subData) {
subData->setMemReqs(MemoryType::DDR);
class PassImpl final : public PerStagePass {
public:
- PassImpl() : PerStagePass({StageType::Concat}) {}
+ PassImpl() : PerStagePass({StageType::StubConcat}) {}
void runForStage(const Model& model, const Stage& stage) override;
};
//
if (dataEdge->connectionMode() == SharedConnectionMode::SINGLE_STAGE) {
- if (connectionStage->type() == StageType::Concat ||
+ if (connectionStage->type() == StageType::StubConcat ||
connectionStage->type() == StageType::Expand) {
IE_ASSERT(producer == child);
IE_ASSERT(consumer == parent);
continue;
}
- if (stage->type() == StageType::Concat) {
+ if (stage->type() == StageType::StubConcat) {
_processor.processConcat(model, stage);
} else if (stage->type() == StageType::Split) {
_processor.processSplit(model, stage);
continue;
}
- const auto nextStages = getExactNextStages(convolutionStage, {StageType::Power, StageType::Concat});
+ const auto nextStages = getExactNextStages(convolutionStage, {StageType::Power, StageType::StubConcat});
if (nextStages.size() != 2 || convolutionStage->type() != StageType::StubConv) {
continue;
}
continue;
}
- auto concatAfterPowerStage = getOneOfSingleNextStage(powerStage, {StageType::Concat});
+ auto concatAfterPowerStage = getOneOfSingleNextStage(powerStage, {StageType::StubConcat});
if (concatAfterPowerStage != concatStage) {
continue;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/middleend/pass_manager.hpp>
+#include <vpu/utils/numeric.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+#include <precision_utils.h>
+
+#include <vector>
+#include <set>
+#include <memory>
+
+
+namespace vpu {
+
+namespace {
+
+class PassImpl final : public Pass {
+public:
+ explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
+
+ void run(const Model& model) override;
+
+private:
+ StageBuilder::Ptr _stageBuilder;
+};
+
+void PassImpl::run(const Model& model) {
+ VPU_PROFILE(splitLargeKernelConv);
+
+ for (const auto& stage : model->getStages()) {
+ if (stage->type() != StageType::StubConv) {
+ continue;
+ }
+ const auto tryHW = stage->attrs().getOrDefault<bool>("tryHW", false);
+ if (!tryHW) {
+ continue;
+ }
+
+ IE_ASSERT(stage->numInputs() == 4);
+ IE_ASSERT(stage->numOutputs() == 1);
+
+ const auto input = stage->input(0);
+ const auto weights = stage->input(1);
+ const auto biases = stage->input(2);
+ const auto scales = stage->input(3);
+ const auto output = stage->output(0);
+
+ const auto kernelSizeX = stage->attrs().get<int>("kernelSizeX");
+ const auto kernelSizeY = stage->attrs().get<int>("kernelSizeY");
+ const auto groupSize = stage->attrs().get<int>("groupSize");
+ const auto inputC = input->desc().dim(Dim::C);
+ const auto outputC = output->desc().dim(Dim::C);
+
+        // Only 1x16 convolutions are supported now; support could be expanded up to 1x30
+ if (kernelSizeX != 16 || kernelSizeY != 1 || groupSize != 1) {
+ continue;
+ }
+
+ model->disconnectStage(stage);
+
+ int kernelGroupSize = kernelSizeX / 16 + 1;
+ IE_ASSERT(kernelGroupSize == 2);
+ const auto newKernelSizeX = kernelSizeX / kernelGroupSize;
+ IE_ASSERT(newKernelSizeX * kernelGroupSize == kernelSizeX);
+ const auto inGroupDimX = input->desc().dim(Dim::W) - newKernelSizeX;
+
+ DataVector subInputs(kernelGroupSize);
+ DataVector subOutputs(kernelGroupSize);
+
+ for (int groupInd = 0; groupInd < kernelGroupSize; ++groupInd) {
+ auto postfix = formatString("@subkernel=%d/%d", groupInd + 1, kernelGroupSize);
+
+            // subInput
+ auto subInputDesc = input->desc();
+ subInputDesc.setDim(Dim::W, inGroupDimX);
+
+ subInputs[groupInd] = model->duplicateData(
+ input,
+ postfix,
+ subInputDesc);
+
+            // subShrinkStage
+ DimValues offsets({{Dim::W, groupInd * newKernelSizeX}});
+
+ _stageBuilder->addCropStage(
+ model,
+ stage->name() + postfix,
+ stage->origLayer(),
+ input,
+ subInputs[groupInd],
+ std::move(offsets));
+
+ // subWeights
+ Data subWeights;
+ {
+ const auto content = weights->content();
+ IE_ASSERT(content != nullptr);
+
+ const auto origWeights = content->get<fp16_t>();
+ IE_ASSERT(origWeights != nullptr);
+
+ size_t newWeightsSize = newKernelSizeX * kernelSizeY * outputC * inputC;
+
+ auto newWeightsBlob = ie::make_shared_blob<fp16_t>(InferenceEngine::TensorDesc(
+ ie::Precision::FP16,
+ {newWeightsSize},
+ ie::Layout::C));
+ newWeightsBlob->allocate();
+
+ const auto newWeightsPtr = newWeightsBlob->buffer().as<fp16_t*>();
+ auto src = origWeights + groupInd * newKernelSizeX;
+ auto dst = newWeightsPtr;
+
+ for (int i = 0; i < kernelSizeY * inputC * outputC; ++i) {
+ std::copy_n(src + i * kernelSizeX, newKernelSizeX, dst + i * newKernelSizeX);
+ }
+
+ subWeights = model->duplicateData(
+ weights,
+ postfix,
+ DataDesc({newKernelSizeX, kernelSizeY, inputC, outputC}),
+ ieBlobContent(newWeightsBlob));
+ }
+
+ // subOutput
+ auto subOutputDesc = output->desc();
+
+ subOutputs[groupInd] = model->duplicateData(
+ output,
+ postfix,
+ subOutputDesc);
+
+ // subConvStage
+ auto subConvStage = model->duplicateStage(
+ stage,
+ postfix,
+ {subInputs[groupInd], subWeights, biases, scales},
+ {subOutputs[groupInd]});
+
+ subConvStage->attrs().set<int>("kernelSizeX", newKernelSizeX);
+ }
+
+ _stageBuilder->addSumStage(
+ model,
+ stage->name() + "@sum",
+ stage->origLayer(),
+ subOutputs[0],
+ subOutputs[1],
+ output);
+
+ model->removeStage(stage);
+ }
+}
+
+} // namespace
+
+Pass::Ptr PassManager::splitLargeKernelConv() {
+ return std::make_shared<PassImpl>(_stageBuilder);
+}
+
+} // namespace vpu
if (concatStage == nullptr)
continue;
- if (concatStage->type() != StageType::Concat)
+ if (concatStage->type() != StageType::StubConcat)
continue;
IE_ASSERT(concatStage->numInputs() > 0);
void DataNode::serializeBuffer(
BlobSerializer& serializer) {
- serializeDescImpl(serializer, _desc, this->strides());
+ serializeDescImpl(serializer, _desc, this->shapeLocation());
serializer.append(checked_cast<uint32_t>(_dataLocation.location));
+ const auto serializeIOParams = [&serializer](const Data& parent) {
+ auto IOIdx = parent->attrs().get<int>("ioIdx");
+ serializer.append(checked_cast<uint32_t>(IOIdx));
+
+ auto parentByteSize = parent->totalByteSize();
+ serializer.append(checked_cast<uint32_t>(parentByteSize));
+ };
+
if (_dataLocation.location == Location::Input || _dataLocation.location == Location::Output) {
- auto topParent = getTopParentData();
+ serializeIOParams(getTopParentData());
+ }
- auto ioIdx = topParent->attrs().get<int>("ioIdx");
- serializer.append(checked_cast<uint32_t>(ioIdx));
+ if (_shapeLocation.dimsLocation == Location::Output) {
+ serializeIOParams(parentDataToShapeEdge()->parent());
+ }
- auto parentByteSize = topParent->totalByteSize();
- serializer.append(checked_cast<uint32_t>(parentByteSize));
+ if (_shapeLocation.stridesLocation == Location::Output) {
+ serializeIOParams(parentDataToShapeEdge()->parent());
}
serializer.append(checked_cast<uint32_t>(_dataLocation.offset));
}
void DataNode::serializeIOInfo(BlobSerializer& serializer) const {
- auto ioIdx = attrs().get<int>("ioIdx");
- serializer.append(checked_cast<uint32_t>(ioIdx));
+ auto dataIOIdx = attrs().get<int>("ioIdx");
+ serializer.append(checked_cast<uint32_t>(dataIOIdx));
auto ioBufferOffset = attrs().get<int>("ioBufferOffset");
serializer.append(checked_cast<uint32_t>(ioBufferOffset));
serializer.append(uint8_t(0));
}
- serializeDescImpl(serializer, _desc, strides());
+ auto resShapeLocation = shapeLocation();
+ if (resShapeLocation.dimsLocation != Location::Blob) {
+ auto ioDimsUpperBoundOffset = attrs().get<int>("ioDimsUpperBoundOffset");
+ resShapeLocation.dimsLocation = Location::Blob;
+ resShapeLocation.dimsOffset = ioDimsUpperBoundOffset;
+ }
+ if (resShapeLocation.stridesLocation != Location::Blob) {
+ auto ioStridesUpperBoundOffset = attrs().get<int>("ioStridesUpperBoundOffset");
+ resShapeLocation.stridesLocation = Location::Blob;
+ resShapeLocation.stridesOffset = ioStridesUpperBoundOffset;
+ }
+
+ serializeDescImpl(serializer, _desc, resShapeLocation);
}
void DataNode::serializeDescImpl(
BlobSerializer& serializer,
const DataDesc& storedDesc,
- const DimValues& storedStrides) const {
+ const ShapeLocation& shapeLocation) const {
IE_ASSERT(storedDesc.numDims() <= MAX_DIMS_32);
auto storedDimsOrder = storedDesc.dimsOrder();
serializer.append(checked_cast<uint32_t>(storedPerm.size()));
- const auto& shape = shapeLocation();
-
- serializer.append(checked_cast<uint32_t>(shape.dimsLocation));
- serializer.append(checked_cast<uint32_t>(shape.dimsOffset));
- serializer.append(checked_cast<uint32_t>(shape.stridesLocation));
- serializer.append(checked_cast<uint32_t>(shape.stridesOffset));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.dimsLocation));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.dimsOffset));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.stridesLocation));
+ serializer.append(checked_cast<uint32_t>(shapeLocation.stridesOffset));
}
void printTo(std::ostream& os, const Data& data) {
data->_dependentStagesEdges.push_back(edge);
- VPU_THROW_UNLESS(data->usage() == DataUsage::Intermediate,
- "Adding stage dependency for {} with type {} failed: only {} datas can be added as a dependency "
- "while adding {} with usage {} was attempted",
- stage->name(), stage->type(), DataUsage::Intermediate, data->name(), data->usage());
-
VPU_THROW_UNLESS(data->_producerEdge != nullptr,
"Adding stage dependency for {} with type {} failed: data {} with usage {} should have producer, "
"but actually it doesn't", stage->name(), stage->type(), data->name(), data->usage());
- if (data->_producerEdge != nullptr) {
- ++data->_producerEdge->_producer->_nextStages[stage];
- ++stage->_prevStages[data->_producerEdge->_producer];
- }
+ ++data->_producerEdge->_producer->_nextStages[stage];
+ ++stage->_prevStages[data->_producerEdge->_producer];
return edge;
}
// Check connection stage type and that parent has the largest buffer.
//
- if (connectionStage->type() == StageType::Concat ||
+ if (connectionStage->type() == StageType::StubConcat ||
connectionStage->type() == StageType::Expand) {
IE_ASSERT(producer == child);
IE_ASSERT(consumer == parent);
const auto& parentStage = parent->producer();
const auto& childStage = child->producer();
- if (parentStage && childStage && parentStage != childStage && parent->usage() == DataUsage::Intermediate) {
+ const auto& areStagesDifferent = [](const Stage& lhs, const Stage& rhs) {
+ return lhs && rhs && lhs != rhs;
+ };
+
+ if (areStagesDifferent(parentStage, childStage)) {
// Shape and data are produced from different stages, make sure that shape is calculated before data
addStageDependency(childStage, parent);
}
return edge;
}
-void ModelObj::replaceParentData(
+void ModelObj::replaceDataToShapeParent(
+ const DataToShapeAllocation& edge,
+ const Data& newParent) {
+ auto oldParent = edge->parent();
+ auto child = edge->child();
+
+ oldParent->_childDataToShapeEdges.erase(edge);
+ edge->_parent = newParent;
+ newParent->_childDataToShapeEdges.push_back(edge);
+}
+
+void ModelObj::replaceDataToShapeChild(
+ const DataToShapeAllocation& edge,
+ const Data& newChild) {
+ auto parent = edge->parent();
+ auto oldChild = edge->child();
+
+ oldChild->_parentDataToShapeEdge = nullptr;
+ edge->_child = newChild;
+
+ VPU_THROW_UNLESS(newChild->_parentDataToShapeEdge == nullptr,
+ "replaceDataToShapeChild failed: newChild {} with usage {} already has parent {} with usage {}",
+ newChild->name(), newChild->usage(), newChild->_parentDataToShapeEdge->parent()->name(), newChild->_parentDataToShapeEdge->parent()->usage());
+
+ newChild->_parentDataToShapeEdge = edge;
+}
+
+void ModelObj::replaceDataToDataParent(
const DataToDataAllocation& edge,
const Data& newParent) {
auto oldParent = edge->parent();
}
}
-void ModelObj::replaceChildData(
+void ModelObj::replaceDataToDataChild(
const DataToDataAllocation& edge,
const Data& newChild) {
auto parent = edge->parent();
VPU_CONFIG_KEY(ENABLE_REPLACE_WITH_REDUCE_MEAN),
VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING),
VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR),
+ VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES),
//
// Debug options
setOption(_compileConfig.enableReplaceWithReduceMean, switches, config, VPU_CONFIG_KEY(ENABLE_REPLACE_WITH_REDUCE_MEAN));
setOption(_compileConfig.enableTensorIteratorUnrolling, switches, config, VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING));
setOption(_compileConfig.forcePureTensorIterator, switches, config, VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR));
+ setOption(_compileConfig.disableConvertStages, switches, config, VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES));
setOption(_compileConfig.irWithVpuScalesDir, config, VPU_CONFIG_KEY(IR_WITH_SCALES_DIRECTORY));
setOption(_compileConfig.noneLayers, config, VPU_CONFIG_KEY(NONE_LAYERS), parseStringSet);
if (const auto envVar = std::getenv("IE_VPU_DUMP_ALL_PASSES")) {
_compileConfig.dumpAllPasses = std::stoi(envVar) != 0;
}
+ if (const auto envVar = std::getenv("IE_VPU_NUMBER_OF_SHAVES_AND_CMX_SLICES")) {
+ _compileConfig.numSHAVEs = _compileConfig.numCMXSlices = std::stoi(envVar);
+ }
#endif
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vpu/utils/numeric.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace vpu {
+
+namespace {
+
+class BroadcastStage final : public StageNode {
+public:
+ using StageNode::StageNode;
+
+protected:
+ StagePtr cloneImpl() const override {
+ return std::make_shared<BroadcastStage>(*this);
+ }
+
+ void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+ const auto inputOrder = input(0)->desc().dimsOrder();
+ auto outputOrder = DimsOrder::fromNumDims(output(0)->desc().numDims());
+
+ if (inputOrder.numDims() >= 3 && inputOrder.dimInd(Dim::C) == 0) {
+ outputOrder.moveDim(Dim::C, 0);
+ }
+
+ orderInfo.setOutput(outputEdge(0), outputOrder);
+ }
+
+ void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+ stridesInfo.setInput(inputEdge(0), StridesRequirement().remove(0));
+ stridesInfo.setOutput(outputEdge(0), StridesRequirement().remove(0));
+ }
+
+ void finalizeDataLayoutImpl() override {
+ }
+
+ void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+ }
+
+ void initialCheckImpl() const override {
+ const auto mode = attrs().getOrDefault<BroadcastMode>("mode", BroadcastMode::NUMPY);
+ const auto& dataPrecision = input(0)->desc().type();
+
+ VPU_THROW_UNLESS(numOutputs() == 1,
+ "{} stage with name {} must have only 1 output, actually provided {} outputs",
+ type(), name(), numOutputs());
+ if (mode == BroadcastMode::NUMPY) {
+ VPU_THROW_UNLESS(numInputs() == 2,
+ "{} stage with name {} and numpy mode must have 2 inputs, actually "
+ "provided {} inputs", type(), name(), numInputs());
+ assertInputsOutputsTypes(this,
+ {{dataPrecision}, {DataType::S32}},
+ {{dataPrecision}});
+
+ } else {
+ VPU_THROW_UNLESS(numInputs() == 3,
+ "{} stage with name {} and explicit mode must have 3 inputs, actually "
+ "provided {} inputs", type(), name(), numInputs());
+ assertInputsOutputsTypes(this,
+ {{dataPrecision}, {DataType::S32}, {DataType::S32}},
+ {{dataPrecision}});
+ }
+ }
+
+ void serializeParamsImpl(BlobSerializer& serializer) const override {
+ const auto mode = attrs().getOrDefault<BroadcastMode>("mode", BroadcastMode::NUMPY);
+ serializer.append(static_cast<uint32_t>(mode == BroadcastMode::NUMPY ? 0 : 1));
+ }
+
+ void serializeDataImpl(BlobSerializer& serializer) const override {
+ const auto mode = attrs().getOrDefault<BroadcastMode>("mode", BroadcastMode::NUMPY);
+
+ input(0)->serializeBuffer(serializer);
+ input(1)->serializeBuffer(serializer);
+ if (mode == BroadcastMode::EXPLICIT) {
+ input(2)->serializeBuffer(serializer);
+ }
+ output(0)->serializeBuffer(serializer);
+ }
+};
+
+} // namespace
+
+void FrontEnd::parseBroadcast(
+ const Model& model,
+ const ie::CNNLayerPtr& layer,
+ const DataVector& inputs,
+ const DataVector& outputs) const {
+ VPU_THROW_UNLESS(layer != nullptr,
+ "parseBroadcast expects valid CNNLayerPtr, got nullptr");
+
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "{} layer with name {} must have only 1 output, actually provided {} outputs",
+ layer->type, layer->name, outputs.size());
+ const auto output = outputs[0];
+
+ const auto modeString = layer->GetParamAsString("mode", "numpy");
+ if (modeString == "numpy") {
+ VPU_THROW_UNLESS(inputs.size() == 2,
+ "{} layer with name {} and numpy mode must have 2 inputs, actually "
+ "provided {} inputs", layer->type, layer->name, inputs.size());
+ } else if (modeString == "explicit") {
+ VPU_THROW_UNLESS(inputs.size() == 3,
+ "{} layer with name {} and explicit mode must have 3 inputs, actually "
+ "provided {} inputs", layer->type, layer->name, inputs.size());
+ const auto axesMappingDesc = inputs[2]->desc();
+ const auto axesMappingPerm = axesMappingDesc.dimsOrder().toPermutation();
+ const auto axesMappingDim = axesMappingDesc.dim(axesMappingPerm.at(0));
+ VPU_THROW_UNLESS(axesMappingDesc.numDims() == 1,
+ "{} layer with name {} and explicit mode must have 1D axesMapping tensor, "
+ "actually provided {}D tensor",
+ layer->type, layer->name, axesMappingDesc.numDims());
+ VPU_THROW_UNLESS(axesMappingDim == output->desc().numDims(),
+ "{} layer with name {} and explicit mode must have axesMapping tensor with "
+ "size equals to number of output dims, expected [{}], provided [{}]",
+ layer->type, layer->name, output->desc().numDims(), axesMappingDim);
+
+ } else {
+ VPU_THROW_FORMAT("{} layer with name {}: Graph Transformer doesn't support {} mode",
+ layer->type, layer->name, modeString);
+ }
+
+ const auto shape = inputs[1];
+ const auto shapeDesc = inputs[1]->desc();
+ const auto shapeDim = shapeDesc.dim(shapeDesc.dimsOrder().toPermutation().at(0));
+ VPU_THROW_UNLESS(shapeDesc.numDims() == 1,
+ "{} layer with name {} and explicit mode must have 1D target shape tensor, "
+ "actually provided {}D tensor",
+ layer->type, layer->name, shapeDesc.numDims());
+ VPU_THROW_UNLESS(shapeDim == output->desc().numDims(),
+ "{} layer with name {} and explicit mode must have target shape tensor with "
+ "size equals to number of output dims, expected [{}], provided [{}]",
+ layer->type, layer->name, output->desc().numDims(), shapeDim);
+
+ const auto mode = modeString == "numpy" ? BroadcastMode::NUMPY : BroadcastMode::EXPLICIT;
+
+ auto stage = model->addNewStage<BroadcastStage>(
+ layer->name,
+ StageType::Broadcast,
+ layer,
+ inputs,
+ outputs);
+
+ stage->attrs().set("mode", mode);
+}
+
+} // namespace vpu
#include <vpu/frontend/frontend.hpp>
+#include <vpu/utils/numeric.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
#include <vector>
-#include <limits>
#include <string>
-#include <algorithm>
#include <memory>
-#include <set>
-#include <unordered_set>
#include <utility>
-#include <vpu/utils/numeric.hpp>
-
namespace vpu {
namespace {
-class ConcatStage final : public StageNode {
+using InputEdges = details::ContainerRange<StageInputVector, false>;
+
+DimsOrder getMostSuitableOrder(const InputEdges& inputEdges) {
+ DimsOrderMap<int> dimsOrderVotes;
+ for (const auto& inEdge : inputEdges) {
+ dimsOrderVotes[inEdge->input()->desc().dimsOrder()]++;
+ }
+
+ // Select DimsOrder with most votes.
+ // For equal votes : HCW > CHW > HWC.
+
+ DimsOrder finalOrder;
+ int curVotes = -1;
+ for (const auto& p : dimsOrderVotes) {
+ if (p.second > curVotes) {
+ finalOrder = p.first;
+ curVotes = p.second;
+ } else if (p.second == curVotes) {
+ if (p.first.numDims() >= 3) {
+ if (p.first.dimInd(Dim::C) == 2) {
+ finalOrder = p.first;
+ } else if (p.first.dimInd(Dim::C) == 3 &&
+ finalOrder.dimInd(Dim::C) != 2) {
+ finalOrder = p.first;
+ }
+ }
+ }
+ }
+
+ VPU_INTERNAL_CHECK(finalOrder.numDims() > 0,
+ "getMostSuitableOrder must find order with rank which is grater than 0, "
+ "actually rank is {}", finalOrder.numDims());
+ VPU_INTERNAL_CHECK(curVotes > 0,
+ "getMostSuitableOrder: final order must have at least 1 vote "
+ "actually votes number is {}", curVotes);
+
+ return finalOrder;
+}
+
+//
+// StubConcatStage will be replaced with Data <-> Data edges on special stage processor
+//
+
+class StubConcatStage final : public StageNode {
public:
using StageNode::StageNode;
protected:
StagePtr cloneImpl() const override {
- return std::make_shared<ConcatStage>(*this);
+ return std::make_shared<StubConcatStage>(*this);
}
void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
- auto output = outputEdge(0)->output();
-
- DimsOrderMap<int> dimsOrderVotes;
- for (const auto& inEdge : inputEdges()) {
- dimsOrderVotes[inEdge->input()->desc().dimsOrder()]++;
- }
-
- // Select DimsOrder with most votes.
- // For equal votes : HCW > CHW > HWC.
-
- DimsOrder finalOrder;
- int curVotes = -1;
- for (const auto& p : dimsOrderVotes) {
- if (p.second > curVotes) {
- finalOrder = p.first;
- curVotes = p.second;
- } else if (p.second == curVotes) {
- if (p.first.numDims() >= 3) {
- if (p.first.dimInd(Dim::C) == 2) {
- finalOrder = p.first;
- } else if (p.first.dimInd(Dim::C) == 3 &&
- finalOrder.dimInd(Dim::C) != 2) {
- finalOrder = p.first;
- }
- }
- }
- }
-
- IE_ASSERT(finalOrder.numDims() > 0);
- IE_ASSERT(curVotes > 0);
+ const auto finalOrder = getMostSuitableOrder(inputEdges());
for (const auto& inEdge : inputEdges()) {
orderInfo.setInput(inEdge, finalOrder);
}
}
- IE_ASSERT(minConcatDimInd < dimsOrder.numDims());
+ VPU_INTERNAL_CHECK(minConcatDimInd < dimsOrder.numDims(),
+ "{} stage with name {} must have minConcatDimInd no greater than number "
+ "of dimensions, actually index is {}, number of dimension is {}",
+ type(), name(), minConcatDimInd, dimsOrder.numDims());
//
// Initial StridesRequirement for inputs and output.
}
void initialCheckImpl() const override {
- IE_ASSERT(numInputs() > 0);
- IE_ASSERT(numOutputs() == 1);
+ VPU_INTERNAL_CHECK(numInputs() > 0,
+ "{} stage with name {} must have no less than 1 input, "
+ "actually provided {} inputs", type(), name(), numInputs());
+ VPU_INTERNAL_CHECK(numOutputs() == 1,
+ "{} stage with name {} must have only 1 output, "
+ "actually provided {} outputs", type(), name(), numOutputs());
const auto& firstInputPrecision = input(0)->desc().type();
assertAllInputsOutputsTypes(this, {firstInputPrecision}, {firstInputPrecision});
}
void serializeParamsImpl(BlobSerializer&) const override {
- VPU_THROW_EXCEPTION << "Must never be called";
+ VPU_THROW_FORMAT("{} stage with name {} must never call serializeParamsImpl",
+ type(), name());
}
void serializeDataImpl(BlobSerializer&) const override {
- VPU_THROW_EXCEPTION << "Must never be called";
+ VPU_THROW_FORMAT("{} stage with name {} must never call serializeDataImpl",
+ type(), name());
+ }
+};
+
+//
+// ConcatStage will be inferred on device side
+//
+
+class ConcatStage final : public StageNode {
+public:
+ using StageNode::StageNode;
+
+protected:
+ StagePtr cloneImpl() const override {
+ return std::make_shared<ConcatStage>(*this);
+ }
+
+ void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+ const auto finalOrder = getMostSuitableOrder(inputEdges());
+
+ for (const auto& inEdge : inputEdges()) {
+ orderInfo.setInput(inEdge, finalOrder);
+ }
+
+ orderInfo.setOutput(outputEdge(0), finalOrder);
+ }
+
+ void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+ for (const auto& inEdge : inputEdges()) {
+ stridesInfo.setInput(inEdge, StridesRequirement().remove(0));
+ }
+ stridesInfo.setOutput(outputEdge(0), StridesRequirement().remove(0));
+ }
+
+ void finalizeDataLayoutImpl() override {
+ }
+
+ void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+ }
+
+ void initialCheckImpl() const override {
+ VPU_INTERNAL_CHECK(numInputs() > 0,
+ "{} stage with name {} must have no less than 1 input, "
+ "actually provided {} inputs", type(), name(), numInputs());
+ VPU_INTERNAL_CHECK(numOutputs() == 1,
+ "{} stage with name {} must have only 1 output, "
+ "actually provided {} outputs", type(), name(), numOutputs());
+
+ const auto& firstInputPrecision = input(0)->desc().type();
+ assertAllInputsOutputsTypes(this, {firstInputPrecision}, {firstInputPrecision});
+ }
+
+ void serializeParamsImpl(BlobSerializer& serializer) const override {
+ const auto axis = attrs().get<Dim>("axis");
+ const auto axisInd = input(0)->desc().dimsOrder().dimInd(axis);
+
+ serializer.append(static_cast<uint32_t>(axisInd));
+ serializer.append(static_cast<uint32_t>(numInputs()));
+ }
+
+ void serializeDataImpl(BlobSerializer& serializer) const override {
+ for (const auto& input : inputs()) {
+ input->serializeBuffer(serializer);
+ }
+ output(0)->serializeBuffer(serializer);
}
};
} // namespace
-void FrontEnd::parseConcat(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const {
- IE_ASSERT(!inputs.empty());
- IE_ASSERT(outputs.size() == 1);
+void FrontEnd::parseConcat(
+ const Model& model,
+ const ie::CNNLayerPtr& layer,
+ const DataVector& inputs,
+ const DataVector& outputs) const {
+ VPU_THROW_UNLESS(!inputs.empty(),
+ "{} layer with name {} must have no less than 1 input, "
+ "actually provided 0 inputs", layer->type, layer->name);
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "{} layer with name {} must have only 1 output, actually provided {} outputs",
+ layer->type, layer->name, outputs.size());
auto output = outputs[0];
auto concat = std::dynamic_pointer_cast<ie::ConcatLayer>(layer);
- IE_ASSERT(layer != nullptr);
+ VPU_THROW_UNLESS(layer != nullptr,
+ "{} layer with name {} must be able to convert to ie::ConcatLayer",
+ layer->type, layer->name);
- IE_ASSERT(concat->_axis < output->desc().numDims());
+ VPU_THROW_UNLESS(concat->_axis < output->desc().numDims(),
+ "{} layer with name {} must have axis attribute no grater than number of "
+ "dimensions, actually provided axis = {}, numDims = {}",
+ layer->type, layer->name, concat->_axis, output->desc().numDims());
auto perm = DimsOrder::fromNumDims(output->desc().numDims()).toPermutation();
auto axis = perm[output->desc().numDims() - 1 - concat->_axis];
- _stageBuilder->addConcatStage(model, concat->name, concat, axis, inputs, output);
+ // If there is DSR as concat's output in the transformed graph, then we need to infer
+ // concat on the device side. In other cases StubConcat stage will be added and it will
+    // be replaced with Data <-> Data edges.
+ auto inferRequirement = ConcatInferRequirement::CanBeReplaced;
+ if (auto concatOp = std::dynamic_pointer_cast<ngraph::opset3::Concat>(layer->getNode())) {
+ inferRequirement = concatOp->get_output_as_single_output_node(0)->get_type_info() ==
+ ngraph::vpu::op::DynamicShapeResolver::type_info
+ ? ConcatInferRequirement::NeedToInfer
+ : ConcatInferRequirement::CanBeReplaced;
+ }
+
+ _stageBuilder->addConcatStage(model, concat->name, concat, axis, inputs, output, inferRequirement);
}
Stage StageBuilder::addConcatStage(
const ie::CNNLayerPtr& layer,
Dim axis,
const DataVector& inputs,
- const Data& output) {
+ const Data& output,
+ ConcatInferRequirement inferRequirement) {
std::vector<DimValues> offsets;
offsets.reserve(inputs.size());
- DimValues curOffset({{axis, 0}});
- for (const auto& input : inputs) {
- offsets.emplace_back(curOffset);
- curOffset.set(axis, curOffset[axis] + input->desc().dim(axis));
- }
+ Stage stage;
+ if (inferRequirement == ConcatInferRequirement::NeedToInfer) {
+ stage = model->addNewStage<ConcatStage>(
+ layer->name,
+ StageType::Concat,
+ layer,
+ inputs,
+ {output});
+ } else {
+ DimValues curOffset({{axis, 0}});
+ for (const auto& input : inputs) {
+ offsets.emplace_back(curOffset);
+ curOffset.set(axis, curOffset[axis] + input->desc().dim(axis));
+ }
- auto stage = addConcatStage(model, name, layer, std::move(offsets), inputs, output);
+ stage = addConcatStage(model, name, layer, std::move(offsets), inputs, output);
+ }
stage->attrs().set("axis", axis);
std::vector<DimValues>&& offsets,
const DataVector& inputs,
const Data& output) {
- IE_ASSERT(offsets.size() == inputs.size());
+ VPU_INTERNAL_CHECK(offsets.size() == inputs.size(),
+ "offsets count (provided {}) must be equal to inputs count (provided {}) to "
+ "create Concat stage with name {}", offsets.size(), inputs.size(), name);
- auto stage = model->addNewStage<ConcatStage>(
+ auto stage = model->addNewStage<StubConcatStage>(
name,
- StageType::Concat,
+ StageType::StubConcat,
layer,
inputs,
{output});
tryHW = false;
}
- if (kernelSizeX > 15 || kernelSizeY > 15 || kernelStrideX > 8) {
+ // 1x16 convolution is split into two 1x8 convolutions in splitLargeKernelConv pass
+ const bool KernelSizeCantBeSplit = !(kernelSizeX == 16 && kernelSizeY == 1);
+ const bool KernelSizeTooLarge = (kernelSizeX > 15 || kernelSizeY > 15);
+ if (KernelSizeTooLarge && KernelSizeCantBeSplit) {
+ tryHW = false;
+ }
+
+ if (kernelStrideX > 8) {
tryHW = false;
}
int groupSize = convLayer->_group;
+ // kernelStrideY doesn't matter when kernelSizeY==InputSizeY, change it to try HW in 1D case
+ if (kernelSizeY == input->desc().dim(Dim::H) + padTop + padBottom)
+ kernelStrideY = kernelStrideX;
+
//
// Check if HW is applicable
//
#include <memory>
#include <string>
#include <map>
-#include <unordered_set>
#include <utility>
#include <algorithm>
#include <tuple>
namespace vpu {
-static void calcSizesFromParams(const DataDesc &desc, const SmallVector<std::string> &bufferSizeRules, SmallVector<int, 3> &sizes);
+static SmallVector<int> calcSizesFromParams(const DataDesc& desc, const SmallVector<std::string>& bufferSizeRules,
+ std::map<std::string, std::string> layerParams);
namespace {
}
void serializeParamsImpl(BlobSerializer& serializer) const override {
- const auto& customLayer = attrs().get<CustomLayer::Ptr>("customLayer");
- const auto& gws = attrs().get<SmallVector<int, 3>>("gws");
- const auto& lws = attrs().get<SmallVector<int, 3>>("lws");
+ const auto& kernel = attrs().get<CustomKernel>("customKernel");
+ const auto& gws = attrs().get<SmallVector<int>>("gws");
+ const auto& lws = attrs().get<SmallVector<int>>("lws");
const auto& ports = attrs().get<std::map<std::string, int>>("ports");
-
- //
- // GWG, LWG, Offs
- //
+ const auto& localDataSizes = attrs().get<std::map<std::string, int>>("localDataSizes");
for (int i = 0; i < gws.size(); ++i) {
- serializer.append(static_cast<uint32_t>(gws[i]/lws[i]));
+ serializer.append(static_cast<uint32_t>(gws[i] / lws[i]));
}
for (auto x : lws) {
serializer.append(static_cast<uint32_t>(0));
}
- serializer.append(static_cast<uint32_t>(customLayer->maxShaves()));
-
- //
- // Kernel Id
- //
-
- serializer.append(static_cast<uint32_t>(customLayer->kernelId()));
-
- //
- // Number of inputs
- //
-
- IE_ASSERT(customLayer->stageNumInputs() >= 0);
- serializer.append(static_cast<uint32_t>(customLayer->stageNumInputs()));
-
- //
- // Total number of blobs
- //
-
+ serializer.append(static_cast<uint32_t>(kernel.maxShaves()));
+ serializer.append(static_cast<uint32_t>(kernel.kernelId()));
+ serializer.append(static_cast<uint32_t>(kernel.inputDataCount()));
serializer.append(static_cast<int32_t>(numInputs() + numOutputs()));
+ serializer.append(static_cast<uint32_t>(kernel.parameters().size()));
- //
- // Number of kernel parameters
- //
-
- serializer.append(static_cast<uint32_t>(customLayer->parameters().size()));
-
- //
- // Parameters & relocation info
- //
-
- std::map<std::string, CustomLayer::KernelParam> b2b;
- for (const auto& kp : customLayer->bindings()) {
+ std::map<std::string, CustomKernel::KernelParam> b2b;
+ for (const auto& kp : kernel.bindings()) {
b2b[kp.argName] = kp;
}
IE_ASSERT(origLayer() != nullptr);
- for (const auto& kp : customLayer->parameters()) {
+ for (const auto& kp : kernel.parameters()) {
const auto& parameter = b2b[kp];
switch (parameter.type) {
- case CustomParamType::Input:
- case CustomParamType::Output:
- case CustomParamType::InputBuffer:
- case CustomParamType::OutputBuffer:
- case CustomParamType::Data:
- {
- if (ports.find(kp) == ports.end()) {
- VPU_THROW_EXCEPTION
- << "Unable to bind parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
- }
- int id = ports.find(kp)->second;
- serializer.append(static_cast<uint32_t>(0));
- serializer.append(static_cast<uint32_t>(id));
+ case CustomParamType::Input:
+ case CustomParamType::Output:
+ case CustomParamType::InputBuffer:
+ case CustomParamType::OutputBuffer:
+ case CustomParamType::Data: {
+ VPU_THROW_UNLESS(ports.find(kp) != ports.end(),
+ "XML specification for %s layer has no definition for %s parameter. Layer name: %s",
+ origLayer()->type, kp, origLayer()->name);
+
+ int id = ports.find(kp)->second;
+ serializer.append(static_cast<uint32_t>(0));
+ serializer.append(static_cast<uint32_t>(id));
+ break;
+ }
+ case CustomParamType::Int:
+ case CustomParamType::Float: {
+ const auto cnnParam = origLayer()->params.find(parameter.irSource);
+ if (cnnParam != origLayer()->params.end()) {
+ const auto param = [&]() -> std::string {
+ if (parameter.portIndex < 0) {
+ return cnnParam->second;
+ }
- break;
- }
- case CustomParamType::Int:
- case CustomParamType::Float:
- {
- if (origLayer()->params.find(parameter.irSource) != origLayer()->params.end()) {
- std::stringstream parameterStream(origLayer()->params[parameter.irSource]);
- std::string param;
+ VPU_THROW_UNLESS(cnnParam->second.find(',') != std::string::npos,
+ "Error while parsing CNNetwork parameter '%s' for '%s' layer: port-index=%d is set, "
+ "but parameter is neither a tensor, nor an array type.",
+ cnnParam->first, origLayer()->type, parameter.portIndex);
+
+ std::string value;
+ std::stringstream parameterStream{cnnParam->second};
for (int i = 0; i <= parameter.portIndex; i++) {
- getline(parameterStream, param, ',');
+ getline(parameterStream, value, ',');
}
+ return value;
+ }();
- if (parameter.type == CustomParamType::Int) {
- serializer.append(static_cast<int32_t>(std::stoi(param)));
- serializer.append(static_cast<int32_t>(-1));
- } else {
- serializer.append(static_cast<float>(std::stof(param) ));
- serializer.append(static_cast<int32_t>(-2));
- }
- break;
+ if (parameter.type == CustomParamType::Int) {
+ serializer.append(static_cast<int32_t>(std::stoi(param)));
+ serializer.append(static_cast<int32_t>(-1));
} else {
- auto pos = parameter.irSource.find_first_of('.');
- if (pos != std::string::npos) {
- auto blob = parameter.irSource.substr(0, pos);
- auto dim = parameter.irSource.substr(pos + 1, std::string::npos);
-
- IE_ASSERT(dim.length() == 1)
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
- char dimLetter = dim[0];
-
- ie::DataPtr origData;
- if (blob == "I") {
- origData = origLayer()->insData[parameter.portIndex].lock();
- } else {
- origData = origLayer()->outData[parameter.portIndex];
- }
- IE_ASSERT(origData != nullptr);
+ serializer.append(static_cast<float>(std::stof(param)));
+ serializer.append(static_cast<int32_t>(-2));
+ }
+ break;
+ } else {
+ auto pos = parameter.irSource.find_first_of('.');
+ if (pos != std::string::npos) {
+ auto blob = parameter.irSource.substr(0, pos);
+ auto dim = parameter.irSource.substr(pos + 1, std::string::npos);
- auto dims = origData->getDims();
- int ndims = dims.size();
+ VPU_THROW_UNLESS(dim.length() == 1,
+ "Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
- if (ndims > 4)
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
+ char dimLetter = dim[0];
- const std::map<char, int> vars = {
- { 'b', 0 }, { 'B', 0 },
- { 'f', 1 }, { 'F', 1 },
- { 'y', 2 }, { 'Y', 2 },
- { 'x', 3 }, { 'X', 3 },
- };
+ ie::DataPtr origData;
+ if (blob == "I") {
+ origData = origLayer()->insData[parameter.portIndex].lock();
+ } else {
+ origData = origLayer()->outData[parameter.portIndex];
+ }
+ IE_ASSERT(origData != nullptr);
- auto var = vars.find(dimLetter);
- if (var != vars.end()) {
- auto res = dims.at(var->second-4+ndims);
+ auto dims = origData->getDims();
+ int ndims = dims.size();
+
+ if (ndims > 4) {
+ VPU_THROW_UNLESS(dim.length() == 1,
+ "Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
+ }
+ const std::map<char, int> vars = {
+ {'b', 0}, {'B', 0},
+ {'f', 1}, {'F', 1},
+ {'y', 2}, {'Y', 2},
+ {'x', 3}, {'X', 3},
+ };
+
+ auto var = vars.find(dimLetter);
+ if (var != vars.end()) {
+ auto res = dims.at(var->second - 4 + ndims);
+
+ serializer.append(static_cast<uint32_t>(res));
+ serializer.append(static_cast<int32_t>(-1));
+ } else {
+ VPU_THROW_FORMAT("Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
+ }
- serializer.append(static_cast<uint32_t>(res));
+ break;
+ } else {
+ VPU_THROW_UNLESS(parameter.portIndex < 0,
+ "Unable to deduce parameter '%s' for '%s' layer: port-index=%d is set, "
+ "but parameter is neither a tensor, nor an array type.",
+ parameter.argName, origLayer()->type, parameter.portIndex);
+ try {
+ if (parameter.type == CustomParamType::Int) {
+ serializer.append(static_cast<int32_t>(std::stoi(parameter.irSource)));
serializer.append(static_cast<int32_t>(-1));
} else {
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
+ serializer.append(static_cast<float>(std::stof(parameter.irSource)));
+ serializer.append(static_cast<int32_t>(-2));
}
-
break;
- } else {
- try {
- if (parameter.type == CustomParamType::Int) {
- serializer.append(static_cast<int32_t>(std::stoi(parameter.irSource)));
- serializer.append(static_cast<int32_t>(-1));
- } else {
- serializer.append(static_cast<float>(std::stof(parameter.irSource) ));
- serializer.append(static_cast<int32_t>(-2));
- }
- break;
- }
- catch (const std::invalid_argument&) {
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name
- <<", parameter is: " << parameter.irSource;
- }
+ } catch (const std::invalid_argument&) {
+ VPU_THROW_FORMAT("Unable to deduce parameter '%s' for '%s' layer. "
+ "Name is: '%s', parameter is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name, parameter.irSource);
}
}
}
- case CustomParamType::LocalData:
- {
- ie::DataPtr origData;
- if (parameter.dimSource == CustomDimSource::Input) {
- origData = origLayer()->insData[parameter.dimIdx].lock();
- } else {
- origData = origLayer()->outData[parameter.dimIdx];
- }
- IE_ASSERT(origData != nullptr);
-
- SmallVector<int, 3> sizes;
- calcSizesFromParams(DataDesc(origData->getTensorDesc()), parameter.bufferSizeRules, sizes);
-
- serializer.append(static_cast<int32_t>(sizes[0] * sizes[1] * sizes[2]));
- serializer.append(static_cast<int32_t>(-3));
+ }
+ case CustomParamType::LocalData: {
+ const auto size = localDataSizes.at(parameter.argName);
+ serializer.append(static_cast<int32_t>(size));
+ serializer.append(static_cast<int32_t>(-3));
- break;
- }
- default:
- VPU_THROW_EXCEPTION
- << "Unable to deduce parameter " << parameter.argName << " for "
- << origLayer()->type <<" layer. Name is: " << origLayer()->name;
+ break;
+ }
+ default:
+ VPU_THROW_FORMAT("Unable to deduce parameter '%s' for '%s' layer. Name is: '%s'",
+ parameter.argName, origLayer()->type, origLayer()->name);
}
}
}
} // namespace
-static void calcSizesFromParams(const DataDesc &desc, const SmallVector<std::string> &bufferSizeRules, SmallVector<int, 3> &sizes) {
- // assume output tensor is dimension source by default
- auto batchDim = desc.dim(Dim::N, 1);
- auto featureDim = desc.dim(Dim::C, 1);
- auto yDim = desc.dim(Dim::H, 1);
- auto xDim = desc.dim(Dim::W, 1);
-
- const std::map<char, int> vars = {
- { 'b', batchDim }, { 'B', batchDim },
- { 'f', featureDim }, { 'F', featureDim },
- { 'y', yDim }, { 'Y', yDim },
- { 'x', xDim }, { 'X', xDim },
- };
+static SmallVector<int> calcSizesFromParams(const DataDesc& desc, const SmallVector<std::string>& bufferSizeRules,
+ std::map<std::string, std::string> layerParams) {
+ {
+ const auto B = std::to_string(desc.dim(Dim::N, 1));
+ const auto F = std::to_string(desc.dim(Dim::C, 1));
+ const auto Y = std::to_string(desc.dim(Dim::H, 1));
+ const auto X = std::to_string(desc.dim(Dim::W, 1));
+
+ auto sizes = std::vector<std::pair<std::string, std::string>> {
+ {"b", B}, {"B", B},
+ {"f", F}, {"F", F},
+ {"y", Y}, {"Y", Y},
+ {"x", X}, {"X", X},
+ };
- sizes.reserve(std::max<size_t>(bufferSizeRules.size(), 3));
- for (const auto& rule : bufferSizeRules) {
- SimpleMathExpression expr;
- expr.setVariables(vars);
- expr.parse(rule);
- sizes.emplace_back(expr.evaluate());
- }
- while (sizes.size() < 3) {
- sizes.emplace_back(1);
+ std::move(begin(sizes), end(sizes), inserter(layerParams, end(layerParams)));
}
-}
-static CustomLayer::Ptr chooseSuitable(const std::vector<CustomLayer::Ptr>& customLayers,
- const std::map<std::string, std::string>& layerParams) {
- ie::details::CaselessEq<std::string> cmp;
+ MathExpression expr;
+ expr.setVariables(layerParams);
+ const auto parseSizeRule = [&expr](const std::string& rule) {
+ expr.parse(rule);
+ return expr.evaluate();
+ };
- for (const auto& customLayer : customLayers) {
- bool suitable = true;
- for (const auto& whereParam : customLayer->whereParams()) {
- if (layerParams.find(whereParam.first) == layerParams.end() ||
- !cmp(layerParams.find(whereParam.first)->second, whereParam.second)) {
- suitable = false;
- }
- }
- if (suitable) {
- return customLayer;
- }
- }
+ auto sizes = SmallVector<int>{};
+ sizes.reserve(bufferSizeRules.size());
+ std::transform(begin(bufferSizeRules), end(bufferSizeRules), std::back_inserter(sizes), parseSizeRule);
- IE_ASSERT(false);
- return CustomLayer::Ptr(nullptr);
+ return sizes;
}
void FrontEnd::parseCustom(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) {
IE_ASSERT(layer != nullptr);
IE_ASSERT(outputs.size() == 1);
- std::vector<CustomLayer::Ptr> customLayersForType;
- if (_customLayers.count(layer->type) > 0) {
- customLayersForType.push_back(chooseSuitable(_customLayers.find(layer->type)->second, layer->params));
- } else if (_customLayers.count(layer->type + "@stage_0") > 0) {
- int stageNum = 0;
- while (_customLayers.count(layer->type + "@stage_" + std::to_string(stageNum)) > 0) {
- customLayersForType.push_back(chooseSuitable(_customLayers.find(layer->type + "@stage_" + std::to_string(stageNum))->second,
- layer->params));
- stageNum++;
- }
- } else {
- IE_ASSERT(false);
- }
+ const auto suitableLayer = [&] {
+ const auto customLayersForType = _customLayers.find(layer->type);
+ IE_ASSERT(customLayersForType != _customLayers.end());
+ return getSuitableCustomLayer(customLayersForType->second, layer);
+ }();
+ IE_ASSERT(suitableLayer);
+ const auto kernels = suitableLayer->kernels();
// Get all buffers, buffers must be unique associated by port index
std::map<int, Data> tempBuffsMap;
- for (size_t stageNum = 0; stageNum < customLayersForType.size(); stageNum++) {
- for (auto& param : customLayersForType[stageNum]->bindings()) {
+ for (const auto& kernel : kernels) {
+ for (const auto& param : kernel.bindings()) {
if (param.type == CustomParamType::InputBuffer || param.type == CustomParamType::OutputBuffer) {
- SmallVector<int, 3> sizes;
- auto desc = (param.dimSource == CustomDimSource::Input) ? inputs[param.dimIdx]->desc() : outputs[param.dimIdx]->desc();
- calcSizesFromParams(desc, param.bufferSizeRules, sizes);
- auto buf = model->addNewData("custom_" + layer->type + "_buf", DataDesc({sizes[0], sizes[1], sizes[2], 1}));
+ const auto desc = (param.dimSource == CustomDimSource::Input) ? inputs[param.dimIdx]->desc()
+ : outputs[param.dimIdx]->desc();
+ const auto sizes = calcSizesFromParams(desc, { param.bufferSizeRule }, layer->params);
+ const auto buf = model->addNewData("custom_" + layer->type + "_buf", DataDesc({sizes[0], 1, 1, 1}));
if (tempBuffsMap.find(param.portIndex) == tempBuffsMap.end()) {
tempBuffsMap[param.portIndex] = buf;
}
}
// Gather inputs and outputs for each stage for the layer
- for (int stage_num = 0; stage_num < customLayersForType.size(); stage_num++) {
- auto customLayer = customLayersForType[stage_num];
+ for (int stage_num = 0; stage_num < kernels.size(); stage_num++) {
+ const auto& kernel = kernels[stage_num];
std::map<std::string, int> ports;
std::vector<CustomDataFormat> formats;
// Gather inputs
DataVector stageInputs;
- for (auto& param : customLayer->bindings()) {
+ for (auto& param : kernel.bindings()) {
if (param.type == CustomParamType::Input) {
ports[param.argName] = stageInputs.size();
formats.emplace_back(param.format);
}
// Gather data blobs
- for (auto& param : customLayer->bindings()) {
+ for (auto& param : kernel.bindings()) {
if (param.type == CustomParamType::Data) {
auto blobIterator = layer->blobs.find(param.irSource);
if (blobIterator != layer->blobs.end()) {
}
}
- customLayer->setStageNumInputs(stageInputs.size());
formats.emplace_back(CustomDataFormat::Any);
// Get kernel binary
- auto kernelNode = _kernelNodes.find(customLayer->kernelBinary());
+ auto kernelNode = _kernelNodes.find(kernel.kernelBinary());
if (kernelNode != _kernelNodes.end()) {
stageInputs.emplace_back((kernelNode->second));
} else {
- auto kernelBinaryDesc = DataDesc({customLayer->kernelBinary().length()});
+ auto kernelBinaryDesc = DataDesc({kernel.kernelBinary().length()});
kernelBinaryDesc.setType(DataType::U8);
auto kernelBinary = model->addConstData(
layer->type + "@kernelBinary",
kernelBinaryDesc,
- std::make_shared<KernelBinaryContent>(customLayer->kernelBinary()));
+ std::make_shared<KernelBinaryContent>(kernel.kernelBinary()));
stageInputs.emplace_back((kernelBinary));
- _kernelNodes[customLayer->kernelBinary()] = kernelBinary;
+ _kernelNodes[kernel.kernelBinary()] = kernelBinary;
}
DataVector stageOutputs;
- for (auto& param : customLayer->bindings()) {
+ for (auto& param : kernel.bindings()) {
if (param.type == CustomParamType::Output) {
ports[param.argName] = stageInputs.size() + stageOutputs.size();
stageOutputs.emplace_back(outputs[param.portIndex]);
}
auto stage = model->addNewStage<CustomStage>(
- layer->name + ((customLayersForType.size() == 1) ? "" : "@stage_" + std::to_string(stage_num)),
+ layer->name + ((kernels.size() == 1) ? "" : "@stage_" + std::to_string(stage_num)),
StageType::Custom,
layer,
stageInputs,
stageOutputs);
- stage->attrs().set("customLayer", customLayer);
+ stage->attrs().set("customKernel", suitableLayer->kernels()[stage_num]);
stage->attrs().set("ports", ports);
stage->attrs().set("formats", formats);
- SmallVector<int, 3> gws;
- SmallVector<int, 3> lws;
- auto dimSource = (customLayer->dimSource() == CustomDimSource::Input) ? inputs : outputs;
- calcSizesFromParams(dimSource[customLayer->dimSourceIndex()]->desc(), customLayer->globalSizeRules(), gws);
- calcSizesFromParams(dimSource[customLayer->dimSourceIndex()]->desc(), customLayer->localSizeRules(), lws);
+ const auto& dimSource = (kernel.dimSource() == CustomDimSource::Input) ? inputs : outputs;
+ const auto& dataDesc = dimSource[kernel.dimSourceIndex()]->desc();
+
+ const auto gws = calcSizesFromParams(dataDesc, kernel.globalGridSizeRules(), layer->params);
+ const auto lws = calcSizesFromParams(dataDesc, kernel.localGridSizeRules(), layer->params);
stage->attrs().set("gws", gws);
stage->attrs().set("lws", lws);
+ const auto localDataSizes = [&] {
+ auto sizes = std::map<std::string, int>{};
+ for (const auto& bind : kernel.bindings()) {
+ if (bind.type == CustomParamType::LocalData) {
+ const auto& source = bind.dimSource == CustomDimSource::Input ? inputs : outputs;
+ const auto& desc = source[bind.dimIdx]->desc();
+ const auto size = calcSizesFromParams(desc, { bind.bufferSizeRule }, layer->params);
+ sizes.emplace(bind.argName, size[0]);
+ }
+ }
+ return sizes;
+ }();
+
+ stage->attrs().set("localDataSizes", localDataSizes);
+
std::map<int, DimsOrder> inputOrders;
std::map<int, DimsOrder> outputOrders;
- std::map<std::string, CustomLayer::KernelParam> b2b;
- for (const auto& kp : customLayer->bindings()) {
+ std::map<std::string, CustomKernel::KernelParam> b2b;
+ for (const auto& kp : kernel.bindings()) {
b2b[kp.argName] = kp;
}
{ CustomDataFormat::FYX, DimsOrder::CHW }
};
- for (const auto& kp : customLayer->parameters()) {
+ for (const auto& kp : kernel.parameters()) {
const auto& parameter = b2b[kp];
if (parameter.type == CustomParamType::Input) {
stage->attrs().set("inputOrders", std::move(inputOrders));
stage->attrs().set("outputOrders", std::move(outputOrders));
- int buffer_size = customLayer->kernelBinary().length() + 1024;
+ int buffer_size = kernel.kernelBinary().length() + 1024;
model->addTempBuffer(
stage,
DataDesc({buffer_size}));
namespace vpu {
void FrontEnd::parseDSR(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const {
- VPU_THROW_UNLESS(inputs.size() == 2, "Error while parsing {} with type {}, got {} inputs, while {} were expected",
+ VPU_THROW_UNLESS(inputs.size() == 2, "Error while parsing {} of type {}, got {} inputs, while {} were expected",
layer->name, layer->type, inputs.size(), 2);
-
- VPU_THROW_UNLESS(outputs.size() == 1, "Error while parsing {} with type {}, got {} outputs, while {} were expected",
- layer->name, layer->type, outputs.size(), 1);
-
const auto& data = inputs[0];
const auto& shape = inputs[1];
+ VPU_THROW_UNLESS(outputs.size() == 1, "Parsing layer {} of type {} failed: got {} outputs, while {} were expected",
+ layer->name, layer->type, outputs.size(), 1);
const auto& dataOutput = outputs[0];
+ const auto dataProducerEdge = data->producerEdge();
+ VPU_THROW_UNLESS(dataProducerEdge != nullptr, "Parsing layer {} of type {} failed: input with index {} (of name {}) must have a producer",
+ layer->name, layer->type, 0, data->name());
+
VPU_THROW_UNLESS(shape->desc().numDims() == 1,
- "Error while parsing {} with type {}, the number of dimensions for the second input {} should be equal to 1 "
- "but got {} instead",
- layer->name, layer->type, shape->name(), shape->desc().numDims());
+ "Parsing layer {} of type {} failed: input with index {} (of name {}) must have rank equal to {}, actual is {}",
+ layer->name, layer->type, 0, shape->name(), 1, shape->desc().numDims());
VPU_THROW_UNLESS(shape->desc().totalDimSize() == data->desc().numDims(),
- "Error while parsing {} with type {}, the total number of elements for the second input {} should be equal to "
- "the number of dimensions for the first input {}, but got {} and {} respectively",
- layer->name, layer->type, shape->name(), data->name(), shape->desc().totalDimSize(), data->desc().numDims());
+ "Parsing layer {} of type {} failed: input with index {} (of name {}) must have the same total elements number as "
+ "input with index {} (of name {}), actual {} and {} respectively",
+ layer->name, layer->type, 0, shape->name(), 1, data->name(), shape->desc().totalDimSize(), data->desc().numDims());
- // Dynamic input shape is unsupported
- VPU_THROW_UNLESS(data->producer() != nullptr,
- "Parsing layer {} with type {} failed: DSR stages must have a producer, but actually it doesn't",
- layer->name, layer->type);
+ const auto shapeProducerEdge = shape->producerEdge();
+ VPU_THROW_UNLESS(shapeProducerEdge != nullptr, "Parsing layer {} of type {} failed: input with index {} (of name {}) must have a producer",
+ layer->name, layer->type, 1, shape->name());
- const auto dataOutputEdge = data->producerEdge();
- const auto shapeOutputEdge = shape->producerEdge();
+ model->replaceStageOutput(dataProducerEdge, dataOutput);
+ if (const auto& dataToShapeEdge = data->parentDataToShapeEdge()) {
+ model->replaceDataToShapeChild(dataToShapeEdge, dataOutput);
+ }
+ model->removeUnusedData(data);
if (dataOutput->usage() == DataUsage::Output) {
// Create the second output with shape in case of dynamic output
const auto& shapeOutput = model->addOutputData(dataOutput->name() + "@shape", shape->desc());
- model->replaceStageOutput(shapeOutputEdge, shapeOutput);
+ model->replaceStageOutput(shapeProducerEdge, shapeOutput);
+ model->connectDataWithShape(shapeOutput, dataOutput);
+
+ for (const auto& dataToShapeEdge : shape->childDataToShapeEdges()) {
+ model->replaceDataToShapeParent(dataToShapeEdge, shapeOutput);
+ }
model->removeUnusedData(shape);
} else {
model->connectDataWithShape(shape, dataOutput);
}
-
- model->replaceStageOutput(dataOutputEdge, dataOutput);
- model->removeUnusedData(data);
}
} // namespace vpu
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
+#include <vector>
+#include <map>
+#include <unordered_set>
+#include <memory>
+#include <set>
+
+namespace vpu {
+
+namespace {
+
+class OutShapeOfReshapeStage final : public StageNode {
+private:
+ StagePtr cloneImpl() const override {
+ return std::make_shared<OutShapeOfReshapeStage>(*this);
+ }
+
+ void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+ }
+
+ void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+ }
+
+ void finalizeDataLayoutImpl() override {
+ }
+
+ void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+ }
+
+ void initialCheckImpl() const override {
+ assertInputsOutputsTypes(this,
+ {{DataType::S32}, {DataType::S32}},
+ {{DataType::S32}});
+ }
+
+ void serializeParamsImpl(BlobSerializer& serializer) const override {
+ auto specialZero = attrs().get<bool>("specialZero");
+
+ serializer.append(static_cast<int32_t>(specialZero));
+ }
+
+ void serializeDataImpl(BlobSerializer& serializer) const override {
+ input(0)->serializeBuffer(serializer);
+ input(1)->serializeBuffer(serializer);
+ output(0)->serializeBuffer(serializer);
+ }
+};
+
+} // namespace
+
+void FrontEnd::parseOutShapeOfReshape(
+ const Model& model,
+ const ie::CNNLayerPtr& layer,
+ const DataVector& inputs,
+ const DataVector& outputs) const {
+ VPU_THROW_UNLESS(inputs.size() == 2,
+ "OutShapeOfReshape stage with name %s must have only 2 inputs, "
+ "actually provided %d", layer->name, inputs.size());
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "OutShapeOfReshape stage with name %s must have only 1 output, "
+ "actually provided %d", layer->name, outputs.size());
+
+ auto inDataShape = inputs[0];
+ auto outShapeDescriptor = inputs[1];
+ auto outDataShape = outputs[0];
+
+ VPU_THROW_UNLESS(inDataShape->desc().numDims() == 1,
+ "OutShapeOfReshape stage with name %s must have 1D input data shape tensor, "
+ "actually provided %dD tensor", layer->name, inDataShape->desc().numDims());
+ VPU_THROW_UNLESS(outShapeDescriptor->desc().numDims() == 1,
+ "OutShapeOfReshape stage with name %s must have 1D output shape descriptor "
+ "tensor, actually provided %dD tensor",
+ layer->name, outShapeDescriptor->desc().numDims());
+ VPU_THROW_UNLESS(outDataShape->desc().numDims() == 1,
+ "OutShapeOfReshape stage with name %s must have 1D output data shape tensor, "
+ "actually provided %dD tensor", layer->name, outDataShape->desc().numDims());
+
+ VPU_THROW_UNLESS(outShapeDescriptor->desc().totalDimSize() == outDataShape->desc().totalDimSize(),
+ "OutShapeOfReshape stage with name %s must have output shape descriptor and "
+ "output data shape tensor with equal length, actually provided %d vs %d",
+ layer->name, outShapeDescriptor->desc().totalDimSize(),
+ outDataShape->desc().totalDimSize());
+
+
+ auto outShapeOfReshapeStage = model->addNewStage<OutShapeOfReshapeStage>(
+ layer->name,
+ StageType::OutShapeOfReshape,
+ layer,
+ inputs,
+ outputs);
+
+ auto specialZero = layer->GetParamAsInt("special_zero", 0);
+ outShapeOfReshapeStage->attrs().set<bool>("specialZero", specialZero);
+}
+
+} // namespace vpu
int outputWidth = output->desc().dim(Dim::W);
int outputHeight = output->desc().dim(Dim::H);
+ // kernelStrideY doesn't matter when kernelSizeY==InputSizeY, change it to try HW in 1D case
+ if (kernelSizeY == inputHeight + padTop + padBottom)
+ kernelStrideY = kernelStrideX;
+
bool tryHW = canTryHW(poolType,
inputWidth,
inputHeight,
//
#include <vpu/frontend/frontend.hpp>
+#include <vpu/model/data_desc.hpp>
#include <vpu/model/data_contents/ie_blob_content.hpp>
#include <algorithm>
#include <memory>
#include <set>
+#include <vector>
#include <string>
namespace vpu {
}
void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
- auto input0 = inputEdge(0)->input();
- auto input1 = inputEdge(1)->input();
- auto output = outputEdge(0)->output();
-
- auto in0Desc = input0->desc();
- auto in1Desc = input1->desc();
- auto outDesc = output->desc();
-
- auto in0Order = DimsOrder::fromNumDims(in0Desc.numDims());
- auto in1Order = DimsOrder::fromNumDims(in1Desc.numDims());
- auto outOrder = DimsOrder::fromNumDims(outDesc.numDims());
-
- orderInfo.setInput(inputEdge(0), in0Order);
- orderInfo.setInput(inputEdge(1), in1Order);
- orderInfo.setOutput(outputEdge(0), outOrder);
+ orderInfo.setInput(inputEdge(0), input(0)->desc().dimsOrder());
+ orderInfo.setInput(inputEdge(1), input(1)->desc().dimsOrder());
+ orderInfo.setOutput(outputEdge(0), output(0)->desc().dimsOrder());
}
void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
}
void finalizeDataLayoutImpl() override {
- auto input0 = inputEdge(0)->input();
- auto input1 = inputEdge(1)->input();
-
- auto in0Desc = input0->desc();
- auto in1Desc = input1->desc();
-
- IE_ASSERT(input1->usage() == DataUsage::Const);
+ auto reductionAxes = input(1);
+ auto in0Desc = input(0)->desc();
+ auto in1Desc = reductionAxes->desc();
+ VPU_THROW_UNLESS(reductionAxes->usage() == DataUsage::Const,
+ "Stage {} of type {} expects input with index {} ({}) to be {}, but it is {}",
+ name(), type(), 1, reductionAxes->name(), DataUsage::Const, reductionAxes->usage());
size_t ndims = in0Desc.numDims();
- IE_ASSERT(in1Desc.numDims() == 1);
+ VPU_THROW_UNLESS(in1Desc.numDims() == 1,
+ "Stage {} of type {} expects input with index {} ({}) to have dimensions number is {}, but it is {}",
+ name(), type(), 1, reductionAxes->name(), 1, in1Desc.numDims());
size_t indicesSize = in1Desc.totalDimSize();
- IE_ASSERT(indicesSize <= ndims);
+ VPU_THROW_UNLESS(indicesSize <= ndims,
+ "Stage {} of type {} expects input with index {} ({}) to have total size not greater than dimensions ",
+ "number of input with index {} ({}), but it is {} > {}",
+ name(), type(), 1, reductionAxes->name(), 0, input(0)->name(), indicesSize, ndims);
- const auto oldIndices = input1->content()->get<int32_t>();
+ const auto oldIndices = reductionAxes->content()->get<int32_t>();
auto newIndicesBlob = ie::make_shared_blob<int32_t>(InferenceEngine::TensorDesc(
ie::Precision::I32,
auto newIndices = newIndicesBlob->buffer().as<int32_t*>();
- const auto defDimsOrder = DimsOrder::fromNumDims(ndims);
- const auto defPerm = defDimsOrder.toPermutation();
+ const auto defPerm = DimsOrder::fromNumDims(ndims).toPermutation();
+ const auto dimsOrder = in0Desc.dimsOrder();
for (size_t i = 0; i < indicesSize; ++i) {
auto irIndex = oldIndices[i];
if (irIndex < 0) {
// handle negative indices
- irIndex = ndims - irIndex;
+ irIndex = ndims - std::abs(irIndex);
}
- IE_ASSERT(irIndex < ndims);
-
- const auto irRevIndex = ndims - 1 - irIndex;
+ VPU_THROW_UNLESS(irIndex < ndims,
+ "Stage {} of type {} expects input with index {} ({}) include values less than ",
+ "dimensions number of input with index {} ({}), but it is {} >= {}",
+ name(), type(), 1, reductionAxes->name(), 0, input(0)->name(), irIndex, ndims);
- const auto irDim = defPerm[irRevIndex];
-
- const auto vpuDimInd = in0Desc.dimsOrder().dimInd(irDim);
- newIndices[i] = vpuDimInd;
+ const auto reducedDim = defPerm[ndims - 1 - irIndex];
+ newIndices[i] = dimsOrder.dimInd(reducedDim);
}
-
std::sort(newIndices, newIndices + indicesSize);
auto newList = model()->duplicateData(
- input1,
+ reductionAxes,
"",
DataDesc(),
ieBlobContent(newIndicesBlob, DataType::S32));
}
void initialCheckImpl() const override {
- IE_ASSERT(input(0)->desc().type() == output(0)->desc().type());
+ VPU_THROW_UNLESS(input(0)->desc().type() == output(0)->desc().type(),
+ "Stage {} of type {} expects that data types of input with index {} ({}) ",
+ "and output with index {} ({}) are the same, but it is {} and {}",
+ name(), type(), 0, input(0)->name(), 0, output(0)->name(), input(0)->desc().type(), output(0)->desc().type());
assertInputsOutputsTypes(this,
{{DataType::FP16, DataType::S32}, {DataType::S32}},
{{DataType::FP16, DataType::S32}});
void FrontEnd::parseReduce(const Model& model, const ie::CNNLayerPtr& _layer, const DataVector& inputs, const DataVector& outputs) const {
auto layer = std::dynamic_pointer_cast<ie::ReduceLayer>(_layer);
- IE_ASSERT(layer != nullptr);
-
- IE_ASSERT(inputs.size() == 2);
- IE_ASSERT(outputs.size() == 1);
+ VPU_THROW_UNLESS(layer != nullptr,
+ "Layer {} of type {} is nullptr",
+ layer->name, layer->type);
+ VPU_THROW_UNLESS(inputs.size() == 2,
+ "Layer {} of type {} expects {} inputs, but provided {}",
+ layer->name, layer->type, 2, inputs.size());
+ VPU_THROW_UNLESS(outputs.size() == 1,
+ "Layer {} of type {} expects {} output, but provided {}",
+ layer->name, layer->type, 1, outputs.size());
auto stageType = StageType::None;
if (layer->type == "ReduceAnd") {
realOutputs = {outputValues};
}
- const bool isArgMaxPossible = outputsMode != TopKOutputs::All && mode == TopKMode::Max
- && ((sort == TopKSort::Value && outputsMode == TopKOutputs::ValueOnly)
- || (sort == TopKSort::Index && outputsMode == TopKOutputs::IndexOnly));
-
auto stage = model->addNewStage<TopKStage>(layer->name,
- isArgMaxPossible ? StageType::ArgMax : StageType::TopK,
+ StageType::TopK,
layer, inputs, realOutputs);
stage->attrs().set<Dim>("axis", axis);
#include <vpu/parsed_config.hpp>
#include <vpu/utils/profiling.hpp>
#include <vpu/utils/error.hpp>
+#include <transformations/common_optimizations/common_optimizations.hpp>
#include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"
#include "generic_ie.hpp"
using namespace vpu::MyriadPlugin;
ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(
- const ICore* /*core*/,
const ICNNNetwork& network,
const std::map<std::string, std::string>& config) {
VPU_PROFILE(LoadExeNetworkImpl);
auto clonedNetwork = cloneNetwork(network);
if (auto function = clonedNetwork->getFunction()) {
ngraph::op::GenericIE::DisableReshape noReshape(function);
- vpu::DynamicToStaticShape().transform(*function);
+ ngraph::pass::CommonOptimizations().run_on_function(function);
+ vpu::DynamicToStaticShape().transform(function);
}
return std::make_shared<ExecutableNetwork>(*clonedNetwork, _devicePool, parsedConfigCopy);
void SetConfig(const std::map<std::string, std::string>& config) override;
ie::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
- const ie::ICore* core,
const ie::ICNNNetwork& network,
const std::map<std::string, std::string>& config) override;
EXCLUDED_SOURCE_DIRS
${CMAKE_CURRENT_SOURCE_DIR}/extension_lib
LINK_LIBRARIES
+ gmock
funcTestUtils
ngraphFunctions
inference_engine_transformations
ADD_CPPLINT
- LABELS
- INFERENCE_ENGINE
DEPENDENCIES
extension_tests
+ mock_engine
+ LABELS
+ IE
)
include(CMakeParseArguments)
set(content "#include <${header_file}>\n${content}")
endforeach()
set(source_file "${CMAKE_CURRENT_BINARY_DIR}/modern_flags_${IE_TEST_TEST_SUFFIX}.cpp")
+ file(REMOVE ${source_file})
file(GENERATE OUTPUT ${source_file} CONTENT ${content})
set(target_name ieFuncTestsCompilation${IE_TEST_TEST_SUFFIX})
using namespace std;
using namespace InferenceEngine::details;
-class CaselessTests : public ::testing::Test {
- protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
- public:
-
-};
+using CaselessTests = ::testing::Test;
TEST_F(CaselessTests, emptyAreEqual) {
ASSERT_TRUE(InferenceEngine::details::equal("", ""));
}
TEST_F(CaselessTests, canFindCaslessInUnordered) {
-
caseless_unordered_map <string, int> storage = {
{"Abc", 1},
{"bC", 2},
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ie_core.hpp>
+#include <details/ie_exception.hpp>
+#include <ie_plugin_config.hpp>
+#include <ie_extension.h>
+
+#include <file_utils.h>
+#include <ngraph_functions/subgraph_builders.hpp>
+#include <functional_test_utils/test_model/test_model.hpp>
+#include <common_test_utils/file_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
+
+#include <gtest/gtest.h>
+#include <thread>
+#include <atomic>
+#include <mutex>
+#include <chrono>
+#include <fstream>
+
+class CoreThreadingTests : public ::testing::Test {
+public:
+ void SetUp() override {
+ }
+
+ void runParallel(std::function<void(void)> func,
+ const unsigned int iterations = 100,
+ const unsigned int threadsNum = 8) {
+ std::vector<std::thread> threads(threadsNum);
+
+ for (auto & thread : threads) {
+ thread = std::thread([&](){
+ for (unsigned int i = 0; i < iterations; ++i) {
+ func();
+ }
+ });
+ }
+
+ for (auto & thread : threads) {
+ if (thread.joinable())
+ thread.join();
+ }
+ }
+
+ void safeAddExtension(InferenceEngine::Core & ie) {
+ try {
+ auto extension = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(
+ FileUtils::makeSharedLibraryName<char>({},
+ std::string("extension_tests") + IE_BUILD_POSTFIX));
+ ie.AddExtension(extension);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ ASSERT_STR_CONTAINS(ex.what(), "name: experimental. Opset");
+ }
+ }
+};
+
+// tested function: SetConfig
+TEST_F(CoreThreadingTests, SetConfigPluginDoesNotExist) {
+ InferenceEngine::Core ie;
+ std::map<std::string, std::string> localConfig = {
+ { CONFIG_KEY(PERF_COUNT), InferenceEngine::PluginConfigParams::YES }
+ };
+
+ runParallel([&] () {
+ ie.SetConfig(localConfig);
+ }, 10000);
+}
+
+// tested function: RegisterPlugin
+TEST_F(CoreThreadingTests, RegisterPlugin) {
+ InferenceEngine::Core ie;
+ std::atomic<int> index{0};
+ runParallel([&] () {
+ const std::string deviceName = std::to_string(index++);
+ ie.RegisterPlugin(std::string("mock_engine") + IE_BUILD_POSTFIX, deviceName);
+ ie.GetVersions(deviceName);
+ ie.UnregisterPlugin(deviceName);
+ }, 4000);
+}
+
+// tested function: RegisterPlugins
+TEST_F(CoreThreadingTests, RegisterPlugins) {
+ InferenceEngine::Core ie;
+ std::atomic<unsigned int> index{0};
+
+ auto getPluginXml = [&] () -> std::tuple<std::string, std::string> {
+ std::string indexStr = std::to_string(index++);
+ std::string pluginsXML = InferenceEngine::getIELibraryPath() +
+ FileUtils::FileSeparator +
+ "test_plugins" + indexStr + ".xml";
+ std::ofstream file(pluginsXML);
+
+ file << "<ie><plugins><plugin location=\"";
+ file << FileUtils::FileTraits<char>::SharedLibraryPrefix();
+ file << "mock_engine";
+ file << IE_BUILD_POSTFIX;
+ file << FileUtils::DotSymbol<char>::value;
+ file << FileUtils::FileTraits<char>::SharedLibraryExt();
+ file << "\" name=\"";
+ file << indexStr;
+ file << "\"></plugin></plugins></ie>";
+ file.flush();
+ file.close();
+
+ return std::tie(pluginsXML, indexStr);
+ };
+
+ runParallel([&] () {
+ std::string fileName, deviceName;
+ std:tie(fileName, deviceName) = getPluginXml();
+ ie.RegisterPlugins(fileName);
+ ie.GetVersions(deviceName);
+ ASSERT_EQ(0, std::remove(fileName.c_str()));
+ }, 1000);
+}
+
+// tested function: GetAvailableDevices, UnregisterPlugin
+// TODO: some plugins initialization (e.g. GNA) failed during such stress-test scenario
+TEST_F(CoreThreadingTests, DISABLED_GetAvailableDevices) {
+ InferenceEngine::Core ie;
+ runParallel([&] () {
+ std::vector<std::string> devices = ie.GetAvailableDevices();
+
+ // unregister all the devices
+ for (auto && deviceName : devices) {
+ try {
+ ie.UnregisterPlugin(deviceName);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ // if several threads unload plugin at once, the first thread does this
+ // while all others will throw an exception that plugin is not registered
+ ASSERT_STR_CONTAINS(ex.what(), "name is not registered in the");
+ }
+ }
+ }, 30);
+}
+
+// tested function: ReadNetwork, AddExtension
+TEST_F(CoreThreadingTests, ReadNetwork) {
+ InferenceEngine::Core ie;
+ auto model = FuncTestUtils::TestModel::convReluNormPoolFcModelFP32;
+ auto network = ie.ReadNetwork(model.model_xml_str, model.weights_blob);
+
+ runParallel([&] () {
+ safeAddExtension(ie);
+ (void)ie.ReadNetwork(model.model_xml_str, model.weights_blob);
+ }, 100, 12);
+}
#include "debug.h"
#include <string>
-class DebugTests : public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
+using DebugTests = ::testing::Test;
TEST_F(DebugTests, tolowerWorksWithEmptyString) {
std::string str = "";
class ExtensionLibTests : public CommonTestUtils::TestsCommon {
public:
std::string getExtensionPath() {
-#ifdef _WIN32
-#ifdef __MINGW32__
- const char pre[] = "lib";
-#else
- const char pre[] = "";
-#endif
-#ifdef NDEBUG
- const char ext[] = ".dll";
-#else
- const char ext[] = "d.dll";
-#endif
- const char FileSeparator[] = "\\";
-
-#else
- const char FileSeparator[] = "/";
-#if defined __APPLE__
-#ifdef NDEBUG
- const char ext[] = ".dylib";
-#else
- const char ext[] = "d.dylib";
-#endif
- const char pre[] = "lib";
-#else
- const char pre[] = "lib";
- const char ext[] = ".so";
-#endif
-#endif
- return getIELibraryPath() + FileSeparator + pre + "extension_tests" + ext;
+ return FileUtils::makeSharedLibraryName<char>({},
+ std::string("extension_tests") + IE_BUILD_POSTFIX);
}
};
#include <string>
#include "ngraph_reader_tests.hpp"
+#include <ngraph/opsets/opset.hpp>
+#include <ngraph/ngraph.hpp>
+#include <ie_iextension.h>
+
+class FakeAbs : public ngraph::op::Op {
+public:
+ static constexpr ngraph::NodeTypeInfo type_info{"Abs", 100500};
+ const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
+
+ FakeAbs() = default;
+ FakeAbs(const ngraph::Output<ngraph::Node>& arg): ngraph::op::Op({arg}) {
+ constructor_validate_and_infer_types();
+ }
+ void validate_and_infer_types() override {
+ set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
+ }
+ std::shared_ptr<ngraph::Node> copy_with_new_args(const ngraph::NodeVector& new_args) const override {
+ return std::make_shared<FakeAbs>(new_args.at(0));
+ }
+ bool visit_attributes(ngraph::AttributeVisitor& visitor) override {
+ return true;
+ }
+};
+constexpr ngraph::NodeTypeInfo FakeAbs::type_info;
+
+class AbsFakeExtension: public InferenceEngine::IExtension {
+public:
+ void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override {}
+ void Release() noexcept override { delete this; }
+ void Unload() noexcept override {}
+
+ std::map<std::string, ngraph::OpSet> getOpSets() override{
+ std::map<std::string, ngraph::OpSet> opsets;
+ ngraph::OpSet opset;
+ opset.insert<FakeAbs>();
+ opsets["experimental"] = opset;
+ return opsets;
+ }
+};
+
+TEST_F(NGraphReaderTests, ReadAbsFromCustomOpsetNetwork) {
+ std::string model = R"V0G0N(
+<net name="Network" version="10">
+ <layers>
+ <layer name="in1" type="Parameter" id="0" version="opset1">
+ <data element_type="f32" shape="1,3,22,22"/>
+ <output>
+ <port id="0" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </output>
+ </layer>
+ <layer name="Abs" id="1" type="Abs" version="experimental">
+ <input>
+ <port id="1" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </input>
+ <output>
+ <port id="2" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </output>
+ </layer>
+ <layer name="output" type="Result" id="2" version="opset1">
+ <input>
+ <port id="0" precision="FP32">
+ <dim>1</dim>
+ <dim>3</dim>
+ <dim>22</dim>
+ <dim>22</dim>
+ </port>
+ </input>
+ </layer>
+ </layers>
+ <edges>
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
+ <edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
+ </edges>
+</net>
+)V0G0N";
+
+ Core ie;
+ ie.AddExtension(std::make_shared<AbsFakeExtension>());
+ Blob::Ptr weights;
+
+ auto network = ie.ReadNetwork(model, weights);
+ auto nGraph = network.getFunction();
+ bool genericNodeExists = false;
+ const std::string type = "Abs";
+ for (auto op : nGraph->get_ops()) {
+ if (type == op->get_type_info().name && 100500 == op->get_type_info().version)
+ genericNodeExists = true;
+ }
+ ASSERT_TRUE(genericNodeExists);
+}
TEST_F(NGraphReaderTests, ReadAbsNetwork) {
std::string model = R"V0G0N(
</output>
</layer>
<layer id="3" name="add" precision="FP32" type="ReLU">
- <data originalLayersNames="relu" />
+ <data originalLayersNames="relu"/>
<input>
<port id="0">
<dim>1</dim>
allocator = details::make_pre_allocator(&*mybuf.begin(), mybuf.size());
}
std::shared_ptr<IAllocator> allocator;
-
};
TEST_F(PreallocatorTests, canAccessPreAllocatedMemory) {
- void * handle = allocator->alloc(3);
- float * ptr = (float *)allocator->lock(handle);
+ void * handle = allocator->alloc(3);
+ float * ptr = reinterpret_cast<float*>(allocator->lock(handle));
- mybuf = {1.1f,2.2f,3.3f};
+ mybuf = { 1.1f, 2.2f, 3.3f };
ASSERT_EQ(ptr, &*mybuf.begin());
ASSERT_EQ(ptr[0], 1.1f);
TEST_F(PreallocatorTests, canNotLockWrongHandle) {
void * handle = allocator->alloc(3);
- EXPECT_EQ(nullptr, allocator->lock(1 + (int*)handle));
+ EXPECT_EQ(nullptr, allocator->lock(1 + reinterpret_cast<int*>(handle)));
}
using namespace std;
-class PreProcessTests : public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
+using PreProcessTests = ::testing::Test;
TEST_F(PreProcessTests, throwsOnSettingNullMeanImage) {
InferenceEngine::PreProcessInfo info;
using namespace std;
using namespace InferenceEngine;
-class ResponseBufferTests: public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
-
+using ResponseBufferTests = ::testing::Test;
TEST_F(ResponseBufferTests, canCreateResponseMessage) {
ResponseDesc desc;
// SPDX-License-Identifier: Apache-2.0
//
+#include <gtest/gtest.h>
+
#include <ie_plugin_ptr.hpp>
-#include "tests_common.hpp"
+#include <file_utils.h>
#include "details/ie_so_loader.h"
-#include "inference_engine.hpp"
using namespace std;
using namespace InferenceEngine;
IE_SUPPRESS_DEPRECATED_START
-class SharedObjectLoaderTests: public TestsCommon {
+class SharedObjectLoaderTests: public ::testing::Test {
protected:
+ std::string get_mock_engine_name() {
+ return FileUtils::makeSharedLibraryName<char>(getIELibraryPath(),
+ std::string("mock_engine") + IE_BUILD_POSTFIX);
+ }
+
void loadDll(const string &libraryName) {
sharedObjectLoader.reset(new details::SharedObjectLoader(libraryName.c_str()));
}
unique_ptr<SharedObjectLoader> sharedObjectLoader;
-
+
template <class T>
std::function<T> make_std_function(const std::string& functionName) {
- std::function <T> ptr (reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
+ std::function<T> ptr(reinterpret_cast<T*>(sharedObjectLoader->get_symbol(functionName.c_str())));
return ptr;
}
};
TEST_F(SharedObjectLoaderTests, canFindExistedMethod) {
loadDll(get_mock_engine_name());
-
+
auto factory = make_std_function<StatusCode(IInferencePlugin*&, ResponseDesc*)>("CreatePluginEngine");
EXPECT_NE(nullptr, factory);
}
TEST_F(SharedObjectLoaderTests, throwIfMethodNofFoundInLibrary) {
loadDll(get_mock_engine_name());
-
+
EXPECT_THROW(make_std_function<IInferencePlugin*()>("wrong_function"), InferenceEngine::details::InferenceEngineException);
}
#include <gmock/gmock.h>
#include <gmock/gmock-spec-builders.h>
+#include <file_utils.h>
+#include <ie_plugin_ptr.hpp>
+
#include <memory>
-#include <tests_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
#include <details/ie_so_pointer.hpp>
#include <details/ie_irelease.hpp>
+using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace ::testing;
using ::testing::InSequence;
}
};
-template<class T=PointedObjHelper, class L = SharedObjectLoaderHelper>
+template <class T = PointedObjHelper, class L = SharedObjectLoaderHelper>
class SoPointerHelper : public SOPointer<T, L> {
public:
SoPointerHelper(std::shared_ptr<L>&& loader, std::shared_ptr<T>&& object)
ASSERT_STR_DOES_NOT_CONTAIN(e.what(), "from CWD:");
}
}
+
+using SymbolLoaderTests = ::testing::Test;
+
+TEST_F(SymbolLoaderTests, throwCreateNullPtr) {
+ ASSERT_THROW(SymbolLoader<SharedObjectLoader>(nullptr), InferenceEngineException);
+}
+
+TEST_F(SymbolLoaderTests, instantiateSymbol) {
+ std::string name = FileUtils::makeSharedLibraryName<char>(getIELibraryPath(),
+ std::string("mock_engine") + IE_BUILD_POSTFIX);
+ std::shared_ptr<SharedObjectLoader> sharedLoader(new SharedObjectLoader(name.c_str()));
+ SymbolLoader<SharedObjectLoader> loader(sharedLoader);
+ IE_SUPPRESS_DEPRECATED_START
+ ASSERT_NE(nullptr, loader.instantiateSymbol<IInferencePlugin>(SOCreatorTrait<IInferencePlugin>::name));
+ IE_SUPPRESS_DEPRECATED_END
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <queue>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_broadcast3.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+// Broadcast-3 is converted directly to Broadcast-1 for modes NUMPY, NONE and PDPD
+TEST(TransformationTests, ConvertBroadcast3WithNumpyModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, ngraph::op::BroadcastType::NUMPY);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset1::Broadcast>(input1, target_shape, ngraph::op::AutoBroadcastType::NUMPY);
+ broadcast->set_friendly_name("broadcast");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto broadcast_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = broadcast_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(broadcast_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertBroadcast3WithPDPDModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, ngraph::op::BroadcastType::PDPD);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 1, 2});
+ auto target_shape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset1::Broadcast>(input1, target_shape, ngraph::op::AutoBroadcastType::PDPD);
+ broadcast->set_friendly_name("broadcast");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto broadcast_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = broadcast_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(broadcast_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertBroadcast3WithExplicitModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 5, 2});
+ auto brodcast_axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{0, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, brodcast_axis, ngraph::op::BroadcastType::EXPLICIT);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 5, 2});
+ auto brodcast_axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{0, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 2});
+ auto broadcast = std::make_shared<ngraph::opset1::Broadcast>(input1, target_shape, brodcast_axis, ngraph::op::AutoBroadcastType::EXPLICIT);
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto broadcast_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = broadcast_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(broadcast_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
+
+// Broadcast-3 with mode BIDIRECTIONAL is converted to a Multiply by a broadcasted constant of ones of the corresponding type
+TEST(TransformationTests, ConvertBroadcast3WithBidirectionalModeToBroadcast1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1, 2});
+ auto target_shape = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 1});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(input1, target_shape, ngraph::op::BroadcastType::BIDIRECTIONAL);
+ broadcast->set_friendly_name("broadcast");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{broadcast}, ngraph::ParameterVector{input1});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertBroadcast3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1, 2});
+ auto target_shape = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<int64_t>{3, 5, 1});
+ auto constant_one = std::make_shared<ngraph::opset1::Constant>(input->get_output_element_type(0), ngraph::Shape({1}), std::vector<int>{1});
+ auto broadcast_ones = std::make_shared<ngraph::opset1::Broadcast>(constant_one, target_shape, ngraph::op::AutoBroadcastType::NUMPY);
+ auto multiply = std::make_shared<ngraph::opset1::Multiply>(input, broadcast_ones);
+ multiply->set_friendly_name("broadcast");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{multiply}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto result_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ auto crop_node = result_node->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(result_node->get_friendly_name() == "broadcast") << "Transformation ConvertBroadcast3 should keep output names.\n";
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <queue>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_nms3.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+using namespace ngraph;
+
+TEST(TransformationTests, ConvertNMS3I32Output) {
+ std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset3::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);
+ nms->set_friendly_name("nms");
+
+ f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+ pass::InitNodeInfo().run_on_function(f);
+ pass::ConvertNMS3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset2::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset1::NonMaxSuppression::BoxEncodingType::CORNER, true);
+ nms->set_friendly_name("nms");
+
+ f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto nms_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    ASSERT_TRUE(nms_node->get_friendly_name() == "nms") << "Transformation ConvertNMS3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertNMS3I64Output) {
+ std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset3::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i64);
+ nms->set_friendly_name("nms");
+
+ f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+ pass::InitNodeInfo().run_on_function(f);
+ pass::ConvertNMS3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto boxes = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1000, 4});
+ auto scores = std::make_shared<opset3::Parameter>(element::f32, Shape{1, 1, 1000});
+ auto max_output_boxes_per_class = opset3::Constant::create(element::i64, Shape{}, {10});
+ auto iou_threshold = opset3::Constant::create(element::f32, Shape{}, {0.75});
+ auto score_threshold = opset3::Constant::create(element::f32, Shape{}, {0.7});
+ auto nms = std::make_shared<opset2::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+ iou_threshold, score_threshold, opset2::NonMaxSuppression::BoxEncodingType::CORNER, true);
+ auto convert = std::make_shared<ngraph::opset2::Convert>(nms, element::i64);
+ convert->set_friendly_name("nms");
+
+ f_ref = std::make_shared<Function>(NodeVector{convert}, ParameterVector{boxes, scores});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto nms_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+    ASSERT_TRUE(nms_node->get_friendly_name() == "nms") << "Transformation ConvertNMS3 should keep output names.\n";
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <transformations/convert_scatter_elements_to_scatter.hpp>
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+std::shared_ptr<ngraph::Function> get_initial_function(const ngraph::PartialShape & data_shape,
+ const ngraph::PartialShape & indexes_shape,
+ const ngraph::PartialShape & updates_shape,
+ const ngraph::PartialShape & broadcast_shape,
+ const int64_t & axis) {
+ auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, data_shape);
+ auto indexes = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, indexes_shape);
+ auto updates = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, updates_shape);
+ auto axis_const = ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {axis});
+
+ uint64_t broadcast_len = broadcast_shape.rank().get_length();
+ auto broadcast_shape_param = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{broadcast_len});
+ auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(indexes, broadcast_shape_param);
+
+ auto scatter = std::make_shared<ngraph::opset3::ScatterElementsUpdate>(data, broadcast, updates, axis_const);
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{scatter}, ngraph::ParameterVector{data, indexes, updates, broadcast_shape_param});
+}
+
+std::shared_ptr<ngraph::Function> get_reference_function(const ngraph::PartialShape & data_shape,
+ const ngraph::PartialShape & indexes_shape,
+ const ngraph::PartialShape & updates_shape,
+ const int64_t & axis,
+ const ngraph::Shape & reshape_shape = {},
+ const std::vector<int64_t> & squeeze_indices = {}) {
+ auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, data_shape);
+ auto indexes = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, indexes_shape);
+ auto updates = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, updates_shape);
+ auto axis_const = ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {axis});
+
+ ngraph::Output<ngraph::Node> index_out = indexes->output(0);
+ if (!reshape_shape.empty()) {
+ index_out = std::make_shared<ngraph::opset3::Reshape>(indexes,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {reshape_shape.size()}, reshape_shape), false);
+ }
+
+ if (!squeeze_indices.empty()) {
+ index_out = std::make_shared<ngraph::opset3::Squeeze>(indexes,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {squeeze_indices.size()}, squeeze_indices));
+ }
+
+ auto scatter = std::make_shared<ngraph::opset3::ScatterUpdate>(data, index_out, updates, axis_const);
+
+ return std::make_shared<ngraph::Function>(ngraph::NodeVector{scatter}, ngraph::ParameterVector{data, indexes, updates});
+}
+
+void test(std::shared_ptr<ngraph::Function> f, std::shared_ptr<ngraph::Function> f_ref) {
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertScatterElementsToScatter().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ ngraph::pass::ConstantFolding().run_on_function(f);
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+}
+
+void test(std::shared_ptr<ngraph::Function> f) {
+ test(f, f);
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0) {
+ test(get_initial_function({1000, 256, 7, 7}, {1000, 1, 1, 1}, {1000, 256, 7, 7}, {1000, 256, 7, 7}, 0),
+ get_reference_function({1000, 256, 7, 7}, {1000, 1, 1, 1}, {1000, 256, 7, 7}, 0, {1000}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis1) {
+ test(get_initial_function({1000, 256, 7, 7}, {256, 1, 1}, {1000, 256, 7, 7}, {1000, 256, 7, 7}, 1),
+ get_reference_function({1000, 256, 7, 7}, {256, 1, 1}, {1000, 256, 7, 7}, 1, {256}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestNoReshape) {
+ test(get_initial_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, {1000, 1, 7, 7}, 1),
+ get_reference_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, 1));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestNoReshapeNegAxis) {
+ test(get_initial_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, {1000, 1, 7, 7}, -3),
+ get_reference_function({1000, 256, 7, 7}, {1}, {1000, 1, 7, 7}, -3));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestNegative) {
+ test(get_initial_function({1000, 256, 7, 7}, {1000, 256, 1, 1}, {1000, 256, 7, 7}, {1000, 256, 7, 7}, 0));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0Dyn) {
+ test(get_initial_function({DYN, 256, 7, 7}, {DYN, 1, 1, 1}, {DYN, 256, 7, 7}, {DYN, 256, 7, 7}, 0),
+ get_reference_function({DYN, 256, 7, 7}, {DYN, 1, 1, 1}, {DYN, 256, 7, 7}, 0, {}, {1, 2, 3}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis1Dyn) {
+ test(get_initial_function({1000, DYN, 7, 7}, {DYN, 1, 1}, {1000, DYN, 7, 7}, {1000, DYN, 7, 7}, 1),
+ get_reference_function({1000, DYN, 7, 7}, {DYN, 1, 1}, {1000, DYN, 7, 7}, 1, {}, {1, 2}));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis1NoSqueezeDyn) {
+ test(get_initial_function({1000, DYN, 7, 7}, {DYN}, {1000, 256, 7, 7}, {1000, DYN, 7, 7}, 1),
+ get_reference_function({1000, DYN, 7, 7}, {DYN}, {1000, 256, 7, 7}, 1));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0Neg1Dyn) {
+ test(get_initial_function({DYN, 256, 7, 7}, {DYN, DYN, 1, 1}, {DYN, 256, 7, 7}, {DYN, 256, 7, 7}, 0));
+}
+
+TEST(TransformationTests, ConvertScatterElementsToScatterTestAxis0Neg2Dyn) {
+ test(get_initial_function({DYN, 256, 7, 7}, {DYN, 1, 2, 1}, {DYN, 256, 7, 7}, {DYN, 256, 7, 7}, 0));
+}
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_shapeof3.hpp>
+#include <transformations/init_node_info.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+TEST(TransformationTests, ConvertShapeOf3WithI64) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset3::ShapeOf>(input, ngraph::element::i64);
+ shapeof->set_friendly_name("shapeof");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{shapeof}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertShapeOf3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset1::ShapeOf>(input);
+ shapeof->set_friendly_name("shapeof");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{shapeof}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(output_node->get_friendly_name() == "shapeof") << "Transformation ConvertShapeOf3 should keep output names.\n";
+}
+
+TEST(TransformationTests, ConvertShapeOf3WithI32) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset3::ShapeOf>(input, ngraph::element::i32);
+ shapeof->set_friendly_name("shapeof");
+
+ f = std::make_shared<ngraph::Function>(ngraph::NodeVector{shapeof}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertShapeOf3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 2, 3});
+ auto shapeof = std::make_shared<ngraph::opset1::ShapeOf>(input);
+ auto convert = std::make_shared<ngraph::opset1::Convert>(shapeof, ngraph::element::i32);
+ convert->set_friendly_name("shapeof");
+
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{convert}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(output_node->get_friendly_name() == "shapeof") << "Transformation ConvertShapeOf3 should keep output names.\n";
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <queue>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <transformations/convert_opset3_to_opset2/convert_topk3.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+// check that the first output from the TopK-3 with I32 output indices is equal to the TopK-1 first output
+TEST(TransformationTests, ConvertTopK3I32Output0) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(topk_node->get_friendly_name() == "topk") << "Transformation ConvertTopK3 should keep output names.\n";
+}
+
+// check that the second output from the TopK-3 with I32 output indices is equal to the TopK-1 second output
+TEST(TransformationTests, ConvertTopK3I32Output1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(1)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(1)}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(topk_node->get_friendly_name() == "topk") << "Transformation ConvertTopK3 should keep output names.\n";
+}
+
+// check that the first output from the TopK-3 with I64 output indices is equal to the TopK-1 first output
+TEST(TransformationTests, ConvertTopK3I64Output0) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i64);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(0)}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+ ASSERT_TRUE(topk_node->get_friendly_name() == "topk") << "Transformation ConvertTopK3 should keep output names.\n";
+}
+
+// check that the second output from the TopK-3 with I64 output indices is equal to the TopK-1 second output converted to I64
+TEST(TransformationTests, ConvertTopK3I64Output1) {
+ std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset3::TopK>(input, k, 1, "min", "value", ngraph::element::i64);
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f = std::make_shared<ngraph::Function>(ngraph::OutputVector{topk->output(1)}, ngraph::ParameterVector{input});
+
+ ngraph::pass::InitNodeInfo().run_on_function(f);
+ ngraph::pass::ConvertTopK3().run_on_function(f);
+ ASSERT_NO_THROW(check_rt_info(f));
+ }
+
+ {
+ auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, ngraph::Shape{15, 20, 3});
+ auto k = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{}, {10});
+ auto topk = std::make_shared<ngraph::opset2::TopK>(input, k, 1, "min", "value", ngraph::element::i32);
+ auto convert = std::make_shared<ngraph::opset2::Convert>(topk->output(1), topk->get_index_element_type());
+ topk->set_friendly_name("topk");
+
+ // due to the 'compare_functions' limitation we will check only one output
+ f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{convert}, ngraph::ParameterVector{input});
+ }
+
+ auto res = compare_functions(f, f_ref);
+ ASSERT_TRUE(res.first) << res.second;
+
+ auto result_node_of_converted_f = f->get_output_op(0);
+ auto topk_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr();
+}
}
for (int i = 0; i < node1->inputs().size(); ++i) {
- if (node1->input(i).get_shape() != node2->input(i).get_shape()) {
+ if (!node1->input(i).get_partial_shape().compatible(node2->input(i).get_partial_shape())) {
std::ostringstream out("Different shape detected");
- out << node1->input(i).get_shape() << " and " << node2->input(i).get_shape();
+ out << node1->input(i).get_partial_shape() << " and " << node2->input(i).get_partial_shape();
return {false, out.str()};
}
#include <memory>
#include <ngraph/function.hpp>
+#include <ngraph/dimension.hpp>
#include "common_test_utils/test_common.hpp"
+#define DYN ngraph::Dimension::dynamic()
+
using TransformationTests = CommonTestUtils::TestsCommon;
std::pair<bool, std::string> compare_functions(const std::shared_ptr<ngraph::Function> & f1, const std::shared_ptr<ngraph::Function> & f2);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <ngraph_ops/fully_connected.hpp>
+#include <transformations/convert_opset1_to_legacy/fc_bias_fusion.hpp>
+#include <transformations/optimize_strided_slice.hpp>
+#include <transformations/utils/utils.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+// StridedSlice directly on a Parameter with zero begin and a fully masked-out
+// end (end_mask == 1 -> slice to the end).  The function is compared against
+// itself, so this checks only that the passes run cleanly on such a graph.
+TEST(TransformationTests, OptimizeSS_UselessDeletion_Negative1) {
+    std::shared_ptr<ngraph::Function> f(nullptr);
+    {
+        auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto begin_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto strides_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        const std::vector<int64_t> begin_mask(4, 0);
+        const std::vector<int64_t> end_mask(4, 1);  // ignoring end -- slicing to the end
+
+        auto slice = std::make_shared<ngraph::opset1::StridedSlice>(param, begin_const, end_const, strides_const, begin_mask, end_mask);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{slice}, ngraph::ParameterVector{param});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+
+    auto res = compare_functions(f, f);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Same useless-slice pattern as Negative1, but the input shape is fully
+// dynamic (rank-4 only).  The function is compared against itself, so the
+// check is that the passes handle dynamic shapes without crashing.
+TEST(TransformationTests, OptimizeSS_UselessDeletion_Negative2) {
+    std::shared_ptr<ngraph::Function> f(nullptr);
+    {
+        // rank is static (4) but every dimension is dynamic
+        auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic(4));
+        auto relu = std::make_shared<ngraph::opset1::Relu>(data);
+        auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        std::vector<int64_t> begin_mask = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask = {1, 1, 1, 1}; // ignoring end -- slicing to the end
+
+        auto ss = std::make_shared<ngraph::opset1::StridedSlice>(relu, begin, end, stride, begin_mask, end_mask);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ss}, ngraph::ParameterVector{data});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+
+    auto res = compare_functions(f, f);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// A useless StridedSlice (zero begin, end ignored via end_mask) fed by a Relu
+// is expected to be removed: the optimized function must equal plain
+// Parameter -> Relu.
+TEST(TransformationTests, OptimizeSS_UselessDeletion) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto relu = std::make_shared<ngraph::opset1::Relu>(data);
+        auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        std::vector<int64_t> begin_mask = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask = {1, 1, 1, 1}; // ignoring end -- slicing to the end
+
+        auto ss = std::make_shared<ngraph::opset1::StridedSlice>(relu, begin, end, stride, begin_mask, end_mask);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ss}, ngraph::ParameterVector{data});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: the slice is gone entirely
+        auto data = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto relu = std::make_shared<ngraph::opset1::Relu>(data);
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{relu}, ngraph::ParameterVector{data});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// With both masks zero the end values (-1) are honored, so the slice actually
+// crops data and must be kept: optimized graph == untouched graph.
+TEST(TransformationTests, OptimizeSS_Usefull_Test) {
+    // Both the tested and the reference graph are structurally identical,
+    // so build them with one helper instead of two copy-pasted scopes.
+    auto build = []() -> std::shared_ptr<ngraph::Function> {
+        auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+        auto begin = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto strides = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+
+        std::vector<int64_t> zero_mask = {0, 0, 0, 0};  // masks off: begin/end are used as-is
+
+        auto slice = std::make_shared<ngraph::opset1::StridedSlice>(param, begin, end, strides, zero_mask, zero_mask);
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{slice}, ngraph::ParameterVector{param});
+    };
+
+    std::shared_ptr<ngraph::Function> f = build();
+    ngraph::pass::StridedSliceOptimization().run_on_function(f);
+    ngraph::pass::ConstantFolding().run_on_function(f);
+
+    std::shared_ptr<ngraph::Function> f_ref = build();
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Two byte-identical StridedSlices reading the same source are expected to be
+// deduplicated into a single slice feeding both Concat inputs.
+TEST(TransformationTests, OptimizeSS_Shared_Test) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        // first slice of `source`
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin1, end1, stride1, begin_mask1, end_mask1);
+
+        // second slice: same attributes, separate nodes
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 0);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: one shared slice consumed twice by the Concat
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin1, end1, stride1, begin_mask1, end_mask1);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss1}, 0);
+
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+    }
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// The two slices have identical attributes but read *different* outputs of a
+// Split, so they must NOT be merged: the reference graph equals the initial one.
+TEST(TransformationTests, OptimizeSS_NotShared_Test) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 6, 5, 5});
+
+        // split channels (axis 1) into two 3-channel halves
+        auto axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
+        auto split = std::make_shared<ngraph::opset1::Split>(source, axis, 2);
+
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(0), begin1, end1, stride1, begin_mask1, end_mask1);
+
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(1), begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 0);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: byte-for-byte the same graph as above, untouched
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 6, 5, 5});
+
+        auto axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
+        auto split = std::make_shared<ngraph::opset1::Split>(source, axis, 2);
+
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(0), begin1, end1, stride1, begin_mask1, end_mask1);
+
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {-1, -1, -1, -1});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(split->output(1), begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 0);
+
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Two complementary crops along axis 1 (channels 0..3 and 3..5) that are
+// concatenated back together are expected to be rewritten as a single
+// VariadicSplit with sizes {3, 2}.
+TEST(TransformationTests, OptimizeSS_Groupped_Test) {
+    std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
+    {
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        // first 3 channels
+        auto begin1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 0, 0, 0});
+        auto end1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {5, 3, 5, 5});
+        auto stride1 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask1 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask1 = {0, 0, 0, 0};
+        auto ss1 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin1, end1, stride1, begin_mask1, end_mask1);
+
+        // remaining 2 channels
+        auto begin2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 0, 0});
+        auto end2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {5, 5, 5, 5});
+        auto stride2 = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {1});
+        std::vector<int64_t> begin_mask2 = {0, 0, 0, 0};
+        std::vector<int64_t> end_mask2 = {0, 0, 0, 0};
+        auto ss2 = std::make_shared<ngraph::opset1::StridedSlice>(source, begin2, end2, stride2, begin_mask2, end_mask2);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::NodeVector{ss1, ss2}, 1);
+
+        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+        ngraph::pass::StridedSliceOptimization().run_on_function(f);
+        ngraph::pass::ConstantFolding().run_on_function(f);
+    }
+    {
+        // reference: VariadicSplit {3, 2} over axis 1 instead of the two slices
+        auto source = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 5, 5, 5});
+
+        auto axis = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
+        auto split_sizes = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {3, 2});
+        auto variadic_split = std::make_shared<ngraph::opset1::VariadicSplit>(source, axis, split_sizes);
+
+        auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{variadic_split->output(0), variadic_split->output(1)}, 1);
+
+        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{concat}, ngraph::ParameterVector{source});
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "common_test_utils/test_common.hpp"
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <memory>
+#include <queue>
+#include <map>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <transformations/utils/utils.hpp>
+#include <transformations/init_node_info.hpp>
+#include <ngraph/pass/algebraic_simplification.hpp>
+#include <ngraph/pass/visualize_tree.hpp>
+
+#include "ngraph_test_utils.hpp"
+
+using namespace testing;
+
+using InputShape = ngraph::PartialShape;
+using TransposeOrder = std::vector<int64_t>;
+
+// Describes the expected outcome for one Transpose->Reshape test case:
+// either the graph stays unchanged, the Transpose vanishes entirely, or it is
+// replaced by a Reshape (with an explicit constant or a ShapeOf+Gather input).
+struct ReferenceParams {
+    bool no_changes = false;             // reference == initial graph (Transpose kept)
+    bool is_empty = false;               // Transpose removed, no Reshape inserted
+    std::vector<int64_t> reshape_value;  // non-empty: constant target dims for the Reshape
+
+    ReferenceParams() = default;
+
+    explicit ReferenceParams(bool no_changes, bool is_empty) : no_changes(no_changes), is_empty(is_empty) {}
+
+    explicit ReferenceParams(const std::vector<int64_t> & reshape_value): reshape_value(reshape_value) {}
+};
+
+// Parameterized fixture: builds a (shape, transpose-order) graph plus the
+// graph expected after AlgebraicSimplification rewrites the Transpose,
+// as encoded by ReferenceParams.
+class TransposeToReshapeTests: public CommonTestUtils::TestsCommon,
+                               public testing::WithParamInterface<std::tuple<InputShape, TransposeOrder, ReferenceParams> > {
+public:
+    std::shared_ptr<ngraph::Function> f, f_ref;  // function under test / expected result
+
+    void SetUp() override {
+        const auto& input_shape = std::get<0>(GetParam());
+        const auto& transpose_order = std::get<1>(GetParam());
+        const auto& reference_params = std::get<2>(GetParam());
+
+        f = get_initial_function(input_shape, transpose_order);
+        f_ref = get_reference_function(input_shape, transpose_order, reference_params);
+    }
+
+private:
+    // Parameter -> Transpose(order) -> Relu.
+    std::shared_ptr<ngraph::Function> get_initial_function(const ngraph::PartialShape & input_shape,
+                                                           const std::vector<int64_t> & transpose_order) {
+        auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, input_shape);
+        auto order_const = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{transpose_order.size()}, transpose_order);
+        auto transpose = std::make_shared<ngraph::opset3::Transpose>(data, order_const);
+
+        // WA to test cases with transpose elimination: the Relu keeps the
+        // function non-trivial when the Transpose itself disappears.
+        auto relu = std::make_shared<ngraph::opset3::Relu>(transpose);
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{relu}, ngraph::ParameterVector{data});
+    }
+
+    // Expected graph after the transformation:
+    //   no_changes           -> identical to the initial function;
+    //   is_empty             -> Parameter -> Relu (Transpose dropped);
+    //   reshape_value set    -> Reshape with that constant target shape;
+    //   otherwise            -> Reshape fed by ShapeOf + Gather(transpose_order).
+    std::shared_ptr<ngraph::Function> get_reference_function(const ngraph::PartialShape & input_shape,
+                                                             const std::vector<int64_t> & transpose_order,
+                                                             const ReferenceParams & params) {
+        if (params.no_changes) {
+            return get_initial_function(input_shape, transpose_order);
+        }
+
+        auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f32, input_shape);
+
+        ngraph::Output<ngraph::Node> reshape_dims, last(data);
+        if (!params.reshape_value.empty()) {
+            reshape_dims = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{params.reshape_value.size()}, params.reshape_value);
+        } else {
+            // dynamic case: target dims gathered from the runtime shape
+            auto shape_of = std::make_shared<ngraph::opset3::ShapeOf>(data);
+            reshape_dims = std::make_shared<ngraph::opset3::Gather>(shape_of,
+                ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{transpose_order.size()}, transpose_order),
+                ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}));
+        }
+
+        if (!params.is_empty) {
+            last = std::make_shared<ngraph::opset3::Reshape>(last, reshape_dims, true);
+        }
+
+        last = std::make_shared<ngraph::opset3::Relu>(last);
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{last.get_node_shared_ptr()}, ngraph::ParameterVector{data});
+    }
+};
+
+// Runs InitNodeInfo + AlgebraicSimplification on the initial function, checks
+// that runtime info survives, and compares against the expected graph.
+TEST_P(TransposeToReshapeTests, CompareFunctions) {
+    ngraph::pass::InitNodeInfo().run_on_function(f);
+    ngraph::pass::AlgebraicSimplification().run_on_function(f);
+    ASSERT_NO_THROW(check_rt_info(f));
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Shorthands for the three possible outcomes (undefined again below).
+#define SAME_FUNCTION ReferenceParams(true, false)
+#define EMPTY_FUNCTION ReferenceParams(false, true)
+#define SHAPE_OF_GATHER ReferenceParams()
+
+// Orders that genuinely permute non-degenerate dimensions: Transpose stays.
+INSTANTIATE_TEST_CASE_P(KeepTranspose, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{1, 3, 64, 64}, TransposeOrder{0, 1, 3, 2}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{1, 3, 1, 64},  TransposeOrder{2, 0, 3, 1}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{1, 3, 1, 3},   TransposeOrder{3, 0, 2, 1}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, 2, 64, 1}, TransposeOrder{1, 0, 3, 2}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, 3},       TransposeOrder{1, 0}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, DYN, 1},  TransposeOrder{2, 1, 0}, SAME_FUNCTION),
+                        std::make_tuple(InputShape{DYN, DYN},     TransposeOrder{1, 0}, SAME_FUNCTION)));
+
+// Identity orders (or permutations of all-ones dims): Transpose is removed.
+INSTANTIATE_TEST_CASE_P(EliminateTranspose, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{1, 3, 64, 64}, TransposeOrder{0, 1, 2, 3}, EMPTY_FUNCTION),
+                        std::make_tuple(InputShape{1, 1, 1},      TransposeOrder{2, 0, 1}, EMPTY_FUNCTION),
+                        std::make_tuple(InputShape{DYN, DYN},     TransposeOrder{0, 1}, EMPTY_FUNCTION)));
+
+// Only unit dimensions move: Transpose becomes a Reshape with a constant shape.
+INSTANTIATE_TEST_CASE_P(ReshapeWithConstant, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{1, 3, 64, 1},  TransposeOrder{0, 1, 3, 2}, ReferenceParams({1, 3, 1, 64})),
+                        std::make_tuple(InputShape{1, 3, 1, 64},  TransposeOrder{1, 0, 3, 2}, ReferenceParams({3, 1, 64, 1})),
+                        std::make_tuple(InputShape{DYN, DYN, 1},  TransposeOrder{0, 2, 1}, ReferenceParams({0, 1, -1})),
+                        std::make_tuple(InputShape{1, 1, DYN},    TransposeOrder{2, 1, 0}, ReferenceParams({-1, 1, 1})),
+                        std::make_tuple(InputShape{DYN, 1, 64, 1}, TransposeOrder{1, 0, 3, 2}, ReferenceParams({1, -1, 1, 64}))));
+
+// Too many dynamic dims for a constant: Reshape gets its dims via ShapeOf+Gather.
+INSTANTIATE_TEST_CASE_P(ReshapeWithGather, TransposeToReshapeTests,
+        testing::Values(std::make_tuple(InputShape{DYN, 1, DYN, 1}, TransposeOrder{1, 0, 3, 2}, SHAPE_OF_GATHER),
+                        std::make_tuple(InputShape{1, DYN, DYN, DYN}, TransposeOrder{1, 2, 3, 0}, SHAPE_OF_GATHER)));
+
+#undef SAME_FUNCTION
+#undef EMPTY_FUNCTION
+#undef SHAPE_OF_GATHER
class BF16NetworkRestore1 : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// + Power1(FP32)
// |
// + AvgPooling1(FP32)
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
*
* class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
* protected:
- * void SetUp()override {
+ * void SetUp() override {
* fnPtr = std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
// STAGE1:
// BFloat16Helpers::getMaxAbsValue(lm1.as<const float *>(), mout1->size()) << std::endl;
// std::cout << "Max in fp32 network by output " << outputNameFP32 << ": " <<
// BFloat16Helpers::getMaxAbsValue(lm2.as<const float *>(), mout2->size()) << std::endl;
-
FuncTestUtils::compareRawBuffers(lm1.as<const float *>(),
lm2.as<const float *>(),
mout1->size(), mout2->size(),
+ FuncTestUtils::CompareType::ABS,
threshold);
-
// Stage2: verification of performance counters
std::pair<std::string, std::string> wrongLayer =
BFloat16Helpers::matchPerfCountPrecisionVsExpected(req1.GetPerformanceCounts(), expectedPrecisions);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+namespace {
+    // All inputs are NCHW 1xCx2x2; the channel count C sweeps 1..10 so the
+    // in-place concat is exercised for several channel widths.
+    // NOTE(review): "concatAxe" presumably means "concat axis" (channels).
+    static const int inputSize = 2, concatAxe = 1;
+    static std::vector<SizeVector> paramVector = {
+        SizeVector({ 1, 1, inputSize, inputSize }),
+        SizeVector({ 1, 2, inputSize, inputSize }),
+        SizeVector({ 1, 3, inputSize, inputSize }),
+        SizeVector({ 1, 4, inputSize, inputSize }),
+        SizeVector({ 1, 5, inputSize, inputSize }),
+        SizeVector({ 1, 6, inputSize, inputSize }),
+        SizeVector({ 1, 7, inputSize, inputSize }),
+        SizeVector({ 1, 8, inputSize, inputSize }),
+        SizeVector({ 1, 9, inputSize, inputSize }),
+        SizeVector({ 1, 10, inputSize, inputSize })};
+}  // namespace
+
+// BF16 test: two convolutions fed by the same scale-shift, concatenated over
+// the channel axis and followed by a Relu.  Checks both numerical accuracy
+// against the FP32 run and the per-layer execution precision counters.
+class Concat_in_place : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //     scaleshift
+        //      /      \
+        //   Conv      Conv
+        //      \      /
+        //       concat
+        //         |
+        //        relu
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply (constants are pre-rounded to bf16 when testing BF16)
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution weights, shared by both convolutions
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        auto channelsCount = inputShapes[1];
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // Concat over the channel axis
+        ngraph::NodeVector concInputNodes = { convNode1, convNode2 };
+
+        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, concatAxe);
+        concNode->set_friendly_name("CONC_1_TEST");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(concNode);
+        reluNode->set_friendly_name("RELU_1");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1: accuracy threshold vs the FP32 run (10e-1 == 1.0)
+        threshold = 10e-1;
+        // STAGE2:
+        // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["CONC_1_TEST"] = "FP32";
+        expectedPrecisions["RELU_1"] = "FP32";
+    }
+};
+
+// Runs the shared BF16 comparison flow (accuracy + precision counters, see
+// BasicBF16Test::test) for every registered parameter combination.
+// Note: the stray ';' after the closing brace was removed -- it formed an
+// empty declaration that warns under -Wextra-semi/-pedantic.
+TEST_P(Concat_in_place, CompareWithRefImpl) {
+    test();
+}
+
+// FP32 network precision: baseline, everything executes in FP32.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Concat_in_place,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::ValuesIn(paramVector),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Concat_in_place::getTestCaseName);
+
+// BF16 network precision: convolutions expected to run in BF16 (see SetUp).
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Concat_in_place,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::BF16),
+                                ::testing::ValuesIn(paramVector),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Concat_in_place::getTestCaseName);
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// BF16 test: Multiply (scale) -> Convolution -> eltwise Add -> Convolution.
+// Checks accuracy vs FP32 and the per-layer execution precision counters.
+class ConvAdd : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+//      Power (FP32)
+//        |
+//      Conv(BF16)
+//        |
+//      Eltwise (SUM)(BF16)
+//        |
+//      Conv (BF16)
+
+        auto channelsCount = inputShapes[1];
+
+        // STAGE1: construction of the GRAPH
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // scale constants, pre-rounded to bf16 when testing BF16
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> eltConst0 = nullptr, eltConst1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        // NOTE(review): despite the name, this is a Multiply node -- confirm
+        // whether "Add_0" is intentional.
+        auto addNode0 = std::make_shared<opset1::Multiply>(input1, eltConst0);
+        addNode0->set_friendly_name("Add_0");
+
+        // convolution weights (same sin-filled values for both convolutions)
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode0, weightsNode0,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode0->set_friendly_name("Convolution_0");
+
+        // eltwise, i.e. sum
+        auto eltSumNode = std::make_shared<opset1::Add>(convNode0, eltConst1);
+        eltSumNode->set_friendly_name("Elt_sum");
+
+        // convolution
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            eltSumNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+
+        // 256 channels
+        // threshold = 0.26f;  // Max in fp32 network by output: 5.26852
+
+        // 3 channels
+        threshold = 0.2f;  // Max in fp32 network by output: 4.90418
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["Convolution_0"] = "BF16";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Elt_sum"] = "FP32";
+    }
+};
+
+// Runs the shared BF16 comparison flow (accuracy + precision counters, see
+// BasicBF16Test::test) for every registered parameter combination.
+// Note: the stray ';' after the closing brace was removed -- it formed an
+// empty declaration that warns under -Wextra-semi/-pedantic.
+TEST_P(ConvAdd, CompareWithRefImpl) {
+    test();
+}
+
+// NOTE(review): the 1x256x38x38 variants below are disabled because the CPU
+// plug-in fails on that configuration -- re-enable once fixed.
+
+//INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ConvAdd,
+//                        ::testing::Combine(
+//                                ::testing::Values(Precision::FP32),
+//                                ::testing::Values(Precision::FP32),
+//                                ::testing::Values(SizeVector({1, 256, 38, 38})),
+//                                ::testing::Values(SizeVector()),
+//                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+//                        ConvAdd::getTestCaseName);
+//
+//INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ConvAdd,
+//                        ::testing::Combine(
+//                                ::testing::Values(Precision::FP32),
+//                                ::testing::Values(Precision::BF16),
+//                                ::testing::Values(SizeVector({1, 256, 38, 38})),
+//                                ::testing::Values(SizeVector()),
+//                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+//                        ConvAdd::getTestCaseName);
+
+// Active 3-channel variants.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ConvAdd,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({1, 3, 38, 38})),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvAdd::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ConvAdd,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::BF16),
+                                ::testing::Values(SizeVector({1, 3, 38, 38})),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvAdd::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class ConvConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// ScaleShift (FP32)
// |
// Conv (BF16)
// |
// Conv (BF16)
+ ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShapes});
auto const1 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 2.0f });
auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
addNode->set_friendly_name("ADD_1");
// convolution
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
- std::vector<float> weightValues;
- weightValues.resize(3 * 3 * 3 * 3);
- BFloat16Helpers::fillInputsBySinValues(weightValues.data(), weightValues.size());
- auto weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
+ std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+
+ auto channelsCount = inputShapes[1];
+
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ if (netPrecision == Precision::FP32) {
+ std::vector<float> weightValuesFP32;
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+ weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ } else {
+ std::vector<short> weightValuesBF16;
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+ weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ }
std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
addNode, weightsNode,
convNode1->set_friendly_name("CONV_1");
// Convolution
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
std::vector<float> weightValues2;
- weightValues2.resize(3 * 3 * 3 * 3);
+ weightValues2.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
auto weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape2, weightValues2);
std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- // the maximum values in the latest tensor for this test is 24.4. It would be safe to set threshold eq to 0.1
- threshold = 0.3f;
+ threshold = 1.0f; // Max in fp32 network by output CONV_2: 49.3427
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
// performance counters
class ConvDWConvReLU : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// |
// ReLU (Fused Info DW convolution)
-
// multiply
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
addNode->set_friendly_name("ADD_1");
// convolution
+ auto channelsCount = inputShapes[1];
+
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// DW convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 1, 1, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 };
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2FP32;
- weightValues2FP32.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(reluNode2, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+typedef std::tuple< Precision, SizeVector, string, size_t, CoordinateDiff, string> convEltwiseDepthwiseTestParamsSet;
+
+// Checks BF16 execution of a Convolution with a fused eltwise (ReLU) and a
+// fused depthwise (per-channel ScaleShift) post-op: output must match the
+// FP32 reference within `threshold`, and the CONV layer must be executed by
+// the expected mkldnn primitive (verified via performance counters).
+class ConvEltwiseDepthwise :
+    public testing::WithParamInterface<convEltwiseDepthwiseTestParamsSet>, public LayerTestsUtils::LayerTestsCommon {
+public:
+    std::shared_ptr<Function> fnPtr;              // graph under test, built in SetUp()
+    SizeVector inputShapes;                       // NCHW input shape
+    std::map<string, string> expectedPrecisions;  // layer name -> expected runtime precision/primitive
+    float threshold = 3e-2f;                      // abs/rel tolerance for the BF16-vs-FP32 comparison
+    Precision netPrecision;                       // precision the graph constants are created in
+    size_t kernel;                                // square convolution kernel size
+    CoordinateDiff pads;                          // symmetric convolution padding
+    string mkldnnPrimitive;                       // mkldnn primitive expected for the CONV layer
+
+protected:
+    // Builds the test graph:
+    //   scaleshift (FP32)
+    //       |
+    //   Conv (BF16)
+    //       |
+    //   Relu (Eltwise fused into Conv)
+    //       |
+    //   scaleshift (Depthwise fused into Conv)
+    std::shared_ptr<Function> createGraph(InferenceEngine::Precision netPrecision) {
+        element::Type ntype = (netPrecision == Precision::FP32) ? element::f32 : element::bf16;
+        size_t chCnt = inputShapes[1];  // the convolution keeps the channel count unchanged
+
+        // first scaleshift: multiply by a scalar constant ...
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, Shape{ inputShapes });
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // ... then add a scalar constant
+        std::shared_ptr<opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("SS_1");
+
+        // convolution; weights are filled with sin values in the requested precision
+        std::shared_ptr<opset1::Constant> weightsNode = nullptr;
+        Shape convFilterShape = { chCnt, chCnt, kernel, kernel };  // out channels, in channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(chCnt * chCnt * kernel * kernel);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(chCnt * chCnt * kernel * kernel);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<Node> convNode1 = std::make_shared<opset1::Convolution>(
+            addNode, weightsNode, Strides({ 1, 1 }), pads, pads, Strides({ 1, 1 }), op::PadType::EXPLICIT);
+        convNode1->set_friendly_name("CONV");
+
+        // Eltwise, i.e. Relu
+        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
+        reluNode->set_friendly_name("RELU");
+
+        // second scaleshift, depthwise: per-channel multiply ...
+        std::shared_ptr<opset1::Constant> const3 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 3.0f });
+        } else {
+            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(3.0f)) });
+        }
+        auto mulNode2 = std::make_shared<opset1::Multiply>(reluNode, const3);
+
+        // ... then per-channel add
+        std::shared_ptr<opset1::Constant> const4 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 2.0f });
+        } else {
+            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
+        addNode2->set_friendly_name("SS_2");
+
+        return std::make_shared<Function>(NodeVector{ addNode2 }, ParameterVector{ input1 });
+    }
+public:
+    // Produces a readable test name from the parameter tuple.
+    static string getTestCaseName(testing::TestParamInfo<convEltwiseDepthwiseTestParamsSet> obj) {
+        Precision netPrecision;
+        SizeVector inputShapes;
+        string targetDevice;
+        size_t kernel;
+        CoordinateDiff pads;
+        string mkldnnPrimitive;
+        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, mkldnnPrimitive) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "mkldnnPrimitive=" << mkldnnPrimitive << "_";
+        result << "targetDevice=" << targetDevice;
+        return result.str();
+    }
+
+    // Runs the network twice — once with BF16 (enforced for an FP32-built
+    // graph) and once as an FP32 reference — compares the raw output buffers
+    // within `threshold`, then validates per-layer precisions/primitives
+    // against the performance counters.
+    void Run_test() {
+        if (!InferenceEngine::with_cpu_x86_bfloat16()) {
+            // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
+            // tests are useless on such platforms
+            return;
+        }
+        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, mkldnnPrimitive) = this->GetParam();
+        InferenceEngine::CNNNetwork cnnNet(fnPtr);
+
+        for (const auto& inputItem : cnnNet.getInputsInfo()) {
+            inputItem.second->setPrecision(Precision::FP32);
+        }
+
+        string inputName = cnnNet.getInputsInfo().begin()->first;
+        string outputName = cnnNet.getOutputsInfo().begin()->first;
+        auto ie = InferenceEngine::Core();
+        // BF16 inference: an FP32-built graph is force-converted to BF16 by the
+        // plug-in; a BF16-built graph already carries BF16 constants.
+        std::map<string, string> options;
+        if (netPrecision == InferenceEngine::Precision::FP32) {
+            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::YES;
+        } else {
+            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::NO;
+        }
+        options[InferenceEngine::PluginConfigParams::KEY_PERF_COUNT] = InferenceEngine::PluginConfigParams::YES;
+        options[InferenceEngine::PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT] = "egraph_test";
+
+        auto exec_net1 = ie.LoadNetwork(cnnNet, targetDevice, options);
+        auto req1 = exec_net1.CreateInferRequest();
+
+        InferenceEngine::Blob::Ptr inBlob1 = req1.GetBlob(inputName);
+        BFloat16Helpers::fillInputsBySinValues(inBlob1);
+
+        req1.Infer();
+        auto outBlobBF16 = req1.GetBlob(outputName);
+        InferenceEngine::MemoryBlob::CPtr mout1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobBF16);
+        ASSERT_NE(mout1, nullptr);
+        auto lm1 = mout1->rmap();
+
+        // FP32 inference
+        // if netPrecision is not eq to the FP32 - change network precision and recreate network
+        InferenceEngine::CNNNetwork cnnNetFP32(createGraph(InferenceEngine::Precision::FP32));
+        string inputNameFP32 = cnnNetFP32.getInputsInfo().begin()->first;
+        string outputNameFP32 = cnnNetFP32.getOutputsInfo().begin()->first;
+        for (const auto& inputItem : cnnNetFP32.getInputsInfo()) {
+            inputItem.second->setPrecision(Precision::FP32);
+        }
+        auto exec_net2 = ie.LoadNetwork(cnnNetFP32, targetDevice,
+                                        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } });
+        auto req2 = exec_net2.CreateInferRequest();
+
+        req2.SetBlob(inputNameFP32, inBlob1);  // reuse the same input data for a fair comparison
+
+        req2.Infer();
+        auto outBlobFP32 = req2.GetBlob(outputNameFP32);
+        InferenceEngine::MemoryBlob::CPtr mout2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobFP32);
+        ASSERT_NE(mout2, nullptr);
+        auto lm2 = mout2->rmap();
+
+        // Stage1: element-wise comparison of the two raw output buffers
+        FuncTestUtils::compareRawBuffers(lm1.as<const float*>(), lm2.as<const float*>(), mout1->size(), mout2->size(),
+                                         FuncTestUtils::CompareType::ABS_AND_REL,
+                                         threshold, threshold);
+
+        // Stage2: verification of performance counters
+        std::pair<string, string> wrongLayer =
+            BFloat16Helpers::matchPerfCountPrecisionVsExpected(req1.GetPerformanceCounts(), expectedPrecisions);
+        if (!wrongLayer.first.empty()) {
+            string layerInPerfCounts = wrongLayer.first + " " + wrongLayer.second;
+            string layerExpected = wrongLayer.first + " " + expectedPrecisions[wrongLayer.first];
+            ASSERT_EQ(layerInPerfCounts, layerExpected);
+        }
+        fnPtr.reset();
+    }
+
+    void SetUp() override {
+        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, mkldnnPrimitive) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // Expected runtime precision/primitive per layer; RELU and SS_2 are
+        // expected to be fused into CONV, hence "ndef" (no own perf counter).
+        expectedPrecisions["SS_1"] = "FP32";
+        expectedPrecisions["CONV"] = mkldnnPrimitive;
+        expectedPrecisions["RELU"] = "ndef";
+        expectedPrecisions["SS_2"] = "ndef";
+    }
+};
+
+// Entry point: executes Run_test() for each parameter combination.
+TEST_P(ConvEltwiseDepthwise, CompareWithRefImpl) {
+    Run_test();
+}
+
+// 1x1 kernel, no padding: expects the avx512 1x1 BF16 jit primitive.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_1x1_depthwise_BF16, ConvEltwiseDepthwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 5, 1, 1 })),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                            ::testing::Values(size_t(1)),
+                            ::testing::Values(CoordinateDiff({ 0, 0 })),
+                            ::testing::Values(string("jit_avx512_1x1_BF16"))),
+                        ConvEltwiseDepthwise::getTestCaseName);
+
+// 3x3 kernel with padding on a 3-channel input: expects the gemm BF16 primitive.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_gemm_depthwise_BF16, ConvEltwiseDepthwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 3, 10, 10 })),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                            ::testing::Values(size_t(3)),
+                            ::testing::Values(CoordinateDiff({ 1, 1 })),
+                            ::testing::Values(string("jit_gemm_BF16"))),
+                        ConvEltwiseDepthwise::getTestCaseName);
+
+// 3x3 kernel, no padding, 5 channels: expects the generic avx512 BF16 jit primitive.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_conv_depthwise_BF16, ConvEltwiseDepthwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 5, 10, 10 })),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                            ::testing::Values(size_t(3)),
+                            ::testing::Values(CoordinateDiff({ 0, 0 })),
+                            ::testing::Values(string("jit_avx512_BF16"))),
+                        ConvEltwiseDepthwise::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class ConvReLUPoolConvReLUPool : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Convolution1 (FP32)
// |
// ReLU1 (Fused)
// STAGE1: construction of the GRAPH
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
// convolution1
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// convolution2
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
}
// convolution3
std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
- ngraph::Shape convFilterShape3 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape3 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
}
ngraph::op::PadType::EXPLICIT); // pad type
convNode3->set_friendly_name("Convolution_3");
-
-
-
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Checks BF16 inference of two convolutions separated by an eltwise Maximum
+// against a constant; per-layer precisions are validated via perf counters.
+class Elt_max : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+// Topology:
+//   Power (FP32)
+//     |
+//   Conv(BF16)  Const(FP32)
+//     |        /
+//   Eltwise(MAX)(FP32)
+//     |
+//   Conv(BF16)
+
+        // STAGE1: construction of the GRAPH
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        auto channelsCount = inputShapes[1];
+        // Convolution_0 squeezes the graph to a single channel; Convolution_1 consumes it.
+        const int conv0OutputChannels = 1;
+
+        // "Power" is modelled as a multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> powerConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            powerConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            powerConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto powerNode0 = std::make_shared<opset1::Multiply>(input1, powerConst);
+        powerNode0->set_friendly_name("Power_0");
+
+        // weights for both convolutions, filled with sin values in the requested precision
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
+        ngraph::Shape convFilterShape0 = { conv0OutputChannels, channelsCount, 3, 3 };  // out channels, in channels, kernel h, kernel w
+        ngraph::Shape convFilterShape1 = { 1, conv0OutputChannels, 3, 3 };  // out channels, in channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32_0, weightValuesFP32_1;
+            weightValuesFP32_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
+            weightValuesFP32_1.resize(1 * conv0OutputChannels * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32_0.data(), weightValuesFP32_0.size());
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32_1.data(), weightValuesFP32_1.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesFP32_0);
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesFP32_1);
+        } else {
+            std::vector<short> weightValuesBF16_0, weightValuesBF16_1;
+            weightValuesBF16_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
+            weightValuesBF16_1.resize(1 * conv0OutputChannels * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16_0.data(), weightValuesBF16_0.size());
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16_1.data(), weightValuesBF16_1.size());
+            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesBF16_0.data());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesBF16_1.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
+            powerNode0, weightsNode0,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode0->set_friendly_name("Convolution_0");
+
+        // Eltwise, i.e. Max against a full-size constant tensor
+        // (3x3 kernel with stride 1 and pad 1 keeps the spatial size, so the
+        // constant mirrors the Convolution_0 output shape)
+        std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
+        auto batchSize = inputShapes[0];
+        auto heightSize = inputShapes[2];
+        auto widthSize = inputShapes[3];
+        if (netPrecision == Precision::FP32) {
+            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize}, { 2.0f });
+        } else {
+            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize},
+                                                { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        maxConst->set_friendly_name("Max_const");
+        auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0, maxConst);
+        eltMaxNode->set_friendly_name("Elt_max");
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            eltMaxNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),         // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
+            ngraph::Strides({ 1, 1 }),         // dilation
+            ngraph::op::PadType::EXPLICIT);    // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.2f;  // Max in fp32 network by output: 20.0761
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["Convolution_0"] = "BF16";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Elt_max"] = "FP32";
+    }
+};
+
+// Entry point: runs the shared BF16-vs-FP32 comparison flow for Elt_max.
+TEST_P(Elt_max, CompareWithRefImpl) {
+    test();
+}
+
+
+// Variant with the network built in FP32.
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Elt_max,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({1, 3, 40, 40})),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Elt_max::getTestCaseName);
+
+// Variant with the network built directly with BF16 constants.
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Elt_max,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({1, 3, 40, 40})),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Elt_max::getTestCaseName);
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class Elt_x3 : public BasicBF16Test {
+protected:
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+// Power (FP32)
+// / | \
+// Conv(BF16) Conv(BF16) Conv(BF16)
+// / | /
+// ----------------------------------------------
+// Eltwise(MAX)(FP32) Eltwise(Mul) (FP32)
+// | |
+// Conv(BF16) Conv(BF16)
+// \ /
+// Eltwise (SUM)(BF16)
+// |
+// Conv (BF16)
+
+ auto channelsCount = inputShapes[1];
+
+ // STAGE1: construction of the GRAPH
+ ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ // add
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+ input1->set_friendly_name("Input_1");
+ std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+ if (netPrecision == Precision::FP32) {
+ addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+ } else {
+ addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+ }
+ auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
+ addNode0->set_friendly_name("Add_0");
+
+ // convolution
+ std::shared_ptr<ngraph::opset1::Constant> weightsNode0_1 = nullptr, weightsNode0_2 = nullptr,
+ weightsNode0_3 = nullptr, weightsNode1 = nullptr,
+ weightsNode2 = nullptr, weightsNode3 = nullptr;
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ if (netPrecision == Precision::FP32) {
+ std::vector<float> weightValuesFP32;
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+ weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+ } else {
+ std::vector<short> weightValuesBF16;
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+ BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+ weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+ }
+
+ std::shared_ptr<ngraph::Node> convNode0_1 = std::make_shared<ngraph::opset1::Convolution>(
+ addNode0, weightsNode0_1,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode0_1->set_friendly_name("Convolution_0_1");
+
+ std::shared_ptr<ngraph::Node> convNode0_2 = std::make_shared<ngraph::opset1::Convolution>(
+ addNode0, weightsNode0_2,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode0_2->set_friendly_name("Convolution_0_2");
+
+ std::shared_ptr<ngraph::Node> convNode0_3 = std::make_shared<ngraph::opset1::Convolution>(
+ addNode0, weightsNode0_3,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode0_3->set_friendly_name("Convolution_0_3");
+
+ // Eltwise, i.e. Mul
+ auto eltMulNode = std::make_shared<opset1::Multiply>(convNode0_1, convNode0_2);
+ eltMulNode->set_friendly_name("Elt_mul");
+
+ // Eltwise, i.e. Max
+ std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
+ if (netPrecision == Precision::FP32) {
+ maxConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
+ } else {
+ maxConst = opset1::Constant::create(ntype, Shape{inputShapes}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+ }
+ auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0_3, maxConst);
+ eltMaxNode->set_friendly_name("Elt_max");
+
+ // convolution
+ std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+ eltMulNode, weightsNode1,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode1->set_friendly_name("Convolution_1");
+
+ std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+ eltMaxNode, weightsNode2,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode2->set_friendly_name("Convolution_2");
+
+ // eltwise, i.e. sum
+ auto eltSumNode = std::make_shared<opset1::Add>(convNode1, convNode2);
+ eltSumNode->set_friendly_name("Elt_sum");
+
+ // convolution
+ std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
+ eltSumNode, weightsNode3,
+ ngraph::Strides({ 1, 1 }), // strides
+ ngraph::CoordinateDiff({ 1, 1 }), // pad begin
+ ngraph::CoordinateDiff({ 1, 1 }), // pad end
+ ngraph::Strides({ 1, 1 }), // dilation
+ ngraph::op::PadType::EXPLICIT); // pad type
+ convNode3->set_friendly_name("Convolution_3");
+
+ return std::make_shared<ngraph::Function>(convNode3, ngraph::ParameterVector{input1});
+ }
+    void SetUp() override {
+        // Read test parameters from gtest and build the nGraph function under test.
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+
+        // 256 channels, 38 x 38 size
+        // threshold = 0.6f; // Max in fp32 network by output: 12.0983
+
+        // 3 channels, 4 x 4 size
+        threshold = 20.6f; // Max in fp32 network by output: 879.077
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Convolution_2"] = "BF16";
+        expectedPrecisions["Convolution_3"] = "BF16";
+        expectedPrecisions["Elt_max"] = "FP32";
+        expectedPrecisions["Elt_mul"] = "FP32";
+        expectedPrecisions["Elt_sum"] = "ndef"; // NOTE(review): "ndef" appears to mean "precision not strictly defined" — confirm against BasicBF16Test
+    }
+};
+
+TEST_P(Elt_x3, CompareWithRefImpl) {
+ test();
+};
+
+// CPU plug-in failure for the 1x256x38x38 shape, so the following instantiations are disabled:
+
+//INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Elt_x3,
+// ::testing::Combine(
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(SizeVector({1, 256, 38, 38})),
+// ::testing::Values(SizeVector()),
+// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+// Elt_x3::getTestCaseName);
+//
+//INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Elt_x3,
+// ::testing::Combine(
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(Precision::BF16),
+// ::testing::Values(SizeVector({1, 256, 38, 38})),
+// ::testing::Values(SizeVector()),
+// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+// Elt_x3::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Elt_x3,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({1, 3, 4, 4})),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Elt_x3::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Elt_x3,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({1, 3, 4, 4})),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Elt_x3::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class Faster100_5_1_1_Conv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Power (FP32)
// |
// Convolution (BF16)
         // STAGE1: construction of the GRAPH
+ auto channelsCount = inputShapes[1];
+
// multiply
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{10, 5, 1, 1});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// problematic convolution: 100x5x1x1
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 5, 5, 1, 1 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 1, 1 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues;
- weightValues.resize(5 * 5 * 1 * 1, 0.f);
+ weightValues.resize(channelsCount * channelsCount * 1 * 1, 0.f);
weightValues[0] = 1.0f;
weightValues[7] = 1.0f;
weightValues[11] = 1.0f;
weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(5 * 5 * 1 * 1, BFloat16Helpers::reducePrecisionBitwiseS(0.0f));
+ weightValuesBF16.resize(channelsCount * channelsCount * 1 * 1, BFloat16Helpers::reducePrecisionBitwiseS(0.0f));
weightValuesBF16[0] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
weightValuesBF16[7] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
weightValuesBF16[11] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies BF16 inference of a Gather fed by a MatMul ("FC"): Matmul_0 must execute in
+// BF16 while the trailing Multiply (Mul_1) stays in FP32 (see expectedPrecisions in SetUp).
+class Gather_multiply : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Topology:
+        //   Add (FP32)
+        //    |
+        //   FC (BF16)
+        //    |
+        //   Gather(BF16)   Const
+        //        \         /
+        //         Mul(FP32)
+
+        // STAGE1: construction of the GRAPH
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        auto inputSize = inputShapes[1];
+
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode0->set_friendly_name("Add_1");
+
+        // matmul (the "FC" of the diagram), square weight matrix of constant 2.0
+        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
+        } else {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
+                                                    { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
+        matmulNode->set_friendly_name("Matmul_0");
+
+        // identity gather along axis 1: indices 0..inputSize-1 select every column
+        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
+        std::vector<size_t> gatherArray;
+        for (size_t i = 0; i < inputSize; i++) {
+            gatherArray.push_back(i);
+        }
+        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
+        auto gatherNode = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
+        gatherNode->set_friendly_name("Gather_1");
+
+        // elementwise multiply by a constant of the full input shape
+        std::shared_ptr<ngraph::opset1::Constant> mulConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            mulConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
+        } else {
+            mulConst = opset1::Constant::create(ntype, Shape{inputShapes}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode, mulConst);
+        mulNode->set_friendly_name("Mul_1");
+
+        return std::make_shared<ngraph::Function>(mulNode, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.1f; // Max in fp32 network by output: 21.7285
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+
+        expectedPrecisions["Matmul_0"] = "BF16";
+        expectedPrecisions["Mul_1"] = "FP32";
+    }
+};
+
+TEST_P(Gather_multiply, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Gather_multiply,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({1, 4})),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Gather_multiply::getTestCaseName);
+// CPU plug-in failure in the BF16 case, so the following instantiation is disabled:
+
+//INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Gather_multiply,
+// ::testing::Combine(
+// ::testing::Values(Precision::FP32),
+// ::testing::Values(Precision::BF16),
+// ::testing::Values(SizeVector({1, 4})),
+// ::testing::Values(SizeVector()),
+// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+// Gather_multiply::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies BF16 inference of a graph with two Gathers, Mul, Add/ReLU branches joined by a
+// Concat and a second MatMul. expectedPrecisions in SetUp pins which primitives must run in
+// BF16 vs FP32 according to the CPU plug-in's bfloat16 pipeline.
+class Gather_x2_add_mul_relu_concat_matmul : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Topology:
+        //   Add (FP32)
+        //    |
+        //   FullyConnected (BF16)
+        //    /          |          \
+        //   Gather(FP32)  Gather(FP32)  Add (FP32)
+        //       \        /               /
+        //        Mul(FP32)         ReLU(FP32)
+        //            \             /
+        //             Concat(FP32)      Const
+        //                 \            /
+        //                  Matmul(BF16)
+
+        // STAGE1: construction of the GRAPH
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        auto inputSize = inputShapes[1];
+
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode0->set_friendly_name("Add_1");
+
+        // matmul (the "FullyConnected" of the diagram)
+        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
+        } else {
+            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
+                                                    { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
+        matmulNode->set_friendly_name("Matmul_0");
+
+        // two identity Gathers along axis 1 (indices 0..inputSize-1)
+        std::vector<size_t> gatherArray;
+        for (size_t i = 0; i < inputSize; i++) {
+            gatherArray.push_back(i);
+        }
+        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
+        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
+        auto gatherNode1 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
+        gatherNode1->set_friendly_name("Gather_1");
+
+        auto gatherNode2 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
+        gatherNode2->set_friendly_name("Gather_2");
+
+        // multiply
+        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode1, gatherNode2);
+        mulNode->set_friendly_name("Mul_1");
+
+        // second scaling branch feeding ReLU
+        auto addNode1 = std::make_shared<opset1::Multiply>(matmulNode, addConst);
+        // Fix of a copy-paste bug: the original repeated addNode0->set_friendly_name("Add_1")
+        // here, renaming the first Multiply a second time and leaving this node unnamed.
+        addNode1->set_friendly_name("Add_2");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(addNode1);
+        reluNode->set_friendly_name("Relu_1");
+
+        // Concat of the two branches along the feature axis
+        ngraph::NodeVector concInputNodes = {mulNode, reluNode};
+        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
+        concNode->set_friendly_name("Conc_1");
+
+        // matmul over the concatenated (doubled) feature dimension
+        std::shared_ptr<ngraph::opset1::Constant> matmulConst1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2}, { 2.0f });
+        } else {
+            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2},
+                                                    { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto matmulNode1 = std::make_shared<opset1::MatMul>(concNode, matmulConst1);
+        matmulNode1->set_friendly_name("Matmul_1");
+
+        return std::make_shared<ngraph::Function>(matmulNode1, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 170.02f; // Max in fp32 network by output: 3887.11
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Matmul_0"] = "BF16";
+        expectedPrecisions["Mul_1"] = "FP32";
+        expectedPrecisions["Add_1"] = "FP32";
+        expectedPrecisions["Relu_1"] = "FP32";
+        expectedPrecisions["Conc_1"] = "FP32";
+        expectedPrecisions["Matmul_1"] = "BF16";
+    }
+};
+
+TEST_P(Gather_x2_add_mul_relu_concat_matmul, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({ 2048, 64 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({ 2048, 64 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies that an Interpolate with mode = "linear" between two convolutions stays in FP32
+// while both convolutions execute in BF16 (see expectedPrecisions in SetUp).
+class Interpolation : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Convolution (BF16)
+        //     |
+        // Interpolation (in the case of mode = "linear") (FP32)
+        //     |
+        // Convolution (BF16)
+
+        // STAGE1: construction of the GRAPH
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        auto channelsCount = inputShapes[1];
+
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode->set_friendly_name("Add_1");
+
+        // shared sin-filled weights for both convolutions
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode1 = nullptr, weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        // interpolation: upscale H and W by 2x
+        // NOTE(review): static_cast<long> is 32-bit on LLP64 (Windows); int64_t would be safer — confirm
+        auto heightSize = static_cast<long>(inputShapes[2]);
+        auto weigthSize = static_cast<long>(inputShapes[3]);
+        std::vector<int64_t> outShape = {2 * heightSize, 2 * weigthSize};
+
+        auto interpolShape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, outShape);
+        ngraph::op::InterpolateAttrs attrs;
+        attrs.pads_begin.push_back(0);
+        attrs.pads_end.push_back(0);
+        attrs.axes = ngraph::AxisSet{2, 3};
+        attrs.align_corners = false;
+        attrs.mode = "linear";  // "linear" mode is what forces the FP32 fallback under test
+        attrs.antialias = false;
+        auto interpolNode = std::make_shared<opset1::Interpolate>(
+            convNode1,
+            interpolShape, attrs);
+        interpolNode->set_friendly_name("Interp");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            interpolNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+        return std::make_shared<ngraph::Function>(convNode2, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.02f;  // Max in fp32 network by output: 2.531
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Interp"] = "FP32";
+        expectedPrecisions["Convolution_2"] = "BF16";
+    }
+};
+
+TEST_P(Interpolation, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Interpolation,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Interpolation::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Interpolation,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Interpolation::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class MobileNet_ssd_with_branching : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift
// |
// Conv1 (FP32)
// Concat
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// Conv1
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// DW convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 1, 1, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2FP32;
- weightValues2FP32.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 1 * 1 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(concNode, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+// Verifies that an Interpolate with mode = "nearest" (a Resample) between two convolutions
+// stays in FP32 while both convolutions execute in BF16 (see expectedPrecisions in SetUp).
+class Resample : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        // Convolution (BF16)
+        //     |
+        // Interpolation (Resample in the case of mode = "nearest") (FP32)
+        //     |
+        // Convolution (BF16)
+
+        // STAGE1: construction of the GRAPH
+
+        // Network runs either fully in f32 or with bf16 constants, depending on netPrecision.
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // input scaling ("Add_1"): implemented as Multiply by a scalar constant
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+        auto channelsCount = inputShapes[1];
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
+        if (netPrecision == Precision::FP32) {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Multiply>(input1, addConst);
+        addNode->set_friendly_name("Add_1");
+
+        // shared sin-filled weights for both convolutions
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode1 = nullptr, weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channel, /input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode1,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        // interpolation: upscale H and W by 2x
+        // NOTE(review): static_cast<long> is 32-bit on LLP64 (Windows); int64_t would be safer — confirm
+        auto heightSize = static_cast<long>(inputShapes[2]);
+        auto weigthSize = static_cast<long>(inputShapes[3]);
+        std::vector<int64_t> outShape = {2 * heightSize, 2 * weigthSize};
+
+        auto interpolShape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, outShape);
+        ngraph::op::InterpolateAttrs attrs;
+        attrs.pads_begin.push_back(0);
+        attrs.pads_end.push_back(0);
+        attrs.axes = ngraph::AxisSet{2, 3};
+        attrs.align_corners = false;
+        attrs.mode = "nearest";  // "nearest" mode makes this Interpolate a Resample
+        attrs.antialias = false;
+        auto interpolNode = std::make_shared<opset1::Interpolate>(
+            convNode1,
+            interpolShape, attrs);
+        interpolNode->set_friendly_name("Interp");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            interpolNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+        return std::make_shared<ngraph::Function>(convNode2, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE2: set up safe threshold <= 5% from maximum value of output tensor
+        threshold = 0.02f;  // Max in fp32 network by output: 2.35926
+
+        // STAGE3:
+        // filling of expected precision of layer execution defined by precision of input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Interp"] = "FP32";
+        expectedPrecisions["Convolution_2"] = "BF16";
+    }
+};
+
+TEST_P(Resample, CompareWithRefImpl) {
+ test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Resample,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Resample::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Resample,
+ ::testing::Combine(
+ ::testing::Values(Precision::FP32),
+ ::testing::Values(Precision::BF16),
+ ::testing::Values(SizeVector({ 1, 1, 2, 2 })),
+ ::testing::Values(SizeVector()),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ Resample::getTestCaseName);
+
+} // namespace LayerTestsDefinitions
class ScaleshiftConvEltwiseConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32) Conv (FP32)
// \ /
// Eltwise (Fused into Conv)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
+ auto channelsCount = inputShapes[1];
+
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2;
- weightValues2.resize(3 * 3 * 3 * 3);
+ weightValues2.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 3 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- threshold = 0.2f; // max value in the latest tensor for FP32 network is 37.77
+ threshold = 1.0f; // max value in the latest tensor for FP32 network is 37.77
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
// performance counters
class ScaleshiftConvEltwiseReluConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32) Conv (FP32_
// \ /
// Eltwise (Fused into conv)
// Conv (BF16)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
- ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValues2;
- weightValues2.resize(3 * 3 * 3 * 3);
+ weightValues2.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
} else {
std::vector<short> weightValues2BF16;
- weightValues2BF16.resize(3 * 3 * 3 * 3);
+ weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- threshold = 9e-2;
+ threshold = 1.0f; // Max in fp32 network by output CONV_2: 30.1374
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
// performance counters
class ScaleshiftConvEltwiseScaleshift : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// scaleshift (FP32)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConvEluConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// Conv (BF16)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// performance counters
expectedPrecisions["ADD_1"] = "FP32";
expectedPrecisions["CONV_1"] = "BF16";
- expectedPrecisions["ELU_1"] = "FP32";
expectedPrecisions["CONV_2"] = "BF16";
}
};
class ScaleshiftConvRelu : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// |
// Conv (BF16)
// relu (Fused into convolution)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_ConcatRelu : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift
// / \
// Conv Conv
// relu
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Concat
ngraph::NodeVector concInputNodes = { convNode1, convNode2 };
-
- // test is to be failed, if axis == 1 - TODO
- auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 2);
+ auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
concNode->set_friendly_name("CONC_1");
// ReLU
return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// / \
// Conv1 (BF16) Conv1 (BF16)
// eltwise (Fused into Conv1) produce FP32 output
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_mixed1_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// | |
// Conv1(BF16) Conv2(FP32)
// eltwise(Fused into Conv1)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x2_mixed2_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
// | |
// Conv1 (FP32) Conv2 (Bf16)
// eltwise (Fused into Conv1)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto channelsCount = inputShapes[1];
+
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// scaleshift (FP32)
//
// / \
// Conv3 (BF16)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+ const int outChannels = 16;
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 16, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 3 * 3 * 3);
+ weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 3 * 3 * 3);
+ weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
// Convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
- ngraph::Shape convFilterShape3 = { 16, 16, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape3 = { outChannels, outChannels, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 16 * 3 * 3);
+ weightValuesFP32.resize(outChannels * outChannels * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 16 * 3 * 3);
+ weightValuesBF16.resize(outChannels * outChannels * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
// STAGE1:
- threshold = 1.0f; // max value in the latest tensor for FP32 network is 93.3
+ threshold = 2.0f; // max value in the latest tensor for FP32 network is 93.3
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
class Scaleshift_x2_Conv_x2_Eltwise : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
//
// scaleshift (FP32) scaleshift (FP32)
// \ / \
// |
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode, convNode2}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class Scaleshift_x3_ConvEltwiseRelu : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
//
// scaleshift (FP32)
// |
// scaleshift (FP32)
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 3, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(3 * 3 * 3 * 3);
+ weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(3 * 3 * 3 * 3);
+ weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode3}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
}
};
- TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) {
- test();
- };
+TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) {
+ test();
+};
INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
::testing::Combine(
class PoolingAfterConv : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Scaleshift (FP32)
// |
// Convolution (BF16)
// STAGE1: construction of the GRAPH
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+ const int outChannels = 16;
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 16, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 3 * 3 * 3);
+ weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 3 * 3 * 3);
+ weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{avgpoolNode}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
class TopKInputsI32 : public BasicBF16Test {
protected:
- std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+ std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
// Power (FP32)
// |
// Convolution1 (BF16) Const (I32)
// STAGE1: construction of the GRAPH
ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+ auto channelsCount = inputShapes[1];
+ const int intermediateChannelsCount = 16;
+
// multiply
- auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+ auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
input1->set_friendly_name("Input_1");
std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
if (netPrecision == Precision::FP32) {
// convolution
std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
- ngraph::Shape convFilterShape = { 16, 3, 3, 3 }; // out channel, /input channels, kernel h, kernel w
+ ngraph::Shape convFilterShape = { intermediateChannelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
if (netPrecision == Precision::FP32) {
std::vector<float> weightValuesFP32;
- weightValuesFP32.resize(16 * 3 * 3 * 3);
+ weightValuesFP32.resize(intermediateChannelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
} else {
std::vector<short> weightValuesBF16;
- weightValuesBF16.resize(16 * 3 * 3 * 3);
+ weightValuesBF16.resize(intermediateChannelsCount * channelsCount * 3 * 3);
BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
}
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2, goe1}, ngraph::ParameterVector{input1});
}
- void SetUp()override {
+ void SetUp() override {
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
fnPtr = createGraph(netPrecision);
- threshold = 0.14f; // max value in the latest tensor for FP32 network is 22.6
+ threshold = 0.5f; // max value in the latest tensor for FP32 network is 22.6
// STAGE2:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// CPU-plugin instantiations of the shared core-threading behavior tests.
+#include <behavior/core_threading_tests.hpp>
+
+namespace {
+
+// Device/config pairs under test: bare CPU with performance counters enabled,
+// HETERO falling back to CPU, and MULTI with CPU as the only priority device.
+Params params[] = {
+ std::tuple<Device, Config> { "CPU", { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES) } } },
+ std::tuple<Device, Config> { "HETERO", { { "TARGET_FALLBACK", "CPU" } } },
+ std::tuple<Device, Config> { "MULTI", { { MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , "CPU" } } }
+};
+
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(CPU, CoreThreadingTests, testing::ValuesIn(params));
+
+// NOTE(review): the extra Combine values are presumably thread count (4) and
+// iteration count (50) — confirm against CoreThreadingTestsWithIterations.
+INSTANTIATE_TEST_CASE_P(CPU, CoreThreadingTestsWithIterations,
+ testing::Combine(testing::ValuesIn(params),
+ testing::Values(4),
+ testing::Values(50)));
const auto maxPool_ExplicitPad_CeilRounding_Params = ::testing::Combine(
::testing::Values(ngraph::helpers::PoolingTypes::MAX),
::testing::ValuesIn(kernels),
- // TODO: Non 1 strides fails in ngraph reference implementation with error "The end corner is out of bounds at axis 3" thrown in the test body.
- ::testing::Values(std::vector<size_t>({1, 1})),
+ ::testing::ValuesIn(strides),
::testing::ValuesIn(padBegins),
::testing::ValuesIn(padEnds),
::testing::Values(ngraph::op::RoundingType::CEIL),
::testing::Values(ngraph::helpers::PoolingTypes::AVG),
::testing::ValuesIn(kernels),
// TODO: Non 1 strides fails in ngraph reference implementation with error "The end corner is out of bounds at axis 3" thrown in the test body.
- ::testing::Values(std::vector<size_t>({1, 1})),
- // TODO: Non zero pads excluded because of accuracy mismatch
- ::testing::Values(std::vector<size_t>({0, 0})),
- ::testing::Values(std::vector<size_t>({0, 0})),
+ ::testing::ValuesIn(strides),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}, {0, 1}})),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}, {0, 1}})),
::testing::Values(ngraph::op::RoundingType::CEIL),
::testing::Values(ngraph::op::PadType::EXPLICIT),
::testing::Values(true, false)
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
PoolingLayerTest::getTestCaseName);
+
+std::vector<poolSpecificParams> psParams({poolSpecificParams(ngraph::helpers::PoolingTypes::AVG, {2, 2}, {2, 2}, {0, 0}, {0, 0},
+ ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false),
+ poolSpecificParams(ngraph::helpers::PoolingTypes::AVG, {7, 7}, {1, 1}, {0, 0}, {1, 1},
+ ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false)});
+
+INSTANTIATE_TEST_CASE_P(AvgPool_ExplicitPad_CeilRounding_corner, PoolingLayerTest,
+ ::testing::Combine(
+ ::testing::ValuesIn(psParams),
+ ::testing::ValuesIn(netPrecisions),
+ ::testing::Values(std::vector<size_t >({1, 1024, 6, 6})),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ PoolingLayerTest::getTestCaseName);
+
/* +========== Explicit Pad Floor Rounding ========== */
const auto avgPoolExplicitPadFloorRoundingParams = ::testing::Combine(
::testing::Values(ngraph::helpers::PoolingTypes::AVG),
::testing::ValuesIn(kernels),
::testing::ValuesIn(strides),
- // TODO: Non zero pads excluded because of accuracy mismatch
- ::testing::Values(std::vector<size_t>({0, 0})),
- ::testing::Values(std::vector<size_t>({0, 0})),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}})),
+ ::testing::ValuesIn(std::vector<std::vector<size_t>>({{0, 0}, {1, 1}})),
::testing::Values(ngraph::op::RoundingType::FLOOR),
::testing::Values(ngraph::op::PadType::EXPLICIT),
::testing::Values(true, false)
-} // namespace
\ No newline at end of file
+} // namespace
+
std::vector<std::string> disabledTestPatterns() {
return {
+ // TODO: Issue 26264
+ R"(.*(MaxPool|AvgPool).*S\(1\.2\).*Rounding=CEIL.*)"
};
}
\ No newline at end of file
const std::vector<std::vector<size_t >> dilations3d = {{1, 1, 1},
{1, 2, 1}};
-const auto conv3DParams_FP32 = ::testing::Combine(
+const auto conv3DParams = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(paddings3d),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
-const auto conv3DParams_FP16 = ::testing::Combine(
- ::testing::Values(std::vector<size_t >({3, 3, 3})),
- ::testing::ValuesIn(strides3d),
- ::testing::ValuesIn(paddings3d),
- ::testing::ValuesIn(paddings3d),
- ::testing::ValuesIn(dilations3d),
- ::testing::Values(5),
- ::testing::Values(ngraph::op::PadType::EXPLICIT)
-);
-
-INSTANTIATE_TEST_CASE_P(Convolution3D_FP32, ConvolutionLayerTest,
+INSTANTIATE_TEST_CASE_P(Convolution3D, ConvolutionLayerTest,
::testing::Combine(
- conv3DParams_FP32,
- ::testing::Values(InferenceEngine::Precision::FP32),
- ::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
- ::testing::Values(CommonTestUtils::DEVICE_GPU)),
- ConvolutionLayerTest::getTestCaseName);
-
-INSTANTIATE_TEST_CASE_P(Convolution3D_FP16, ConvolutionLayerTest,
- ::testing::Combine(
- conv3DParams_FP16,
- ::testing::Values(InferenceEngine::Precision::FP16),
+ conv3DParams,
+ ::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
ConvolutionLayerTest::getTestCaseName);
-
} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Validation tests for the VPU custom op ngraph::vpu::op::OutShapeOfReshape:
+// positive suites assert construction succeeds for valid type/shape
+// combinations, negative suites assert NodeValidationFailure otherwise.
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+
+#include <common_test_utils/test_common.hpp>
+
+#include <ngraph/op/parameter.hpp>
+#include <ngraph/function.hpp>
+
+#include <details/ie_exception.hpp>
+
+#include <gtest/gtest.h>
+
+namespace {
+
+using TensorShape = ngraph::PartialShape;
+using TensorType = ngraph::element::Type;
+
+// (in-data-shape tensor shape, its type, out-shape-descriptor shape, its type)
+using TestParams = std::tuple<
+ TensorShape,
+ TensorType,
+ TensorShape,
+ TensorType>;
+
+// Fixture: builds the two Parameter nodes consumed by OutShapeOfReshape
+// from the test-parameter tuple.
+class OutShapeOfReshapeTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<TestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& inDataShapeTensorShape = std::get<0>(parameters);
+ const auto& inTensorShapeTensorType = std::get<1>(parameters);
+ const auto& outShapeDescriptorTensorShape = std::get<2>(parameters);
+ const auto& outShapeDescriptorTensorType = std::get<3>(parameters);
+
+ m_inDataShapeParam = std::make_shared<ngraph::op::Parameter>(
+ inTensorShapeTensorType, inDataShapeTensorShape);
+ m_outShapeDescriptorParam = std::make_shared<ngraph::op::Parameter>(
+ outShapeDescriptorTensorType, outShapeDescriptorTensorShape);
+ }
+
+protected:
+ std::shared_ptr<ngraph::op::Parameter> m_inDataShapeParam;
+ std::shared_ptr<ngraph::op::Parameter> m_outShapeDescriptorParam;
+};
+
+// Valid static 1-D shapes for either input.
+std::vector<ngraph::PartialShape> tensorShapes {
+ TensorShape{1},
+ TensorShape{3},
+ TensorShape{4},
+};
+
+// Every ngraph element type; used below to derive the non-integral set.
+std::set<ngraph::element::Type> allNGraphTypes() {
+ return {
+ ngraph::element::dynamic,
+ ngraph::element::boolean,
+ ngraph::element::bf16,
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::f64,
+ ngraph::element::i8,
+ ngraph::element::i16,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u1,
+ ngraph::element::u8,
+ ngraph::element::u16,
+ ngraph::element::u32,
+ ngraph::element::u64
+ };
+}
+
+// Integral types: the positive suite feeds these to both inputs.
+std::set<ngraph::element::Type> allNGraphIntegralNumberTypes() {
+ return {
+ ngraph::element::i8,
+ ngraph::element::i16,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u1,
+ ngraph::element::u8,
+ ngraph::element::u16,
+ ngraph::element::u32,
+ ngraph::element::u64
+ };
+}
+
+//
+// Positive tests
+//
+
+// NOTE(review): the third constructor argument 'true' is presumably the
+// special-zero flag of the underlying Reshape semantics — confirm against
+// the op's declaration.
+TEST_P(OutShapeOfReshapeTests, CanValidateAndInferTypes) {
+ std::shared_ptr<ngraph::vpu::op::OutShapeOfReshape> op;
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ m_inDataShapeParam, m_outShapeDescriptorParam, true));
+ ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
+ ngraph::OutputVector{op->output(0)},
+ ngraph::ParameterVector{m_inDataShapeParam, m_outShapeDescriptorParam}));
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, OutShapeOfReshapeTests, testing::Combine(
+ testing::ValuesIn(tensorShapes),
+ testing::ValuesIn(allNGraphIntegralNumberTypes()),
+ testing::ValuesIn(tensorShapes),
+ testing::ValuesIn(allNGraphIntegralNumberTypes()))
+);
+
+//
+// Negative tests
+//
+
+// All types minus the integral set; std::set iterates in sorted order,
+// which is what std::set_difference requires.
+std::set<ngraph::element::Type> allNGraphNotIntegralTypes() {
+ auto notIntegralTypes = std::set<ngraph::element::Type>{};
+ const auto& allTypes = allNGraphTypes();
+ const auto& allIntegralTypes = allNGraphIntegralNumberTypes();
+ std::set_difference(allTypes.cbegin(), allTypes.cend(), allIntegralTypes.cbegin(), allIntegralTypes.cend(),
+ std::inserter(notIntegralTypes, notIntegralTypes.begin()));
+ return notIntegralTypes;
+}
+
+// Constructing the op with a non-integral input type must fail validation.
+using OutShapeOfReshapeTestsNegativeDataType = OutShapeOfReshapeTests;
+TEST_P(OutShapeOfReshapeTestsNegativeDataType, ThrowsOnInvalidDataType) {
+ std::shared_ptr<ngraph::vpu::op::OutShapeOfReshape> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ m_inDataShapeParam, m_outShapeDescriptorParam, true),
+ ngraph::NodeValidationFailure);
+}
+INSTANTIATE_TEST_CASE_P(InvalidInDataShapeTensorType, OutShapeOfReshapeTestsNegativeDataType,
+ testing::Combine(
+ testing::Values(TensorShape{4}),
+ testing::ValuesIn(allNGraphNotIntegralTypes()),
+ testing::Values(TensorShape{3}),
+ testing::Values(ngraph::element::i64))
+);
+
+INSTANTIATE_TEST_CASE_P(InvalidOutShapeDescriptorTensorType, OutShapeOfReshapeTestsNegativeDataType,
+ testing::Combine(
+ testing::Values(TensorShape{4}),
+ testing::Values(ngraph::element::i64),
+ testing::Values(TensorShape{3}),
+ testing::ValuesIn(allNGraphNotIntegralTypes()))
+);
+
+// Shapes the op must reject: rank-0, rank-2, and 1-D with a dynamic dimension.
+std::vector<ngraph::PartialShape> invalidTensorShapes {
+ TensorShape{},
+ TensorShape{4, 8},
+ TensorShape{ngraph::Dimension::dynamic()},
+};
+
+using OutShapeOfReshapeTestsNegativeDataShape = OutShapeOfReshapeTests;
+TEST_P(OutShapeOfReshapeTestsNegativeDataShape, ThrowsOnInvalidDataShape) {
+ std::shared_ptr<ngraph::vpu::op::OutShapeOfReshape> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ m_inDataShapeParam, m_outShapeDescriptorParam, true),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(InvalidInDataShapeTensorShape, OutShapeOfReshapeTestsNegativeDataShape,
+ testing::Combine(
+ testing::ValuesIn(invalidTensorShapes),
+ testing::Values(ngraph::element::i64),
+ testing::ValuesIn(tensorShapes),
+ testing::Values(ngraph::element::i64))
+);
+
+INSTANTIATE_TEST_CASE_P(InvalidOutShapeDescriptorTensorShape, OutShapeOfReshapeTestsNegativeDataShape,
+ testing::Combine(
+ testing::ValuesIn(tensorShapes),
+ testing::Values(ngraph::element::i64),
+ testing::ValuesIn(invalidTensorShapes),
+ testing::Values(ngraph::element::i64))
+);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Validation tests for the VPU custom op ngraph::vpu::op::StaticShapeBroadcast
+// in NUMPY and EXPLICIT modes, plus negative cases for wrong input counts,
+// unsupported mode, and a non-evaluable target-shape input.
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+
+#include <common_test_utils/test_common.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/function.hpp>
+
+#include <details/ie_exception.hpp>
+
+#include <gtest/gtest.h>
+
+namespace {
+
+using TensorType = ngraph::element::Type;
+using TensorShape = ngraph::PartialShape;
+using AxesMapping = std::vector<size_t>;
+
+// Source/target shape pair for NUMPY (implicit) broadcasting.
+struct BroadcastNumpyShapes {
+ TensorShape srcShape;
+ TensorShape targetShape;
+};
+
+// Source/target shape pair plus the axes mapping for EXPLICIT broadcasting.
+struct BroadcastExplicitShapes {
+ TensorShape srcShape;
+ TensorShape targetShape;
+ AxesMapping axesMapping;
+};
+
+using BroadcastNumpyTestParams = std::tuple<TensorType, BroadcastNumpyShapes>;
+using BroadcastExplicitTestParams = std::tuple<TensorType, BroadcastExplicitShapes>;
+
+// Fixture: a Parameter for the data tensor and another Parameter whose shape
+// is the broadcast target (its ShapeOf is used as the target-shape input).
+class StaticShapeBroadcastNumpyTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<BroadcastNumpyTestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters).srcShape;
+ const auto& targetShape = std::get<1>(parameters).targetShape;
+
+ m_tensor = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
+ m_tensorWithTargetShape = std::make_shared<ngraph::opset3::Parameter>(tensorType, targetShape);
+ }
+protected:
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensor;
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensorWithTargetShape;
+};
+
+// Same as the numpy fixture, plus a u64 Constant holding the axes mapping.
+class StaticShapeBroadcastExplicitTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<BroadcastExplicitTestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters).srcShape;
+ const auto& targetShape = std::get<1>(parameters).targetShape;
+ const auto& axesMapping = std::get<1>(parameters).axesMapping;
+
+ m_tensor = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
+ m_tensorWithTargetShape = std::make_shared<ngraph::opset3::Parameter>(tensorType, targetShape);
+ m_axesMapping = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{axesMapping.size()}, axesMapping);
+ }
+protected:
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensor;
+ std::shared_ptr<ngraph::opset3::Parameter> m_tensorWithTargetShape;
+ std::shared_ptr<ngraph::opset3::Constant> m_axesMapping;
+};
+
+std::vector<BroadcastNumpyShapes> testNumpyStaticShapes {
+ BroadcastNumpyShapes{TensorShape{1, 100}, TensorShape{4, 100}},
+ BroadcastNumpyShapes{TensorShape{1, 100}, TensorShape{2, 4, 100}},
+ BroadcastNumpyShapes{TensorShape{16, 1, 1}, TensorShape{2, 16, 50, 50}},
+};
+
+std::vector<BroadcastExplicitShapes> testExplicitStaticShapes {
+ BroadcastExplicitShapes{TensorShape{16}, TensorShape{1, 16, 50, 50}, AxesMapping{1}},
+ BroadcastExplicitShapes{TensorShape{50, 50}, TensorShape{1, 50, 50, 16}, AxesMapping{1, 2}},
+};
+
+std::vector<ngraph::element::Type> testNGraphNumericTypes {
+ ngraph::element::dynamic,
+ ngraph::element::bf16,
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::f64,
+ ngraph::element::i8,
+ ngraph::element::i16,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u1,
+ ngraph::element::u8,
+ ngraph::element::u16,
+ ngraph::element::u32,
+ ngraph::element::u64,
+};
+
+//
+// Positive tests
+//
+
+// NUMPY mode: two inputs (data, target shape); output shape must equal the
+// target parameter's shape after validation.
+TEST_P(StaticShapeBroadcastNumpyTests, CanValidateAndInferTypes) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf));
+ ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
+ ngraph::OutputVector{op->output(0)},
+ ngraph::ParameterVector{m_tensor, m_tensorWithTargetShape}));
+ ASSERT_EQ(m_tensorWithTargetShape->get_shape(), op->output(0).get_shape());
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastNumpyTests, testing::Combine(
+ testing::ValuesIn(testNGraphNumericTypes),
+ testing::ValuesIn(testNumpyStaticShapes))
+);
+
+// EXPLICIT mode: three inputs (data, target shape, axes mapping).
+TEST_P(StaticShapeBroadcastExplicitTests, CanValidateAndInferTypes) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, m_axesMapping));
+ ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
+ ngraph::OutputVector{op->output(0)},
+ ngraph::ParameterVector{m_tensor, m_tensorWithTargetShape}));
+ ASSERT_EQ(m_tensorWithTargetShape->get_shape(), op->output(0).get_shape());
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastExplicitTests, testing::Combine(
+ testing::ValuesIn(testNGraphNumericTypes),
+ testing::ValuesIn(testExplicitStaticShapes))
+);
+
+//
+// Negative tests
+//
+
+// NUMPY mode with a third (axes mapping) input must fail validation.
+using StaticShapeBroadcastNumpyTestsNegativeNumInputs = StaticShapeBroadcastNumpyTests;
+TEST_P(StaticShapeBroadcastNumpyTestsNegativeNumInputs, ThrowsOnInvalidNumInputs) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ const auto axesMapping = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{1}, 0);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, axesMapping, ngraph::op::BroadcastType::NUMPY),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastNumpyTestsNegativeNumInputs, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testNumpyStaticShapes[0]))
+);
+
+// EXPLICIT mode without the axes mapping input must fail validation.
+using StaticShapeBroadcastExplicitTestsNegativeNumInputs = StaticShapeBroadcastExplicitTests;
+TEST_P(StaticShapeBroadcastExplicitTestsNegativeNumInputs, ThrowsOnInvalidNumInputs) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, ngraph::op::BroadcastType::EXPLICIT),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastExplicitTestsNegativeNumInputs, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testExplicitStaticShapes[0]))
+);
+
+// BIDIRECTIONAL mode is not supported by this op.
+using StaticShapeBroadcastTestsNegativeMode = StaticShapeBroadcastNumpyTests;
+TEST_P(StaticShapeBroadcastTestsNegativeMode, ThrowsOnInvalidMode) {
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(m_tensorWithTargetShape);
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, shapeOf, ngraph::op::BroadcastType::BIDIRECTIONAL),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastTestsNegativeMode, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testNumpyStaticShapes[0]))
+);
+
+// Target shape passed as a Parameter (not evaluable at validation time)
+// must fail validation.
+// NOTE(review): test body name 'ThrowsOnInvalidMode' appears copy-pasted from
+// the previous suite — presumably should read 'ThrowsOnNonEvaluableTargetShape'.
+using StaticShapeBroadcastTestsNegativeEvaluate = StaticShapeBroadcastNumpyTests;
+TEST_P(StaticShapeBroadcastTestsNegativeEvaluate, ThrowsOnInvalidMode) {
+ const auto targetShape = std::make_shared<ngraph::opset3::Parameter>(
+ ngraph::element::u64, ngraph::Shape{4});
+ std::shared_ptr<ngraph::vpu::op::StaticShapeBroadcast> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ m_tensor, targetShape), ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeBroadcastTestsNegativeEvaluate, testing::Combine(
+ testing::Values(ngraph::element::f16),
+ testing::Values(testNumpyStaticShapes[0]))
+);
+
+} // namespace
using TensorType = ngraph::element::Type;
using TensorShape = ngraph::PartialShape;
+typedef std::tuple<
+ TensorType, // input type
+ TensorShape, // input shape
+ TensorType // output type
+> staticShapeNonZeroTestParams;
+
class StaticShapeNonZeroTests
: public CommonTestUtils::TestsCommon,
- public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+ public testing::WithParamInterface<staticShapeNonZeroTestParams> {
public:
void SetUp() override {
const auto& parameters = GetParam();
const auto& tensorType = std::get<0>(parameters);
const auto& tensorShape = std::get<1>(parameters);
+ m_outputType = std::get<2>(parameters);
m_param = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
}
protected:
std::shared_ptr<ngraph::opset3::Parameter> m_param;
+ ngraph::element::Type m_outputType;
};
std::vector<ngraph::PartialShape> testStaticShapes {
ngraph::element::u64,
};
+std::vector<ngraph::element::Type> outputTypes {
+ ngraph::element::i32,
+ ngraph::element::i64,
+};
+
+
//
// Positive tests
//
TEST_P(StaticShapeNonZeroTests, CanValidateAndInferTypes) {
std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
- ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param));
+ ASSERT_NO_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType));
ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
ngraph::OutputVector{op->output(0), op->output(1)},
ngraph::ParameterVector{m_param}));
INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTests, testing::Combine(
testing::ValuesIn(testNGraphNumericTypes),
- testing::ValuesIn(testStaticShapes))
+ testing::ValuesIn(testStaticShapes),
+ testing::ValuesIn(outputTypes))
);
//
// Negative tests
//
-using StaticShapeNonZeroTestsNegativeDataType = StaticShapeNonZeroTests;
-TEST_P(StaticShapeNonZeroTestsNegativeDataType, ThrowsOnInvalidDataType) {
+using StaticShapeNonZeroTestsNegativeInputDataType = StaticShapeNonZeroTests;
+TEST_P(StaticShapeNonZeroTestsNegativeInputDataType, ThrowsOnInvalidInputType) {
std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
- ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param),
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType),
ngraph::NodeValidationFailure);
}
-INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeDataType, testing::Combine(
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeInputDataType, testing::Combine(
testing::Values(ngraph::element::boolean),
- testing::ValuesIn(testStaticShapes))
+ testing::ValuesIn(testStaticShapes),
+ testing::ValuesIn(outputTypes))
);
using StaticShapeNonZeroTestsNegativeDataShape = StaticShapeNonZeroTests;
TEST_P(StaticShapeNonZeroTestsNegativeDataShape, ThrowsOnInvalidDataShape) {
std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
- ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param),
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType),
ngraph::NodeValidationFailure);
}
INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeDataShape, testing::Combine(
testing::ValuesIn(testNGraphNumericTypes),
- testing::ValuesIn(testDynamicShapes))
+ testing::ValuesIn(testDynamicShapes),
+ testing::ValuesIn(outputTypes))
+);
+
+using StaticShapeNonZeroTestsNegativeOutputDataType = StaticShapeNonZeroTests;
+TEST_P(StaticShapeNonZeroTestsNegativeOutputDataType, ThrowsOnInvalidOutputType) {
+ std::shared_ptr<ngraph::vpu::op::StaticShapeNonZero> op;
+ ASSERT_THROW(op = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(m_param, m_outputType),
+ ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeOutputDataType, testing::Combine(
+ testing::ValuesIn(testNGraphNumericTypes),
+ testing::ValuesIn(testStaticShapes),
+ testing::Values(ngraph::element::boolean))
);
} // namespace
eltwise->set_output_type(0, eltwise->get_input_element_type(0), ngraph::PartialShape::dynamic(eltwise->get_output_partial_shape(0).rank()));
const auto transformations = vpu::Transformations{{eltwiseType, vpu::dynamicToStaticShapeBinaryEltwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
eltwise->set_output_type(0, eltwise->get_input_element_type(0), ngraph::PartialShape::dynamic(eltwise->get_output_partial_shape(0).rank()));
const auto transformations = vpu::Transformations{{eltwiseType, vpu::dynamicToStaticShapeBinaryEltwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// Checks that the dynamicToStaticShapeBroadcast transformation rewrites an
+// opset3::Broadcast (Explicit mode) into StaticShapeBroadcast followed by a
+// DynamicShapeResolver, by structurally comparing the transformed function
+// against a hand-built reference function.
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_broadcast.hpp"
+#include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"
+#include "vpu/ngraph/operations/static_shape_broadcast.hpp"
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+
+#include <common_test_utils/test_common.hpp>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <map>
+#include <vector>
+
+namespace {
+
+using TensorType = ngraph::element::Type;
+using TensorShape = ngraph::PartialShape;
+using AxesMapping = std::vector<size_t>;
+
+// Source/target shape pair plus axes mapping for explicit broadcasting.
+struct BroadcastExplicitShapes {
+ TensorShape srcShape;
+ TensorShape targetShape;
+ AxesMapping axesMapping;
+};
+using BroadcastExplicitTestParams = std::tuple<TensorType, BroadcastExplicitShapes>;
+
+class DynamicToStaticShapeBroadcastTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<BroadcastExplicitTestParams> {
+public:
+ // The comparison itself runs in SetUp(); the TEST_P body below is empty.
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters).srcShape;
+ const auto& targetShape = std::get<1>(parameters).targetShape;
+ const auto& axesMapping = std::get<1>(parameters).axesMapping;
+
+ ngraph::helpers::CompareFunctions(
+ *transform(tensorType, tensorShape, targetShape, axesMapping),
+ *reference(tensorType, tensorShape, targetShape, axesMapping));
+ }
+
+protected:
+ // Builds a function with an explicit-mode opset3::Broadcast and runs the
+ // DynamicToStaticShape pass over it.
+ std::shared_ptr<const ngraph::Function> transform(
+ const TensorType& tensorType,
+ const TensorShape& tensorShape,
+ const TensorShape& targetShape,
+ const AxesMapping& axesMapping) const {
+ const auto tensorParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, tensorShape);
+ const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, targetShape);
+
+ const auto shapeOfNode = std::make_shared<ngraph::opset3::ShapeOf>(tensorWithTargetShapeParam);
+ // Keep ShapeOf from being constant-folded so the Broadcast retains a
+ // non-constant shape input for the transformation to handle.
+ shapeOfNode->set_is_foldable(false);
+
+ const auto axesMappingConstant = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{axesMapping.size()}, axesMapping);
+
+ const auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(
+ tensorParam, shapeOfNode, axesMappingConstant);
+
+ auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{broadcast},
+ ngraph::ParameterVector{tensorParam, tensorWithTargetShapeParam},
+ "Actual");
+
+ // We need to set broadcast output shape to make its rank static.
+ // In opset3::Broadcast implementation with Explicit mode output shape gets
+ // static rank only in cases when the second input is Concat
+ std::vector<ngraph::Dimension> broadcastOutShape(
+ shapeOfNode->get_output_shape(0)[0], ngraph::Dimension::dynamic());
+ broadcast->set_output_type(0, tensorParam->get_output_element_type(0),
+ ngraph::PartialShape(broadcastOutShape));
+ function->get_result()->set_output_type(0, tensorParam->get_output_element_type(0),
+ targetShape);
+
+ const auto transformations = vpu::Transformations{{
+ ngraph::opset3::Broadcast::type_info, vpu::dynamicToStaticShapeBroadcast}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // Builds the expected post-transformation graph:
+ // StaticShapeBroadcast -> DynamicShapeResolver.
+ std::shared_ptr<const ngraph::Function> reference(
+ const TensorType& tensorType,
+ const TensorShape& tensorShape,
+ const TensorShape& targetShape,
+ const AxesMapping& axesMapping) const {
+ const auto tensorParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, tensorShape);
+ const auto tensorWithTargetShapeParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, targetShape);
+ const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(tensorWithTargetShapeParam);
+
+ const auto axesMappingConstant = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{axesMapping.size()}, axesMapping);
+
+ const auto staticShapeBroadcast = std::make_shared<ngraph::vpu::op::StaticShapeBroadcast>(
+ tensorParam, shapeOf, axesMappingConstant);
+
+ const auto dsrOut = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ staticShapeBroadcast, shapeOf);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsrOut},
+ ngraph::ParameterVector{tensorParam, tensorWithTargetShapeParam},
+ "Expected");
+ }
+};
+
+// Intentionally empty: SetUp() already performed the function comparison.
+TEST_P(DynamicToStaticShapeBroadcastTests, compareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeBroadcastTests, testing::Combine(
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ BroadcastExplicitShapes{TensorShape{16}, TensorShape{1, 16, 50, 50}, AxesMapping{1}},
+ BroadcastExplicitShapes{TensorShape{50, 50}, TensorShape{1, 50, 50, 16}, AxesMapping{1, 2}})
+
+));
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{ngraph::opset3::Clamp::type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_common.hpp>
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape_concat.hpp>
+#include <vpu/utils/error.hpp>
+
+#include <ngraph/op/parameter.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/shape.hpp>
+#include <ngraph/type/element_type.hpp>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+
+#include <numeric>
+#include <queue>
+#include <random>
+
+namespace {
+
+using DataType = ngraph::element::Type;
+using DataShape = ngraph::Shape;
+using DataShapes = std::vector<DataShape>;
+
+struct ConcatParam {
+ DataShapes dataShapes;
+ int axis;
+};
+using ConcatTestParam = std::tuple<DataType, ConcatParam>;
+
+// Tests the DynamicToStaticShape pass for ngraph::opset3::Concat: the pass must
+// turn a Concat over dynamic (DSR-fed) inputs into a static Concat followed by a
+// DynamicShapeResolver whose shape input is recomputed from the input shapes.
+class DynamicToStaticShapeConcatTests
+ : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<ConcatTestParam> {
+public:
+ // Builds the transformed function and a hand-crafted reference and compares
+ // them structurally; the TEST_P body itself is empty.
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& dataType = std::get<0>(parameters);
+ const auto& concatParam = std::get<1>(parameters);
+ const auto& dataShapes = concatParam.dataShapes;
+ const auto& axis = concatParam.axis;
+
+ ngraph::helpers::CompareFunctions(
+ *transform(dataType, dataShapes, axis),
+ *reference(dataType, dataShapes, axis));
+ }
+
+protected:
+ // Creates a data Parameter plus an i64 shape Parameter and wires them into a
+ // DynamicShapeResolver; both Parameters are appended to `params`
+ // (data first, then its shape — callers rely on params.back() being the shape).
+ std::shared_ptr<ngraph::Node> createDSRWithParams(
+ const DataShape& dataShape,
+ const ngraph::element::Type& dataType,
+ ngraph::ParameterVector& params) const {
+ const auto param = std::make_shared<ngraph::opset3::Parameter>(
+ dataType, dataShape);
+ const auto shape = std::make_shared<ngraph::opset3::Parameter>(
+ ngraph::element::i64, ngraph::Shape{dataShape.size()});
+ params.push_back(param);
+ params.push_back(shape);
+ return std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(param, shape);
+ }
+
+ // "Actual" function: Concat over DSR inputs with the DynamicToStaticShape
+ // pass applied to it.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type& dataType,
+ const DataShapes& dataShapes,
+ const int axis) const {
+ ngraph::NodeVector dsrVector;
+ ngraph::ParameterVector params;
+ for (const auto& dataShape : dataShapes) {
+ dsrVector.push_back(createDSRWithParams(dataShape, dataType, params));
+ }
+
+ const auto concat = std::make_shared<ngraph::opset3::Concat>(dsrVector, axis);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{concat}, params, "Actual");
+ // Force the Concat output to be rank-only dynamic so the pass has dynamism
+ // to resolve; done after the Function is constructed, as in the sibling tests.
+ concat->set_output_type(0, dsrVector[0]->get_input_element_type(0),
+ ngraph::PartialShape::dynamic(concat->get_output_partial_shape(0).rank()));
+
+ const auto transformations = vpu::Transformations{
+ {ngraph::opset3::Concat::type_info, vpu::dynamicToStaticShapeConcat}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected" function: the output shape is reconstructed by summing all input
+ // shape parameters elementwise, then dividing every dimension except the
+ // concatenation axis by the input count (those dims are equal across inputs,
+ // so sum/count restores them; the axis dimension keeps the sum).
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type& dataType,
+ const DataShapes& dataShapes,
+ const int axis) const {
+ ngraph::NodeVector dsrVector;
+ ngraph::ParameterVector params;
+
+ dsrVector.push_back(createDSRWithParams(dataShapes.front(), dataType, params));
+
+ // params.back() is the shape Parameter of the DSR just created.
+ auto accumulatedShape = params.back()->output(0);
+ for (size_t inputIdx = 1; inputIdx < dataShapes.size(); ++inputIdx) {
+ dsrVector.push_back(createDSRWithParams(
+ dataShapes.at(inputIdx), dataType, params));
+ const auto shapeAccumulatorOp = std::make_shared<ngraph::opset3::Add>(
+ accumulatedShape, params.back());
+ accumulatedShape = shapeAccumulatorOp->output(0);
+ }
+
+ const size_t rank = dataShapes.front().size();
+ std::vector<int64_t> dividerValues(rank, dataShapes.size());
+ // Normalize a negative axis before indexing into the divider vector.
+ dividerValues[axis < 0 ? axis + rank : axis] = 1;
+ const auto divider = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::i64, ngraph::Shape{rank}, dividerValues);
+ const auto divide = std::make_shared<ngraph::opset3::Divide>(accumulatedShape, divider);
+
+ const auto concat = std::make_shared<ngraph::opset3::Concat>(dsrVector, axis);
+ const auto outDsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(concat, divide);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{outDsr}, params, "Expected");
+ }
+};
+
+// Body intentionally empty: all the checking happens in SetUp().
+TEST_P(DynamicToStaticShapeConcatTests, CompareFunctions) {
+}
+
+// Element types exercised for the concatenated data.
+std::vector<ngraph::element::Type> dataTypes = {
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8,
+};
+
+// Shape/axis combinations; the last case uses a negative axis (-1 == last dim).
+std::vector<ConcatParam> concatParams = {
+ {DataShapes{DataShape{128}, DataShape{256}, DataShape{512}, DataShape{1024}}, 0},
+ {DataShapes{DataShape{1, 1000}, DataShape{2, 1000}, DataShape{4, 1000}, DataShape{8, 1000}}, 0},
+ {DataShapes{DataShape{128, 100}, DataShape{128, 200}, DataShape{128, 400}, DataShape{128, 800}}, 1},
+ {DataShapes{DataShape{3, 64, 128}, DataShape{4, 64, 128}, DataShape{5, 64, 128}}, 0},
+ {DataShapes{DataShape{3, 64, 128}, DataShape{3, 64, 256}, DataShape{3, 64, 512}}, 2},
+ {DataShapes{DataShape{3, 64, 128}, DataShape{3, 64, 256}, DataShape{3, 64, 512}}, -1},
+};
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeConcatTests, testing::Combine(
+ testing::ValuesIn(dataTypes),
+ testing::ValuesIn(concatParams)));
+
+} // namespace
convert->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{ngraph::opset3::Convert::type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_common.hpp>
+#include <ngraph/shape.hpp>
+#include <ngraph/type/element_type.hpp>
+#include <ngraph/op/parameter.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <numeric>
+#include <random>
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape_gather.hpp>
+#include <queue>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+#include <vpu/utils/error.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+// One Gather scenario. first_split_point/second_split_point bound the data dims
+// that the indices shape replaces when the reference output shape is assembled:
+// data dims [0, first_split_point), then the index shape, then data dims
+// [second_split_point, rank). Presumably first == normalized axis and
+// second == axis + 1 — TODO confirm against dynamicToStaticShapeGather.
+struct GatherTestCase {
+ ngraph::Shape data_shape, index_shape;
+ int64_t axis, first_split_point, second_split_point;
+};
+
+// Shared parameter grid: data element type x index element type x scenario.
+// Negative axes (-1, -4) exercise axis normalization.
+const auto combinations = testing::Combine(
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ GatherTestCase{{6}, {15, 4, 20, 28}, 0, 0, 0},
+ GatherTestCase{{6, 12, 10, 24}, {6}, 0, 0, 1},
+ GatherTestCase{{6, 12}, {15, 4, 20, 28}, 1, 1, 2},
+ GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, 3, 3, 4},
+ GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -1, 3, 4},
+ GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -4, 0, 1}));
+
+
+// Gather with a DSR on the *data* input only; the indices are static.
+class DynamicToStaticShapeGatherDataDSR : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<DataType, DataType, GatherTestCase>> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& gather_setup = std::get<2>(parameters);
+
+ ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, gather_setup),
+ *reference(data_type, idx_type, gather_setup));
+ }
+
+protected:
+ // "Actual": Gather(DSR(data), indices, axis) run through the pass.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(dsr, indices, axis);
+
+ auto outputShape = node->get_output_partial_shape(0);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{node},
+ ngraph::ParameterVector{data, dims, indices},
+ "Actual");
+ // Gather output rank = data rank + indices rank - 1 (axis dim replaced by index shape).
+ node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ gather_setup.data_shape.size() + gather_setup.index_shape.size() - 1));
+
+ const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeGather}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": output shape is stitched from dynamic data dims before the
+ // split point, the constant indices shape, and dynamic data dims after it.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(dsr, indices, axis);
+
+ // Indices are static here, so their shape becomes a constant sub-shape.
+ const auto indices_shape = ngraph::opset3::Constant::create(dims->get_element_type(), {gather_setup.index_shape.size()}, gather_setup.index_shape);
+ ngraph::OutputVector output_dims;
+ if (gather_setup.first_split_point) {
+ std::vector<int64_t> idxs(gather_setup.first_split_point);
+ std::iota(idxs.begin(), idxs.end(), 0);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ if (!gather_setup.index_shape.empty())
+ output_dims.push_back(indices_shape);
+ if (gather_setup.first_split_point + 1 < gather_setup.data_shape.size()) {
+ std::vector<int64_t> idxs(gather_setup.data_shape.size() - gather_setup.second_split_point);
+ std::iota(idxs.begin(), idxs.end(), gather_setup.second_split_point);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node, output_shape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{data, dims, indices},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeGatherDataDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeGatherDataDSR, combinations);
+
+// Gather with a DSR on the *indices* input only; the data is static.
+class DynamicToStaticShapeGatherIdxDSR : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<DataType, DataType, GatherTestCase>> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& gather_setup = std::get<2>(parameters);
+
+ ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, gather_setup),
+ *reference(data_type, idx_type, gather_setup));
+ }
+
+protected:
+ // "Actual": Gather(data, DSR(indices), axis) run through the pass.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data, dsr, axis);
+
+ auto outputShape = node->get_output_partial_shape(0);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{node},
+ ngraph::ParameterVector{data, dims, indices},
+ "Actual");
+ // NOTE(review): `dsr` here wraps the *indices*, so this sets the Gather
+ // output element type to idx_type. The DataDSR and combined tests use the
+ // data element type instead (a Gather result has the data type) — this
+ // looks like a copy-paste slip; verify it is intentional.
+ node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ gather_setup.data_shape.size() + gather_setup.index_shape.size() - 1));
+
+ const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeGather}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": output shape is stitched from constant data dims before the
+ // split point, the dynamic indices dims parameter, and constant data dims after it.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data, dsr, axis);
+
+ // Data is static here, so its shape becomes a constant sub-shape.
+ const auto data_shape = ngraph::opset3::Constant::create(dims->get_element_type(), {gather_setup.data_shape.size()}, gather_setup.data_shape);
+
+ ngraph::OutputVector output_dims;
+ if (gather_setup.first_split_point) {
+ std::vector<int64_t> idxs(gather_setup.first_split_point);
+ std::iota(idxs.begin(), idxs.end(), 0);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ if (!gather_setup.index_shape.empty())
+ output_dims.push_back(dims);
+ if (gather_setup.first_split_point + 1 < gather_setup.data_shape.size()) {
+ std::vector<int64_t> idxs(gather_setup.data_shape.size() - gather_setup.second_split_point);
+ std::iota(idxs.begin(), idxs.end(), gather_setup.second_split_point);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_shape,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node, output_shape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{data, dims, indices},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeGatherIdxDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeGatherIdxDSR, combinations);
+
+// Gather with DSRs on *both* the data and the indices inputs.
+class DynamicToStaticShapeGather : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<std::tuple<DataType, DataType, GatherTestCase>> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& gather_setup = std::get<2>(parameters);
+
+ ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, gather_setup),
+ *reference(data_type, idx_type, gather_setup));
+ }
+
+protected:
+ // "Actual": Gather(DSR(data), DSR(indices), axis) run through the pass.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto data_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+ const auto indices_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto data_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, data_dims);
+ const auto indices_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, indices_dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data_dsr, indices_dsr, axis);
+
+ auto outputShape = node->get_output_partial_shape(0);
+ const auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{node},
+ ngraph::ParameterVector{data, data_dims, indices, indices_dims},
+ "Actual");
+ // Gather output rank = data rank + indices rank - 1; element type follows the data input.
+ node->set_output_type(0, node->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ gather_setup.data_shape.size() + gather_setup.index_shape.size() - 1));
+
+ const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeGather}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": output shape is stitched from dynamic data dims before the
+ // split point, the dynamic indices dims, and dynamic data dims after it.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type_t& data_type,
+ const ngraph::element::Type_t& idx_type,
+ const GatherTestCase& gather_setup) const {
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+ const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+ const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+ const auto data_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+ const auto indices_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+ const auto data_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, data_dims);
+ const auto indices_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, indices_dims);
+ const auto node = std::make_shared<ngraph::opset3::Gather>(data_dsr, indices_dsr, axis);
+
+ ngraph::OutputVector output_dims;
+ if (gather_setup.first_split_point) {
+ std::vector<int64_t> idxs(gather_setup.first_split_point);
+ std::iota(idxs.begin(), idxs.end(), 0);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ if (!gather_setup.index_shape.empty())
+ output_dims.push_back(indices_dims);
+ if (gather_setup.first_split_point + 1 < gather_setup.data_shape.size()) {
+ std::vector<int64_t> idxs(gather_setup.data_shape.size() - gather_setup.second_split_point);
+ std::iota(idxs.begin(), idxs.end(), gather_setup.second_split_point);
+ output_dims.push_back(
+ std::make_shared<ngraph::opset3::Gather>(
+ data_dims,
+ ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+ ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+ }
+ const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node, output_shape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{data, data_dims, indices, indices_dims},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeGather, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeGather, combinations);
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticNonMaxSuppression}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
using TensorType = ngraph::element::Type_t;
using TensorShape = ngraph::Shape;
-class DynamicToStaticShapeNonZeroTests : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+typedef std::tuple<
+ TensorType, // input type
+ TensorShape, // input shape
+ TensorType // output type
+> dynamicToStaticShapeNonZeroTestParams;
+
+class DynamicToStaticShapeNonZeroTests : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<dynamicToStaticShapeNonZeroTestParams> {
public:
void prepareFunctions() {
const auto& parameters = GetParam();
- const auto& tensorType = std::get<0>(parameters);
- const auto& tensorShape = std::get<1>(parameters);
+ const auto& inputType = std::get<0>(parameters);
+ const auto& inputShape = std::get<1>(parameters);
+ const auto& resultType = std::get<2>(parameters);
// Create a function with only op::NonZero
// And then run conversion pass
{
- const auto input = std::make_shared<ngraph::opset3::Parameter>(tensorType, tensorShape);
+ const auto input = std::make_shared<ngraph::opset3::Parameter>(inputType, inputShape);
- const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(input);
+ const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(input, resultType);
nonZero->set_friendly_name(s_FriendlyName);
actual = std::make_shared<ngraph::Function>(ngraph::NodeVector{nonZero}, ngraph::ParameterVector{input});
const auto transformation = vpu::Transformations{{ngraph::opset3::NonZero::type_info, vpu::dynamicToStaticShapeNonZero}};
- vpu::DynamicToStaticShape(transformation).transform(*actual);
+ vpu::DynamicToStaticShape(transformation).transform(actual);
}
// Create a reference function
{
- const auto input = std::make_shared<ngraph::opset1::Parameter>(tensorType, tensorShape);
+ const auto input = std::make_shared<ngraph::opset1::Parameter>(inputType, inputShape);
- const auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(input);
+ const auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(input, resultType);
staticShapeNonZero->set_friendly_name(std::string(s_FriendlyName) + "/static_shape");
const auto dynamicShapeResolver = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
staticShapeNonZero->output(0), staticShapeNonZero->output(1));
TensorShape{1000},
TensorShape{4, 1000},
TensorShape{3, 128, 256},
- TensorShape{2, 3, 128, 256})
+ TensorShape{2, 3, 128, 256}),
+ testing::Values(
+ ngraph::element::i32,
+ ngraph::element::i64)
));
} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_reshape.hpp"
+#include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+
+#include <common_test_utils/test_common.hpp>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <map>
+#include <vector>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type;
+using DataShape = ngraph::Shape;
+using TestParams = std::tuple<DataShape, DataType>;
+
+// Tests dynamicToStaticShapeReshape: a Reshape over a DSR input becomes a
+// static Reshape followed by a DSR whose shape input is an OutShapeOfReshape
+// computed from the dynamic input dims and the reshape pattern.
+class DynamicToStaticShapeReshapeTests
+ : public CommonTestUtils::TestsCommon,
+ public testing::WithParamInterface<TestParams> {
+public:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& inDataShape = std::get<0>(parameters);
+ const auto& inDataType = std::get<1>(parameters);
+
+ ngraph::helpers::CompareFunctions(
+ *transform(inDataType, inDataShape),
+ *reference(inDataType, inDataShape));
+ }
+
+protected:
+ // "Actual": Reshape(DSR(data), const pattern) with the pass applied.
+ // The reshape pattern is the input shape itself, so the op is a no-op
+ // shape-wise; the test only checks the graph rewrite.
+ std::shared_ptr<const ngraph::Function> transform(
+ const ngraph::element::Type& inDataType,
+ const ngraph::Shape& inDataShape) const {
+ const auto inDataParam = std::make_shared<ngraph::op::Parameter>(
+ inDataType, inDataShape);
+ const auto inDataDimsParam = std::make_shared<ngraph::op::Parameter>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()});
+ const auto outShapeDescriptorParam = std::make_shared<ngraph::op::Constant>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()}, inDataShape);
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ inDataParam, inDataDimsParam);
+ const auto reshape = std::make_shared<ngraph::op::v1::Reshape>(
+ dsr, outShapeDescriptorParam, true);
+
+ auto function = std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{reshape},
+ ngraph::ParameterVector{inDataParam, inDataDimsParam},
+ "Actual");
+ // Force the Reshape output to be rank-only dynamic so the pass has
+ // dynamism to resolve; done after the Function is built, as in sibling tests.
+ reshape->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(
+ outShapeDescriptorParam->get_output_partial_shape(0).rank()));
+
+ const auto transformations = vpu::Transformations{{
+ ngraph::op::v1::Reshape::type_info, vpu::dynamicToStaticShapeReshape}};
+ vpu::DynamicToStaticShape(transformations).transform(function);
+ return function;
+ }
+
+ // "Expected": static Reshape plus a DSR fed by OutShapeOfReshape.
+ std::shared_ptr<const ngraph::Function> reference(
+ const ngraph::element::Type& inDataType,
+ const ngraph::Shape& inDataShape) const {
+ const auto inDataParam = std::make_shared<ngraph::op::Parameter>(
+ inDataType, inDataShape);
+ const auto inDataDimsParam = std::make_shared<ngraph::op::Parameter>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()});
+ const auto outShapeDescriptorParam = std::make_shared<ngraph::op::Constant>(
+ ngraph::element::i64, ngraph::Shape{inDataShape.size()}, inDataShape);
+
+ const auto dsr0 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ inDataParam, inDataDimsParam);
+ const auto reshape = std::make_shared<ngraph::op::v1::Reshape>(
+ dsr0, outShapeDescriptorParam, true);
+
+ // OutShapeOfReshape recomputes the output dims from the dynamic input
+ // dims and the reshape pattern (last arg mirrors special_zero=true).
+ const auto outShapeOfReshape = std::make_shared<ngraph::vpu::op::OutShapeOfReshape>(
+ inDataDimsParam, outShapeDescriptorParam, true);
+ const auto dsr1 = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ reshape, outShapeOfReshape);
+ return std::make_shared<ngraph::Function>(
+ ngraph::NodeVector{dsr1},
+ ngraph::ParameterVector{inDataParam, inDataDimsParam},
+ "Expected");
+ }
+};
+
+TEST_P(DynamicToStaticShapeReshapeTests, compareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeReshapeTests, testing::Combine(
+ testing::Values(
+ DataShape{4, 1000},
+ DataShape{3, 128, 256},
+ DataShape{2, 3, 128, 256}),
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8)
+));
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeROIAlign}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeROIAlign}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, data_dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeROIAlign}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{scatter_setup.scatter_type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_shapeof.hpp"
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset3.hpp>
+
+#include <common_test_utils/test_common.hpp>
+#include <gtest/gtest.h>
+
+
+namespace {
+
+using TensorType = ngraph::element::Type_t;
+using TensorShape = ngraph::Shape;
+
+// Case 1: ShapeOf is the only consumer of the DSR output.
+// After DynamicToStaticShapeShapeOf runs, the ShapeOf(DSR(data, shape))
+// sub-graph is expected to collapse into the shape parameter itself,
+// removing both the DSR node and the data parameter from the graph.
+class DynamicToStaticShapeShapeOfRemoveDSR : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& data_shape = std::get<1>(parameters);
+
+        // Compare the transformed graph against a hand-built reference graph.
+        ngraph::helpers::CompareFunctions(*transform(data_type, data_shape),
+                *reference(data_type, data_shape));
+    }
+
+protected:
+    // Builds ShapeOf(DSR(data, shape)) and applies the pass to it.
+    std::shared_ptr<const ngraph::Function> transform(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(dsr->output(0));
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shapeOf},
+                ngraph::ParameterVector{data, shape},
+                "Actual");
+
+        vpu::DynamicToStaticShapeShapeOf().run_on_function(function);
+        return function;
+    }
+
+    // Expected result: the shape parameter is forwarded straight to the output.
+    std::shared_ptr<const ngraph::Function> reference(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        return std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shape},
+                ngraph::ParameterVector{shape},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeShapeOfRemoveDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeShapeOfRemoveDSR, testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                TensorShape{1000},
+                TensorShape{4, 1000},
+                TensorShape{3, 128, 256},
+                TensorShape{2, 3, 128, 256})
+));
+
+// Case 2: the ShapeOf result feeds further computation (a Relu).
+// The ShapeOf itself is still expected to be replaced by the shape parameter,
+// with the consumer re-attached to that parameter.
+class DynamicToStaticShapeShapeOfWithOutRemoveDSR : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& data_shape = std::get<1>(parameters);
+
+        ngraph::helpers::CompareFunctions(*transform(data_type, data_shape),
+                *reference(data_type, data_shape));
+    }
+
+protected:
+    // Builds Relu(ShapeOf(DSR(data, shape))) and applies the pass to it.
+    std::shared_ptr<const ngraph::Function> transform(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(dsr->output(0));
+        const auto shapeOfOutputRelu = std::make_shared<ngraph::opset3::Relu>(shapeOf->output(0));
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shapeOfOutputRelu},
+                ngraph::ParameterVector{data, shape},
+                "Actual");
+
+        vpu::DynamicToStaticShapeShapeOf().run_on_function(function);
+        return function;
+    }
+
+    // Expected result: Relu consumes the shape parameter directly.
+    std::shared_ptr<const ngraph::Function> reference(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto shapeRelu = std::make_shared<ngraph::opset3::Relu>(shape);
+
+        return std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{shapeRelu},
+                ngraph::ParameterVector{shape},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeShapeOfWithOutRemoveDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeShapeOfWithOutRemoveDSR, testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                TensorShape{1000},
+                TensorShape{4, 1000},
+                TensorShape{3, 128, 256},
+                TensorShape{2, 3, 128, 256})
+));
+
+// Case 3: the DSR output has a non-ShapeOf consumer, so the DSR must be kept.
+// NOTE(review): shapeOf and shapeOfOutputRelu below are not reachable from the
+// function's results (only dsrOutputRelu is), so run_on_function may never
+// visit the ShapeOf branch — confirm this is the intended graph shape.
+class DynamicToStaticShapeShapeOfKeepDSR : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& data_shape = std::get<1>(parameters);
+
+        ngraph::helpers::CompareFunctions(*transform(data_type, data_shape),
+                *reference(data_type, data_shape));
+    }
+
+protected:
+    // Builds DSR with both a ShapeOf consumer and a Relu consumer; only the
+    // Relu branch is registered as the function result.
+    std::shared_ptr<const ngraph::Function> transform(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeOf = std::make_shared<ngraph::opset3::ShapeOf>(dsr->output(0));
+        const auto dsrOutputRelu = std::make_shared<ngraph::opset3::Relu>(dsr->output(0));
+        const auto shapeOfOutputRelu = std::make_shared<ngraph::opset3::Relu>(shapeOf->output(0));
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{dsrOutputRelu},
+                ngraph::ParameterVector{data, shape},
+                "Actual");
+
+        vpu::DynamicToStaticShapeShapeOf().run_on_function(function);
+        return function;
+    }
+
+    // Expected result: the DSR node survives; Relu still consumes its output.
+    std::shared_ptr<const ngraph::Function> reference(
+            const TensorType& dataType,
+            const TensorShape& dataShape) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataShape);
+        const auto shape = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataShape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, shape);
+        const auto shapeRelu = std::make_shared<ngraph::opset3::Relu>(shape);
+        const auto dsrOutputRelu = std::make_shared<ngraph::opset3::Relu>(dsr->output(0));
+
+        return std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{dsrOutputRelu},
+                ngraph::ParameterVector{data, shape},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeShapeOfKeepDSR, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeShapeOfKeepDSR, testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                TensorShape{1000},
+                TensorShape{4, 1000},
+                TensorShape{3, 128, 256},
+                TensorShape{2, 3, 128, 256})
+));
+
+} // namespace
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(node->get_output_partial_shape(0).rank()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeSqueeze}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
transpose->set_output_type(0, dsr->get_input_element_type(0), makeDynamicShape(transposition->get_output_partial_shape(0)));
const auto transformations = vpu::Transformations{{ngraph::opset3::Transpose::type_info, vpu::dynamicToStaticShapeTranspose}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(outputShape.rank()));
const auto transformations = vpu::Transformations{{type_info, vpu::dynamicToStaticUnaryElementwise}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
ngraph::PartialShape::dynamic(node->get_output_partial_shape(0).rank() + unsqueeze_axes.size()));
const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeUnsqueeze}};
- vpu::DynamicToStaticShape(transformations).transform(*function);
+ vpu::DynamicToStaticShape(transformations).transform(function);
return function;
}
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <common_test_utils/test_common.hpp>
+#include <ngraph/shape.hpp>
+#include <ngraph/type/element_type.hpp>
+#include <ngraph/op/parameter.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <numeric>
+#include <random>
+#include <ngraph/opsets/opset3.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape_variadic_split.hpp>
+#include <queue>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+#include <vpu/utils/error.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+// One VariadicSplit scenario: the static data shape, the requested split
+// lengths, the split axis, and the (normalized) dimension indices that bound
+// the split axis — used by the reference graph to gather the prefix/suffix of
+// the dynamic dims around the split dimension.
+struct VariadicSplitTestCase {
+    ngraph::Shape data_shape;
+    std::vector<int64_t> split_lengths;
+    int64_t axis, first_split_point, second_split_point;
+};
+
+const auto combinations = testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                ngraph::element::i32,
+                ngraph::element::i64),
+        testing::Values(
+                VariadicSplitTestCase{{6}, {2, 1, 2, 1}, 0, 0, 0},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {1, 1, 3, 1}, 0, 0, 1},
+                VariadicSplitTestCase{{6, 12}, {7, 2, 1, 2}, 1, 1, 2},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {10, 14}, 3, 3, 4},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {14, 10}, -1, 3, 4},
+                VariadicSplitTestCase{{6, 12, 10, 24}, {6}, -4, 0, 1}));
+
+
+// Verifies that vpu::dynamicToStaticShapeVariadicSplit rewrites
+// VariadicSplit(DSR(data, dims), axis, split_lengths) so that every split
+// output is wrapped in a DSR carrying the statically-computed output shape.
+class DynamicToStaticShapeVariadicSplit : public CommonTestUtils::TestsCommon,
+        public testing::WithParamInterface<std::tuple<DataType, DataType, VariadicSplitTestCase>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& variadic_split_setup = std::get<2>(parameters);
+
+        // Compare the transformed graph against a hand-built reference graph.
+        ngraph::helpers::CompareFunctions(*transform(data_type, idx_type, variadic_split_setup),
+                *reference(data_type, idx_type, variadic_split_setup));
+    }
+
+protected:
+    // Builds the dynamic VariadicSplit graph and applies the transformation.
+    std::shared_ptr<const ngraph::Function> transform(
+            const ngraph::element::Type_t& data_type,
+            const ngraph::element::Type_t& idx_type,
+            const VariadicSplitTestCase& variadic_split_setup) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, variadic_split_setup.data_shape);
+        const auto axis = ngraph::opset3::Constant::create(idx_type, {}, std::vector<int64_t>{variadic_split_setup.axis});
+        const auto split_lengths = ngraph::opset3::Constant::create(idx_type,
+                {variadic_split_setup.split_lengths.size()}, std::vector<int64_t>{variadic_split_setup.split_lengths});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{variadic_split_setup.data_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+        const auto node = std::make_shared<ngraph::opset3::VariadicSplit>(dsr, axis, split_lengths);
+
+        // Concat is only a test workaround to give the multi-output split a
+        // single function result.
+        const auto tests_wa = std::make_shared<ngraph::opset3::Concat>(node->outputs(), variadic_split_setup.axis);
+
+        const auto function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{tests_wa},
+                ngraph::ParameterVector{data, dims},
+                "Actual");
+        // Mark the split output as fully dynamic so the transformation has
+        // something to resolve.
+        node->set_output_type(0, dsr->get_input_element_type(0), ngraph::PartialShape::dynamic(variadic_split_setup.data_shape.size()));
+
+        const auto transformations = vpu::Transformations{{node->type_info, vpu::dynamicToStaticShapeVariadicSplit}};
+        vpu::DynamicToStaticShape(transformations).transform(function);
+        return function;
+    }
+
+    // Builds the expected graph: per-output DSRs whose shape input is the
+    // concatenation of the unchanged leading dims, the split length, and the
+    // unchanged trailing dims.
+    std::shared_ptr<const ngraph::Function> reference(
+            const ngraph::element::Type_t& data_type,
+            const ngraph::element::Type_t& idx_type,
+            const VariadicSplitTestCase& variadic_split_setup) const {
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, variadic_split_setup.data_shape);
+        const auto axis = ngraph::opset3::Constant::create(idx_type, {}, std::vector<int64_t>{variadic_split_setup.axis});
+        const auto split_lengths = ngraph::opset3::Constant::create(idx_type,
+                {variadic_split_setup.split_lengths.size()}, std::vector<int64_t>{variadic_split_setup.split_lengths});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{variadic_split_setup.data_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+        const auto node = std::make_shared<ngraph::opset3::VariadicSplit>(dsr, axis, split_lengths);
+
+        // Gather the dynamic dims before and after the split axis.
+        ngraph::OutputVector first_shape_part, second_shape_part;
+        if (variadic_split_setup.first_split_point) {
+            std::vector<int64_t> idxs(variadic_split_setup.first_split_point);
+            std::iota(idxs.begin(), idxs.end(), 0);
+            first_shape_part.push_back(
+                    std::make_shared<ngraph::opset3::Gather>(
+                            dims,
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+        }
+        if (variadic_split_setup.first_split_point + 1 < variadic_split_setup.data_shape.size()) {
+            std::vector<int64_t> idxs(variadic_split_setup.data_shape.size() - variadic_split_setup.second_split_point);
+            std::iota(idxs.begin(), idxs.end(), variadic_split_setup.second_split_point);
+            second_shape_part.push_back(
+                    std::make_shared<ngraph::opset3::Gather>(
+                            dims,
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {idxs.size()}, idxs),
+                            ngraph::opset3::Constant::create(ngraph::element::i64, {1}, {0})));
+        }
+        ngraph::NodeVector results;
+        for (size_t i = 0; i < variadic_split_setup.split_lengths.size(); ++i) {
+            const auto dim = ngraph::opset3::Constant::create(dims->get_element_type(), {1}, {variadic_split_setup.split_lengths[i]});
+            if (!first_shape_part.empty() || !second_shape_part.empty()) {
+                ngraph::OutputVector output_dims{dim};
+                output_dims.insert(output_dims.begin(), first_shape_part.begin(), first_shape_part.end());
+                output_dims.insert(output_dims.end(), second_shape_part.begin(), second_shape_part.end());
+                const auto output_shape = std::make_shared<ngraph::opset3::Concat>(output_dims, 0);
+                results.push_back(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node->output(i), output_shape));
+            } else {
+                // 1-D case: the output shape is just the split length.
+                results.push_back(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(node->output(i), dim));
+            }
+        }
+        const auto tests_wa = std::make_shared<ngraph::opset3::Concat>(results, variadic_split_setup.axis);
+
+        return std::make_shared<ngraph::Function>(
+                tests_wa,
+                ngraph::ParameterVector{data, dims},
+                "Expected");
+    }
+};
+
+TEST_P(DynamicToStaticShapeVariadicSplit, CompareFunctions) {
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeVariadicSplit, combinations);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <behavior/core_threading_tests.hpp>
+
+namespace {
+
+// Device/config pairs under test: plain MYRIAD (with perf counters on),
+// plus MYRIAD reached through the HETERO and MULTI meta-devices.
+Params params[] = {
+    std::tuple<Device, Config> { "MYRIAD", { { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES) } } },
+    std::tuple<Device, Config> { "HETERO", { { "TARGET_FALLBACK", "MYRIAD" } } },
+    std::tuple<Device, Config> { "MULTI", { { MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , "MYRIAD" } } }
+};
+
+} // namespace
+
+INSTANTIATE_TEST_CASE_P(MYRIAD, CoreThreadingTests, testing::ValuesIn(params));
+
+// Iteration-based threading tests are disabled for MYRIAD (DISABLED_ prefix).
+INSTANTIATE_TEST_CASE_P(DISABLED_MYRIAD, CoreThreadingTestsWithIterations,
+    testing::Combine(testing::ValuesIn(params),
+                     testing::Values(2),
+                     testing::Values(2)));
using namespace LayerTestsDefinitions;
namespace {
-// TODO: All concat on axis 0 always fails by accuracy
-std::vector<size_t > axes = {1, 2, 3};
+std::vector<size_t> axes = {0, 1, 2, 3};
std::vector<std::vector<std::vector<size_t>>> inShapes = {
{{10, 10, 10, 10}, {10, 10, 10, 10}},
{{10, 10, 10, 10}, {10, 10, 10, 10}, {10, 10, 10, 10}},
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP16};
-INSTANTIATE_TEST_CASE_P(Axis_1_and_3, ConcatLayerTest,
+INSTANTIATE_TEST_CASE_P(Concat_Basic, ConcatLayerTest,
::testing::Combine(
- ::testing::Values(1, 3),
+ ::testing::ValuesIn(axes),
::testing::ValuesIn(inShapes),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
ConcatLayerTest::getTestCaseName);
-
-
-// TODO: concat on axis 2 fails by accuracy with input precision different from FP16
-INSTANTIATE_TEST_CASE_P(Axis_2, ConcatLayerTest,
- ::testing::Combine(
- ::testing::Values(2),
- ::testing::ValuesIn(inShapes),
- ::testing::ValuesIn(netPrecisions),
- ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
- ConcatLayerTest::getTestCaseName);
-} // namespace
\ No newline at end of file
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/proposal.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace ngraph::helpers;
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+/* ============= Proposal ============= */
+// Single-value attribute sets for the Proposal layer; the cross product of
+// these (via testing::Combine below) currently yields one configuration.
+const std::vector<base_size_type> base_size_ = {16};
+const std::vector<pre_nms_topn_type> pre_nms_topn_ = {100};
+const std::vector<post_nms_topn_type> post_nms_topn_ = {100};
+const std::vector<nms_thresh_type> nms_thresh_ = {0.7f};
+const std::vector<min_size_type> min_size_ = {1};
+const std::vector<ratio_type> ratio_ = {{1.0f, 2.0f}};
+const std::vector<scale_type> scale_ = {{1.2f, 1.5f}};
+const std::vector<clip_before_nms_type> clip_before_nms_ = {false};
+const std::vector<clip_after_nms_type> clip_after_nms_ = {false};
+
+// empty string corresponds to Caffe framework
+// Myriad plugin does not take this parameter; uses "" by default
+const std::vector<framework_type> framework_ = {""};
+
+const auto proposalParams = ::testing::Combine(
+    ::testing::ValuesIn(base_size_),
+    ::testing::ValuesIn(pre_nms_topn_),
+    ::testing::ValuesIn(post_nms_topn_),
+    ::testing::ValuesIn(nms_thresh_),
+    ::testing::ValuesIn(min_size_),
+    ::testing::ValuesIn(ratio_),
+    ::testing::ValuesIn(scale_),
+    ::testing::ValuesIn(clip_before_nms_),
+    ::testing::ValuesIn(clip_after_nms_),
+    ::testing::ValuesIn(framework_)
+);
+
+INSTANTIATE_TEST_CASE_P(Proposal_tests, ProposalLayerTest,
+                        ::testing::Combine(
+                            proposalParams,
+                            ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
+                        ProposalLayerTest::getTestCaseName
+);
+
+} // namespace
std::vector<std::string> disabledTestPatterns() {
return {
+ // Issue 26268
+ ".*ConcatLayerTest.*axis=0.*",
+ // Not supported activation types
".*ActivationLayerTest\\.CompareWithRefs/Tanh.*netPRC=FP32.*",
".*ActivationLayerTest\\.CompareWithRefs/Exp.*netPRC=FP32.*",
".*ActivationLayerTest\\.CompareWithRefs/Log.*netPRC=FP32.*",
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
+
+#include "vpu/private_plugin_config.hpp"
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <functional_test_utils/blob_utils.hpp>
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+#include <precision_utils.h>
+#include <ngraph/opsets/opset3.hpp>
+
+#include <tuple>
+#include <vector>
+#include <string>
+#include <memory>
+#include <random>
+
+typedef std::tuple<
+        InferenceEngine::SizeVector, // Input shape
+        InferenceEngine::Precision,  // Input precision
+        LayerTestsUtils::TargetDevice // Device name
+> staticShapeNonZeroLayerTestParams;
+
+namespace LayerTestsDefinitions {
+
+// Accuracy test for the VPU StaticShapeNonZero op: runs the op on MYRIAD and
+// compares against the INTERPRETER reference backend via the custom Compare().
+class StaticShapeNonZeroLayerTest : public testing::WithParamInterface<staticShapeNonZeroLayerTestParams>,
+        public LayerTestsUtils::LayerTestsCommon {
+public:
+    // Builds a readable test name from the input shape, precision and device.
+    static std::string getTestCaseName(testing::TestParamInfo<staticShapeNonZeroLayerTestParams> obj) {
+        InferenceEngine::SizeVector inputShape;
+        InferenceEngine::Precision inputPrecision;
+        std::string targetDevice;
+        std::tie(inputShape, inputPrecision, targetDevice) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
+        result << "inPrc=" << inputPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice;
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        // Compare against the nGraph interpreter rather than a plugin reference.
+        SetRefMode(LayerTestsUtils::RefMode::INTERPRETER);
+        configuration[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
+        configuration[VPU_CONFIG_KEY(DISABLE_REORDER)] = CONFIG_VALUE(YES);
+
+        InferenceEngine::SizeVector inputShape;
+        std::tie(inputShape, inPrc, targetDevice) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
+
+        // StaticShapeNonZero has two outputs: output(0) the index tensor,
+        // output(1) the actual dims of the valid region.
+        const auto input = std::make_shared<ngraph::opset3::Parameter>(ngPrc, ngraph::Shape(inputShape));
+        const auto staticShapeNonZero = std::make_shared<ngraph::vpu::op::StaticShapeNonZero>(input, ngraph::element::i32);
+        ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(staticShapeNonZero->output(0)),
+                                     std::make_shared<ngraph::opset3::Result>(staticShapeNonZero->output(1))};
+        function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{input});
+        outPrc = InferenceEngine::Precision::I32;
+    }
+
+    // Fills the input with small values around zero so the blob contains a mix
+    // of zero and non-zero elements.
+    // NOTE(review): the float fill is used for all tested input precisions
+    // (U8/FP32/I32) — presumably blob_utils converts; confirm.
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
+        return FuncTestUtils::createAndFillBlobFloat(info.getTensorDesc(), 4, -2, 2);
+    }
+
+    // Custom comparison: first checks the reported dims match, then compares
+    // only the valid region of the index tensor (entries beyond the non-zero
+    // count are padding and ignored).
+    void Compare(const std::vector<std::vector<std::uint8_t>>& expectedOutput, const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) override {
+        const auto expectedIndicesPtr = reinterpret_cast<const int32_t*>(expectedOutput[0].data());
+        const auto expectedDimsPtr = reinterpret_cast<const int32_t*>(expectedOutput[1].data());
+
+        const auto actualIndices = actualOutputs[0];
+        const auto actualDims = actualOutputs[1];
+
+        const auto actualIndicesPtr = InferenceEngine::as<InferenceEngine::MemoryBlob>(actualIndices)->rmap().as<const int32_t*>();
+        const auto actualDimsPtr = InferenceEngine::as<InferenceEngine::MemoryBlob>(actualDims)->rmap().as<const int32_t*>();
+
+        ASSERT_EQ(expectedDimsPtr[0], actualDimsPtr[0]);
+        ASSERT_EQ(expectedDimsPtr[1], actualDimsPtr[1]);
+
+        // Row stride of the index tensor (its second static dimension).
+        const auto totalDimsSize = actualIndices->getTensorDesc().getDims()[1];
+
+        for (int axis = 0; axis < actualDimsPtr[1]; ++axis) {
+            for (int i = 0; i < actualDimsPtr[0]; ++i) {
+                const auto idx = i + axis * totalDimsSize;
+                ASSERT_EQ(expectedIndicesPtr[idx], actualIndicesPtr[idx]);
+            }
+        }
+    }
+};
+
+TEST_P(StaticShapeNonZeroLayerTest, accuracy) {
+    Run();
+}
+
+std::vector<InferenceEngine::SizeVector> inputDims = {
+        { 7 },
+        { 1000 },
+        { 3, 5 },
+        { 65, 33 },
+        { 33, 65 },
+        { 1, 1000 },
+        { 223, 217, 21 },
+        { 3, 4, 5, 1 },
+        { 3, 4, 1, 5, 1 }
+};
+
+std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::U8,
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::I32,
+};
+
+INSTANTIATE_TEST_CASE_P(accuracy, StaticShapeNonZeroLayerTest,
+                        ::testing::Combine(
+                            ::testing::ValuesIn(inputDims),
+                            ::testing::ValuesIn(inputPrecisions),
+                            ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace LayerTestsDefinitions
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <functional_test_utils/layer_test_utils.hpp>
+
+#include <ngraph_functions/builders.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type;
+using DataShape = ngraph::Shape;
+using DataShapes = std::vector<DataShape>;
+
+// Shapes of all Concat inputs plus the concatenation axis.
+struct ConcatParam {
+    DataShapes dataShapes;
+    int axis;
+};
+using ConcatTestParam = std::tuple<DataType, ConcatParam, LayerTestsUtils::TargetDevice>;
+
+// Runs Concat over several dynamically-shaped inputs: each data input is
+// paired with a shape input through a DynamicShapeResolver before the Concat.
+class DSR_Concat
+        : public testing::WithParamInterface<ConcatTestParam>,
+          public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& dataType = std::get<0>(parameters);
+        const auto& concatParam = std::get<1>(parameters);
+        targetDevice = std::get<2>(GetParam());
+
+        const auto& dataShapes = concatParam.dataShapes;
+        const auto& axis = concatParam.axis;
+
+        // For every input shape create a (data, shape) parameter pair and
+        // bind them with a DSR; the DSRs feed the Concat under test.
+        ngraph::NodeVector dsrVector;
+        ngraph::ParameterVector params;
+        for (const auto& dataShape : dataShapes) {
+            const auto param = std::make_shared<ngraph::opset3::Parameter>(
+                    dataType, dataShape);
+            const auto shape = std::make_shared<ngraph::opset3::Parameter>(
+                    ngraph::element::i64, ngraph::Shape{dataShape.size()});
+            dsrVector.emplace_back(std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+                    param, shape));
+            params.push_back(param);
+            params.push_back(shape);
+        }
+
+        const auto concat = std::make_shared<ngraph::opset3::Concat>(dsrVector, axis);
+        const auto result = std::make_shared<ngraph::opset3::Result>(concat);
+
+        function = std::make_shared<ngraph::Function>(
+                ngraph::NodeVector{result}, params, "DSR-Concat");
+    }
+};
+
+TEST_P(DSR_Concat, CompareWithReference) {
+    Run();
+}
+
+std::vector<ngraph::element::Type> dataTypes = {
+        ngraph::element::f16,
+        ngraph::element::f32,
+        ngraph::element::i32,
+        ngraph::element::i64,
+        ngraph::element::u8,
+};
+
+std::vector<ConcatParam> concatParams = {
+        {DataShapes{DataShape{128}, DataShape{256}, DataShape{512}, DataShape{1024}}, 0},
+        {DataShapes{DataShape{1, 1000}, DataShape{2, 1000}, DataShape{4, 1000}, DataShape{8, 1000}}, 0},
+        {DataShapes{DataShape{128, 100}, DataShape{128, 200}, DataShape{128, 400}, DataShape{128, 800}}, 1},
+        {DataShapes{DataShape{3, 64, 128}, DataShape{4, 64, 128}, DataShape{5, 64, 128}}, 0},
+        {DataShapes{DataShape{3, 64, 128}, DataShape{3, 64, 256}, DataShape{3, 64, 512}}, 2},
+};
+
+// Disabled until dynamic Concat is supported by the plugin (DISABLED_ prefix).
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicConcat, DSR_Concat, ::testing::Combine(
+        ::testing::ValuesIn(dataTypes),
+        ::testing::ValuesIn(concatParams),
+        ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+
+// One Gather scenario: static data/index shapes, the gather axis, and the
+// (normalized) dimension indices bounding the gather axis.
+struct GatherTestCase {
+    ngraph::Shape data_shape, index_shape;
+    int64_t axis, first_split_point, second_split_point;
+};
+
+const auto combinations = testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                GatherTestCase{{6}, {15, 4, 20, 28}, 0, 0, 0},
+                GatherTestCase{{6, 12, 10, 24}, {6}, 0, 0, 1},
+                GatherTestCase{{6, 12}, {15, 4, 20, 28}, 1, 1, 2},
+                GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, 3, 3, 4},
+                GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -1, 3, 4},
+                GatherTestCase{{6, 12, 10, 24}, {15, 4, 20, 28}, -4, 0, 1}),
+        testing::Values(CommonTestUtils::DEVICE_MYRIAD));
+
+
+using Parameters = std::tuple<
+        DataType,
+        DataType,
+        GatherTestCase,
+        LayerTestsUtils::TargetDevice
+>;
+
+// Gather where only the DATA input is dynamic (wrapped in a DSR).
+class DSR_GatherData : public testing::WithParamInterface<Parameters>,
+        public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& gather_setup = std::get<2>(parameters);
+        targetDevice = std::get<3>(parameters);
+
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+        const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+        const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+        const auto node = std::make_shared<ngraph::opset3::Gather>(dsr, indices, axis);
+
+        const auto result = std::make_shared<ngraph::opset3::Result>(node);
+        function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+                ngraph::ParameterVector{data, indices, dims}, "DSR-GatherData");
+    }
+};
+
+TEST_P(DSR_GatherData, CompareWithReference) {
+    Run();
+}
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGatherData, DSR_GatherData, combinations);
+
+// Gather where only the INDICES input is dynamic (wrapped in a DSR).
+class DSR_GatherIdx : public testing::WithParamInterface<Parameters>,
+        public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& gather_setup = std::get<2>(parameters);
+        targetDevice = std::get<3>(parameters);
+
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+        const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+        const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+        const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+        const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, dims);
+        const auto node = std::make_shared<ngraph::opset3::Gather>(data, dsr, axis);
+
+        const auto result = std::make_shared<ngraph::opset3::Result>(node);
+        function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+                ngraph::ParameterVector{data, indices, dims}, "DSR-GatherIdx");
+    }
+};
+
+TEST_P(DSR_GatherIdx, CompareWithReference) {
+    Run();
+}
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGatherIdx, DSR_GatherIdx, combinations);
+
+// Gather where BOTH data and indices are dynamic (each behind its own DSR).
+class DSR_Gather : public testing::WithParamInterface<Parameters>,
+        public LayerTestsUtils::LayerTestsCommon {
+protected:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& data_type = std::get<0>(parameters);
+        const auto& idx_type = std::get<1>(parameters);
+        const auto& gather_setup = std::get<2>(parameters);
+        targetDevice = std::get<3>(parameters);
+
+        const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, gather_setup.data_shape);
+        const auto indices = std::make_shared<ngraph::opset3::Parameter>(idx_type, gather_setup.index_shape);
+        const auto axis = ngraph::opset3::Constant::create(ngraph::element::i32, {1}, std::vector<int64_t>{gather_setup.axis});
+
+        const auto data_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.data_shape.size()});
+        const auto indices_dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{gather_setup.index_shape.size()});
+
+        const auto data_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, data_dims);
+        const auto indices_dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(indices, indices_dims);
+        const auto node = std::make_shared<ngraph::opset3::Gather>(data_dsr, indices_dsr, axis);
+
+        const auto result = std::make_shared<ngraph::opset3::Result>(node);
+        function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+                ngraph::ParameterVector{data, indices, data_dims, indices_dims}, "DSR-Gather");
+    }
+};
+
+TEST_P(DSR_Gather, CompareWithReference) {
+    Run();
+}
+
+// Fixed copy-paste slip: this instantiation previously reused the
+// DISABLED_DynamicGatherIdx name from the DSR_GatherIdx suite above.
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGather, DSR_Gather, combinations);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataShape = ngraph::Shape;
+using ShapeDescriptor = std::vector<int32_t>;
+using ReshapeTestParams = std::tuple<DataShape, bool, ShapeDescriptor>;
+
+using Parameters = std::tuple<
+ DataType,
+ ReshapeTestParams,
+ LayerTestsUtils::TargetDevice
+>;
+
+class DSR_Reshape : public testing::WithParamInterface<Parameters>, public LayerTestsUtils::LayerTestsCommon {
+protected:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& inDataType = std::get<0>(GetParam());
+ const auto& reshapeTestParams = std::get<1>(GetParam());
+ targetDevice = std::get<2>(GetParam());
+
+ const auto& inDataShape = std::get<0>(reshapeTestParams);
+ const auto& specialZero = std::get<1>(reshapeTestParams);
+ const auto& outShapeDescriptor = std::get<2>(reshapeTestParams);
+
+ const auto inDataParam = std::make_shared<ngraph::op::Parameter>(
+ inDataType, inDataShape);
+ const auto inDataShapeParam = std::make_shared<ngraph::op::Parameter>(
+ ngraph::element::i32, ngraph::Shape{inDataShape.size()});
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(
+ inDataParam, inDataShapeParam);
+
+ const auto outShapeDescriptorConstNode = std::make_shared<ngraph::op::Constant>(
+ ngraph::element::i64, ngraph::Shape{outShapeDescriptor.size()}, outShapeDescriptor);
+ const auto reshape = std::make_shared<ngraph::op::v1::Reshape>(
+ dsr, outShapeDescriptorConstNode, specialZero);
+
+ const auto result = std::make_shared<ngraph::op::Result>(reshape);
+ function = std::make_shared<ngraph::Function>(
+ ngraph::ResultVector{result},
+ ngraph::ParameterVector{inDataParam, inDataShapeParam},
+ "DSR-Reshape");
+ }
+};
+
+TEST_P(DSR_Reshape, CompareWithReference) {
+ Run();
+}
+
+std::vector<ReshapeTestParams> reshapeTestParams = {
+ std::make_tuple(DataShape{1, 5, 5, 24}, true, ShapeDescriptor{0, -1, 4}),
+ std::make_tuple(DataShape{1, 5, 5, 0}, false, ShapeDescriptor{0, 4}),
+ std::make_tuple(DataShape{1, 3, 128, 256}, true, ShapeDescriptor{0, 0, 64, 512}),
+};
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicReshape, DSR_Reshape,
+ ::testing::Combine(
+ ::testing::Values(ngraph::element::f16, ngraph::element::f32, ngraph::element::i32),
+ ::testing::ValuesIn(reshapeTestParams),
+ ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+namespace {
+
+using DataType = ngraph::element::Type_t;
+using DataDims = ngraph::Shape;
+
+
+struct VariadicSplitTestCase {
+ ngraph::Shape data_shape;
+ std::vector<int64_t> split_lengths;
+ int64_t axis, first_split_point, second_split_point;
+};
+
+const auto combinations = testing::Combine(
+ testing::Values(
+ ngraph::element::f16,
+ ngraph::element::f32,
+ ngraph::element::i32,
+ ngraph::element::i64,
+ ngraph::element::u8),
+ testing::Values(
+ ngraph::element::i32,
+ ngraph::element::i64),
+ testing::Values(
+ VariadicSplitTestCase{{6}, {2, 1, 2, 1}, 0, 0, 0},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {1, 1, 3, 1}, 0, 0, 1},
+ VariadicSplitTestCase{{6, 12}, {7, 2, 1, 2}, 1, 1, 2},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {10, 14}, 3, 3, 4},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {14, 10}, -1, 3, 4},
+ VariadicSplitTestCase{{6, 12, 10, 24}, {6}, -4, 0, 1}),
+ testing::Values(CommonTestUtils::DEVICE_MYRIAD));
+
+
+using Parameters = std::tuple<
+ DataType,
+ DataType,
+ VariadicSplitTestCase,
+ LayerTestsUtils::TargetDevice
+>;
+
+class DSR_VariadicSplit : public testing::WithParamInterface<Parameters>,
+ public LayerTestsUtils::LayerTestsCommon {
+protected:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& data_type = std::get<0>(parameters);
+ const auto& idx_type = std::get<1>(parameters);
+ const auto& variadic_split_setup = std::get<2>(parameters);
+ targetDevice = std::get<3>(parameters);
+
+ const auto data = std::make_shared<ngraph::opset3::Parameter>(data_type, variadic_split_setup.data_shape);
+ const auto axis = ngraph::opset3::Constant::create(idx_type, {}, std::vector<int64_t>{variadic_split_setup.axis});
+ const auto split_lengths = ngraph::opset3::Constant::create(idx_type,
+ {variadic_split_setup.split_lengths.size()}, std::vector<int64_t>{variadic_split_setup.split_lengths});
+
+ const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{variadic_split_setup.data_shape.size()});
+
+ const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto node = std::make_shared<ngraph::opset3::VariadicSplit>(dsr, axis, split_lengths);
+
+ const auto tests_wa = std::make_shared<ngraph::opset3::Concat>(node->outputs(), variadic_split_setup.axis);
+ const auto result = std::make_shared<ngraph::opset3::Result>(tests_wa);
+ function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
+ ngraph::ParameterVector{data, dims}, "DSR-VariadicSplit");
+ }
+};
+
+TEST_P(DSR_VariadicSplit, CompareWithReference) {
+ Run();
+}
+
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicGatherData, DSR_VariadicSplit, combinations);
+
+} // namespace
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+#include <functional_test_utils/layer_test_utils.hpp>
+#include <ngraph_functions/builders.hpp>
+
+namespace {
+
+using TensorType = ngraph::element::Type;
+using TensorShape = ngraph::PartialShape;
+
+using BroadcastExplicitTestParams = std::tuple<
+ TensorType, TensorShape, LayerTestsUtils::TargetDevice>;
+
+class NonZero_Broadcast : public testing::WithParamInterface<BroadcastExplicitTestParams>,
+ public LayerTestsUtils::LayerTestsCommon {
+protected:
+ void SetUp() override {
+ const auto& parameters = GetParam();
+ const auto& tensorType = std::get<0>(parameters);
+ const auto& tensorShape = std::get<1>(parameters);
+ targetDevice = std::get<2>(GetParam());
+
+ const auto tensorParam = std::make_shared<ngraph::opset3::Parameter>(
+ tensorType, tensorShape);
+ const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(tensorParam);
+ const auto shapeOfNonZero = std::make_shared<ngraph::opset3::ShapeOf>(nonZero);
+
+ const auto broadcastConstant = std::make_shared<ngraph::opset3::Constant>(
+ tensorType, ngraph::Shape{1}, 1);
+
+ const auto axesMappingConstant = std::make_shared<ngraph::opset3::Constant>(
+ ngraph::element::u64, ngraph::Shape{1}, 0);
+
+ const auto broadcast = std::make_shared<ngraph::opset3::Broadcast>(
+ broadcastConstant, shapeOfNonZero, axesMappingConstant);
+
+ const auto result = std::make_shared<ngraph::opset3::Result>(broadcast);
+
+ function = std::make_shared<ngraph::Function>(
+ ngraph::ResultVector{result},
+ ngraph::ParameterVector{tensorParam},
+ "NonZero-Broadcast");
+ }
+};
+
+TEST_P(NonZero_Broadcast, CompareWithReference) {
+ Run();
+}
+// Blocked by #-30913, #-30915
+INSTANTIATE_TEST_CASE_P(DISABLED_DynamicBroadcast, NonZero_Broadcast, ::testing::Combine(
+ ::testing::Values(ngraph::element::f16, ngraph::element::f32, ngraph::element::i32),
+ ::testing::Values(
+ TensorShape{1000},
+ TensorShape{4, 1000},
+ TensorShape{3, 128, 256}),
+ ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)));
+
+} // namespace
#include <functional_test_utils/layer_test_utils.hpp>
#include <ngraph_functions/builders.hpp>
#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+#include <vpu/myriad_plugin_config.hpp>
namespace {
LayerTestsUtils::TargetDevice
>;
-class DSR_Transpose : public testing::WithParamInterface<Parameters>, public LayerTestsUtils::LayerTestsCommon {
+class NonZero_Transpose : public testing::WithParamInterface<Parameters>, public LayerTestsUtils::LayerTestsCommon {
protected:
void SetUp() override {
const auto& parameters = GetParam();
targetDevice = std::get<2>(GetParam());
const auto data = std::make_shared<ngraph::opset3::Parameter>(dataType, dataDims);
- const auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{dataDims.size()});
- const auto dsr = std::make_shared<ngraph::vpu::op::DynamicShapeResolver>(data, dims);
+ const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(data);
auto permutation = std::vector<std::int64_t>(dataDims.size());
std::iota(permutation.begin(), permutation.end(), 0);
std::shuffle(permutation.begin(), permutation.end(), std::mt19937());
const auto transposition = std::make_shared<ngraph::opset3::Constant>(ngraph::element::i64, ngraph::Shape{dataDims.size()}, permutation);
- const auto transpose = std::make_shared<ngraph::opset3::Transpose>(dsr, transposition);
+ const auto transpose = std::make_shared<ngraph::opset3::Transpose>(nonZero, transposition);
const auto result = std::make_shared<ngraph::opset3::Result>(transpose);
- function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{data, dims}, "DSR-Transpose");
+ function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{data}, "NonZero-Transpose");
}
};
-TEST_P(DSR_Transpose, CompareWithReference) {
- Run();
+TEST_P(NonZero_Transpose, CompareWithReference) {
+ SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+ configuration.emplace(VPU_MYRIAD_CONFIG_KEY(PLATFORM), VPU_MYRIAD_CONFIG_VALUE(2480));
+ ConfigurePlugin();
+
+ ASSERT_NO_THROW(LoadNetwork());
}
-INSTANTIATE_TEST_CASE_P(DISABLED_DynamicTranspose, DSR_Transpose,
+INSTANTIATE_TEST_CASE_P(DynamicTranspose, NonZero_Transpose,
::testing::Combine(
::testing::Values(ngraph::element::f16, ngraph::element::f32, ngraph::element::i32),
::testing::Values(ngraph::Shape{1, 800}),
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ie_core.hpp>
+#include <details/ie_exception.hpp>
+#include <ie_plugin_config.hpp>
+#include <ie_extension.h>
+#include <multi-device/multi_device_config.hpp>
+
+#include <file_utils.h>
+#include <ngraph_functions/subgraph_builders.hpp>
+#include <functional_test_utils/test_model/test_model.hpp>
+#include <common_test_utils/file_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
+
+#include <gtest/gtest.h>
+#include <thread>
+#include <atomic>
+#include <mutex>
+#include <chrono>
+#include <fstream>
+
+using Device = std::string;
+using Config = std::map<std::string, std::string>;
+using Params = std::tuple<Device, Config>;
+
+class CoreThreadingTestsBase {
+public:
+ static void runParallel(std::function<void(void)> func,
+ const unsigned int iterations = 100,
+ const unsigned int threadsNum = 8) {
+ std::vector<std::thread> threads(threadsNum);
+
+ for (auto & thread : threads) {
+ thread = std::thread([&](){
+ for (unsigned int i = 0; i < iterations; ++i) {
+ func();
+ }
+ });
+ }
+
+ for (auto & thread : threads) {
+ if (thread.joinable())
+ thread.join();
+ }
+ }
+
+ void safePluginUnregister(InferenceEngine::Core & ie) {
+ try {
+ ie.UnregisterPlugin(deviceName);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ // if several threads unload plugin at once, the first thread does this
+ // while all others will throw an exception that plugin is not registered
+ ASSERT_STR_CONTAINS(ex.what(), "name is not registered in the");
+ }
+ }
+
+ void safeAddExtension(InferenceEngine::Core & ie) {
+ try {
+ auto extension = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(
+ FileUtils::makeSharedLibraryName<char>({}, "extension_tests"));
+ ie.AddExtension(extension);
+ } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+ ASSERT_STR_CONTAINS(ex.what(), "name: experimental");
+ }
+ }
+
+ Device deviceName;
+ Config config;
+};
+
+//
+// Common threading plugin tests
+//
+
+class CoreThreadingTests : public CoreThreadingTestsBase,
+ public ::testing::TestWithParam<Params> {
+public:
+ void SetUp() override {
+ std::tie(deviceName, config) = GetParam();
+ }
+};
+
+// tested function: GetVersions, UnregisterPlugin
+TEST_P(CoreThreadingTests, smoke_GetVersions) {
+ InferenceEngine::Core ie;
+
+ runParallel([&] () {
+ auto versions = ie.GetVersions(deviceName);
+ ASSERT_LE(1u, versions.size());
+ safePluginUnregister(ie);
+ });
+}
+
+// tested function: SetConfig for already created plugins
+TEST_P(CoreThreadingTests, smoke_SetConfigPluginExists) {
+ InferenceEngine::Core ie;
+
+ ie.SetConfig(config);
+ auto versions = ie.GetVersions(deviceName);
+
+ runParallel([&] () {
+ ie.SetConfig(config);
+ }, 10000);
+}
+
+// tested function: GetConfig, UnregisterPlugin
+TEST_P(CoreThreadingTests, smoke_GetConfig) {
+ InferenceEngine::Core ie;
+ std::string configKey = config.begin()->first;
+
+ ie.SetConfig(config);
+ runParallel([&] () {
+ ie.GetConfig(deviceName, configKey);
+ safePluginUnregister(ie);
+ });
+}
+
+// tested function: GetMetric, UnregisterPlugin
+TEST_P(CoreThreadingTests, smoke_GetMetric) {
+ InferenceEngine::Core ie;
+ runParallel([&] () {
+ ie.GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+ safePluginUnregister(ie);
+ });
+}
+
+// tested function: QueryNetwork
+TEST_P(CoreThreadingTests, smoke_QueryNetwork) {
+ InferenceEngine::Core ie;
+ auto model = FuncTestUtils::TestModel::convReluNormPoolFcModelFP32;
+ auto network = ie.ReadNetwork(model.model_xml_str, model.weights_blob);
+
+ ie.SetConfig(config, deviceName);
+ InferenceEngine::QueryNetworkResult refResult = ie.QueryNetwork(network, deviceName);
+
+ runParallel([&] () {
+ const auto result = ie.QueryNetwork(network, deviceName);
+ safePluginUnregister(ie);
+
+ // compare QueryNetworkResult with reference
+ for (auto && r : refResult.supportedLayersMap) {
+ ASSERT_NE(result.supportedLayersMap.end(), result.supportedLayersMap.find(r.first));
+ }
+ for (auto && r : result.supportedLayersMap) {
+ ASSERT_NE(refResult.supportedLayersMap.end(), refResult.supportedLayersMap.find(r.first));
+ }
+ }, 3000);
+}
+
+//
+// Parametrized tests with number of parallel threads, iterations
+//
+
+using Threads = unsigned int;
+using Iterations = unsigned int;
+
+class CoreThreadingTestsWithIterations : public ::testing::TestWithParam<std::tuple<Params, Threads, Iterations> >,
+ public CoreThreadingTestsBase {
+public:
+ void SetUp() override {
+ std::tie(deviceName, config) = std::get<0>(GetParam());
+ numThreads = std::get<1>(GetParam());
+ numIterations = std::get<2>(GetParam());
+ }
+
+ unsigned int numIterations;
+ unsigned int numThreads;
+};
+
+// tested function: LoadNetwork, AddExtension
+TEST_P(CoreThreadingTestsWithIterations, smoke_LoadNetwork) {
+ InferenceEngine::Core ie;
+ std::atomic<unsigned int> counter{0u};
+
+ const FuncTestUtils::TestModel::TestModel models[] = {
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP32,
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16
+ };
+ std::vector<InferenceEngine::CNNNetwork> networks;
+ for (auto & model : models) {
+ networks.emplace_back(ie.ReadNetwork(model.model_xml_str, model.weights_blob));
+ }
+
+ // TODO: uncomment after fixing *-31414
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::make2InputSubtract()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeMultiSingleConv()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSingleConv()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSplitConvConcat()));
+ // networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSplitMultiConvConcat()));
+
+ ie.SetConfig(config, deviceName);
+ runParallel([&] () {
+ auto value = counter++;
+ (void)ie.LoadNetwork(networks[(counter++) % networks.size()], deviceName);
+ }, numIterations, numThreads);
+}
namespace LayerTestsDefinitions {
using concatParamsTuple = typename std::tuple<
+        //TODO: according to the specification, axis has to be int; negative values are allowed
size_t, // Concat axis
std::vector<std::vector<size_t>>, // Input shapes
InferenceEngine::Precision, // Network precision
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+namespace LayerTestsDefinitions {
+
+namespace proposalTypes {
+
+typedef size_t base_size_type;
+typedef size_t pre_nms_topn_type;
+typedef size_t post_nms_topn_type;
+typedef float nms_thresh_type;
+typedef size_t min_size_type;
+typedef std::vector<float> ratio_type;
+typedef std::vector<float> scale_type;
+typedef bool clip_before_nms_type;
+typedef bool clip_after_nms_type;
+typedef bool normalize_type;
+typedef size_t feat_stride_type;
+typedef float box_size_scale_type;
+typedef float box_coordinate_scale_type;
+typedef std::string framework_type;
+
+}; // namespace proposalTypes
+
+using namespace proposalTypes;
+
+typedef std::tuple<
+ base_size_type,
+ pre_nms_topn_type,
+ post_nms_topn_type,
+ nms_thresh_type,
+ min_size_type,
+ ratio_type,
+ scale_type,
+ clip_before_nms_type,
+ clip_after_nms_type,
+ framework_type> proposalSpecificParams;
+typedef std::tuple<
+ proposalSpecificParams,
+ std::string> proposalLayerTestParamsSet;
+
+class ProposalLayerTest
+ : public testing::WithParamInterface<proposalLayerTestParamsSet>,
+ public LayerTestsUtils::LayerTestsCommon {
+public:
+ static std::string getTestCaseName(testing::TestParamInfo<proposalLayerTestParamsSet> obj);
+ InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
+
+protected:
+ void SetUp() override;
+ void Validate() override;
+};
+
+} // namespace LayerTestsDefinitions
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
convertFuncToF32(fnPtr, netPrecision);
auto refOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(fnPtr, inRawData);
- auto thr = FuncTestUtils::GetComparisonThreshold(netPrecision);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(netPrecision, thr1, thr2);
+
size_t outElementsCount = std::accumulate(begin(fnPtr->get_output_shape(0)), end(fnPtr->get_output_shape(0)), 1,
std::multiplies<size_t>());
- FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), *refOutData[0], outElementsCount,
- outElementsCount,
- thr);
+ FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), *refOutData[0],
+ outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
fnPtr.reset();
if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
PluginCache::get().reset();
namespace LayerTestsDefinitions {
std::string ConcatLayerTest::getTestCaseName(const testing::TestParamInfo<concatParamsTuple> &obj) {
- size_t axis;
+ int axis;
std::vector<std::vector<size_t>> inputShapes;
InferenceEngine::Precision netPrecision;
std::string targetName;
}
void ConcatLayerTest::SetUp() {
- size_t axis;
+ int axis;
std::vector<std::vector<size_t>> inputShape;
InferenceEngine::Precision netPrecision;
std::tie(axis, inputShape, netPrecision, targetDevice) = this->GetParam();
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <functional>
+#include <functional_test_utils/skip_tests_config.hpp>
+
+#include "ie_core.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "single_layer_tests/proposal.hpp"
+
+namespace LayerTestsDefinitions {
+
+const normalize_type normalize = true;
+const feat_stride_type feat_stride = 1;
+const box_size_scale_type box_size_scale = 2.0f;
+const box_coordinate_scale_type box_coordinate_scale = 2.0f;
+
+std::string ProposalLayerTest::getTestCaseName(testing::TestParamInfo<proposalLayerTestParamsSet> obj) {
+ proposalSpecificParams proposalParams;
+
+ std::string targetDevice;
+ std::tie(proposalParams, targetDevice) = obj.param;
+
+ base_size_type base_size;
+ pre_nms_topn_type pre_nms_topn;
+ post_nms_topn_type post_nms_topn;
+ nms_thresh_type nms_thresh;
+ min_size_type min_size;
+ ratio_type ratio;
+ scale_type scale;
+ clip_before_nms_type clip_before_nms;
+ clip_after_nms_type clip_after_nms;
+ framework_type framework;
+ std::tie(base_size, pre_nms_topn,
+ post_nms_topn,
+ nms_thresh,
+ min_size,
+ ratio,
+ scale,
+ clip_before_nms,
+ clip_after_nms,
+ framework) = proposalParams;
+
+ std::ostringstream result;
+ result << "base_size=" << base_size << "_";
+ result << "pre_nms_topn=" << pre_nms_topn << "_";
+ result << "post_nms_topn=" << post_nms_topn << "_";
+ result << "nms_thresh=" << nms_thresh << "_";
+ result << "feat_stride=" << feat_stride << "_";
+ result << "min_size=" << min_size << "_";
+ result << "ratio = " << CommonTestUtils::vec2str(ratio) << "_";
+ result << "scale = " << CommonTestUtils::vec2str(scale) << "_";
+ result << "clip_before_nms=" << clip_before_nms << "_";
+ result << "clip_after_nms=" << clip_after_nms << "_";
+ result << "normalize=" << normalize << "_";
+ result << "box_size_scale=" << box_size_scale << "_";
+ result << "box_coordinate_scale=" << box_coordinate_scale << "_";
+ result << "framework=" << framework << "_";
+ result << "targetDevice=" << targetDevice;
+
+ return result.str();
+}
+
+void ProposalLayerTest::SetUp() {
+ proposalSpecificParams proposalParams;
+
+ std::tie(proposalParams, targetDevice) = this->GetParam();
+ base_size_type base_size;
+ pre_nms_topn_type pre_nms_topn;
+ post_nms_topn_type post_nms_topn;
+ nms_thresh_type nms_thresh;
+ min_size_type min_size;
+ ratio_type ratio;
+ scale_type scale;
+ clip_before_nms_type clip_before_nms;
+ clip_after_nms_type clip_after_nms;
+ framework_type framework;
+
+ std::tie(base_size, pre_nms_topn,
+ post_nms_topn,
+ nms_thresh,
+ min_size,
+ ratio,
+ scale,
+ clip_before_nms,
+ clip_after_nms,
+ framework) = proposalParams;
+
+ size_t bottom_w = base_size;
+ size_t bottom_h = base_size;
+ size_t num_anchors = ratio.size() * scale.size();
+
+ std::vector<size_t> scoresShape = {1, 2 * num_anchors, bottom_h, bottom_w};
+ std::vector<size_t> boxesShape = {1, 4 * num_anchors, bottom_h, bottom_w};
+ std::vector<size_t> imageInfoShape = {3};
+
+ auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(InferenceEngine::Precision::FP16);
+ auto params = ngraph::builder::makeParams(ngPrc, {{"scores", scoresShape}, {"boxes", boxesShape}, {"image_info", imageInfoShape}});
+ auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+ auto proposal = std::dynamic_pointer_cast<ngraph::opset1::Proposal>(
+ ngraph::builder::makeProposal(paramOuts[0], paramOuts[1], paramOuts[2], ngPrc,
+ base_size,
+ pre_nms_topn,
+ post_nms_topn,
+ nms_thresh,
+ feat_stride,
+ min_size,
+ ratio,
+ scale,
+ clip_before_nms,
+ clip_after_nms,
+ normalize,
+ box_size_scale,
+ box_coordinate_scale,
+ framework));
+
+ ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(proposal)};
+ function = std::make_shared<ngraph::Function>(results, params, "proposal");
+}
+
+InferenceEngine::Blob::Ptr ProposalLayerTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
+ InferenceEngine::Blob::Ptr blobPtr;
+
+ const std::string name = info.name();
+ if (name == "scores") {
+ blobPtr = FuncTestUtils::createAndFillBlobFloat(info.getTensorDesc(), 1, 0, 1000, 8234231);
+ } else if (name == "boxes") {
+ blobPtr = FuncTestUtils::createAndFillBlobFloatNormalDistribution(info.getTensorDesc(), 0.0f, 0.2f, 7235346);
+ } else if (name == "image_info") {
+ const float image_info[] = {225.0f, 225.0f, 1.0f};
+ blobPtr = FuncTestUtils::createAndFillBlobWithFloatArray(info.getTensorDesc(), image_info, 3);
+ }
+
+ return blobPtr;
+}
+
+// TODO: for validation, reference version is required (#28373)
+void ProposalLayerTest::Validate() {}
+
+TEST_P(ProposalLayerTest, CompareWithRefs) {
+ Run();
+}
+} // namespace LayerTestsDefinitions
std::vector<int32_t> convRefOutData(outElementsCount);
for (size_t i = 0; i < outElementsCount; i++)
convRefOutData[i] = static_cast<int32_t>(refOutData[i]);
- FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<int32_t *>(), convRefOutData.data(), outElementsCount, outElementsCount);
+ FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<int32_t *>(), convRefOutData.data(),
+ outElementsCount, outElementsCount, FuncTestUtils::CompareType::ABS_AND_REL);
} else {
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
- FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), refOutData.data(), outElementsCount, outElementsCount, thr);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32, thr1, thr2);
+ FuncTestUtils::compareRawBuffers(outBlob->cbuffer().as<float *>(), refOutData.data(),
+ outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
}
layer.fnPtr.reset();
}
template<InferenceEngine::Precision::ePrecision PRC>
-void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
+void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k,
+ const int seed = 1) {
using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
- std::default_random_engine random(1);
+ std::default_random_engine random(seed);
// 1/k is the resolution of the floating point numbers
std::uniform_int_distribution<int32_t> distribution(k * start_from, k * (start_from + range));
}
}
+template<InferenceEngine::Precision::ePrecision PRC>
+void inline fill_data_normal_random_float(InferenceEngine::Blob::Ptr &blob,
+ const float mean,
+ const float stddev,
+ const int seed = 1) {
+ using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
+ std::default_random_engine random(seed);
+ std::normal_distribution<> normal_d{mean, stddev};
+
+ auto *rawBlobDataPtr = blob->buffer().as<dataType *>();
+ for (size_t i = 0; i < blob->size(); i++) {
+ auto value = static_cast<float>(normal_d(random));
+ if (typeid(dataType) == typeid(typename InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type)) {
+ rawBlobDataPtr[i] = ngraph::float16(value).to_bits();
+ } else {
+ rawBlobDataPtr[i] = value;
+ }
+ }
+}
+
+template<InferenceEngine::Precision::ePrecision PRC>
+void inline fill_data_float_array(InferenceEngine::Blob::Ptr &blob, const float values[], const size_t size) {
+ using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
+
+ auto *rawBlobDataPtr = blob->buffer().as<dataType *>();
+ for (size_t i = 0; i < std::min(size, blob->size()); i++) {
+ auto value = values[i];
+ if (typeid(dataType) == typeid(typename InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type)) {
+ rawBlobDataPtr[i] = ngraph::float16(value).to_bits();
+ } else {
+ rawBlobDataPtr[i] = value;
+ }
+ }
+}
+
template<>
void inline fill_data_random<InferenceEngine::Precision::FP32>(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, start_from, k);
#include <gtest/gtest.h>
+#include <string>
+
+#include <ie_data.h>
#include <ie_blob.h>
+#include <ie_common.h>
+#include <ie_preprocess.hpp>
+#include <ie_icnn_network.hpp>
+
+namespace {
+
+bool strContains(const std::string & str, const std::string & substr) {
+ return str.find(substr) != std::string::npos;
+}
+
+bool strDoesnotContain(const std::string & str, const std::string & substr) {
+ (void)strDoesnotContain; // to overcome unused warning
+ return !strContains(str, substr);
+}
+
+} // namespace
+
+#define ASSERT_STR_CONTAINS(str, substr) \
+ ASSERT_PRED2(&strContains, str, substr)
+
+#define ASSERT_STR_DOES_NOT_CONTAIN(str, substr) \
+ ASSERT_PRED2(&strDoesnotContain, str, substr)
+
+#define EXPECT_STR_CONTAINS(str, substr) \
+ EXPECT_PRED2(&strContains, str, substr)
#define ASSERT_BLOB_EQ(lhs, rhs) \
-compare_blob(lhs,rhs)
+ compare_blob(lhs, rhs)
#define ASSERT_DIMS_EQ(lhs, rhs) \
-compare_dims(lhs,rhs)
+ compare_dims(lhs, rhs)
#define ASSERT_DATA_EQ(lhs, rhs) \
-compare_data(lhs,rhs)
+ compare_data(lhs, rhs)
#define ASSERT_PREPROCESS_CHANNEL_EQ(lhs, rhs) \
-compare_preprocess(lhs,rhs)
+ compare_preprocess(lhs, rhs)
#define ASSERT_PREPROCESS_INFO_EQ(lhs, rhs) \
-compare_preprocess_info(lhs,rhs)
+ compare_preprocess_info(lhs, rhs)
#define ASSERT_OUTPUTS_INFO_EQ(lhs, rhs) \
-compare_outputs_info(lhs,rhs)
+ compare_outputs_info(lhs, rhs)
#define ASSERT_INPUTS_INFO_EQ(lhs, rhs) \
-compare_inputs_info(lhs,rhs)
+ compare_inputs_info(lhs, rhs)
#define ASSERT_STRINGEQ(lhs, rhs) \
-compare_cpp_strings(lhs,rhs)
-
-
+ compare_cpp_strings(lhs, rhs)
inline void compare_blob(InferenceEngine::Blob::Ptr lhs, InferenceEngine::Blob::Ptr rhs) {
ASSERT_EQ(lhs.get(), rhs.get());
inline void compare_dims(const InferenceEngine::SizeVector & lhs, const InferenceEngine::SizeVector & rhs) {
ASSERT_EQ(lhs.size(), rhs.size());
- for(int i=0;i<lhs.size();i++) {
+ for (size_t i = 0; i < lhs.size(); i++) {
ASSERT_EQ(lhs[i], rhs[i]);
}
}
inline void compare_preprocess_info(const InferenceEngine::PreProcessInfo & lhs, const InferenceEngine::PreProcessInfo & rhs) {
ASSERT_EQ(lhs.getMeanVariant(), rhs.getMeanVariant());
ASSERT_EQ(lhs.getNumberOfChannels(), rhs.getNumberOfChannels());
- for(int i=0; i < lhs.getNumberOfChannels(); i++) {
+ for (int i = 0; i < lhs.getNumberOfChannels(); i++) {
ASSERT_PREPROCESS_CHANNEL_EQ(*lhs[i].get(), *rhs[i].get());
}
}
}
}
-inline void compare_inputs_info (const InferenceEngine::InputsDataMap & lhs, const InferenceEngine::InputsDataMap & rhs) {
+inline void compare_inputs_info(const InferenceEngine::InputsDataMap & lhs, const InferenceEngine::InputsDataMap & rhs) {
ASSERT_EQ(lhs.size(), rhs.size());
auto i = lhs.begin();
auto j = rhs.begin();
- for (int k =0; k != lhs.size(); k++, i++, j++) {
+ for (int k = 0; k != lhs.size(); k++, i++, j++) {
ASSERT_STREQ(i->first.c_str(), j->first.c_str());
ASSERT_DIMS_EQ(i->second->getTensorDesc().getDims(), j->second->getTensorDesc().getDims());
ASSERT_PREPROCESS_INFO_EQ(i->second->getPreProcess(), j->second->getPreProcess());
#include "common_test_utils/test_constants.hpp"
namespace FuncTestUtils {
-template<typename dType>
-void inline compareRawBuffers(const dType *res, const dType *ref,
- size_t resSize, size_t refSize,
- float max_diff = 0.01, bool printData = false) {
+
+enum CompareType{
+ ABS,
+ REL,
+ ABS_AND_REL // if absolute and relative differences are too high, an exception is thrown
+};
+/**
+ * @brief Checks values of two blobs according to given algorithm and thresholds.
+ * In ABS and REL cases thr1 corresponds to the single threshold,
+ * In ABS_AND_REL case thr1 and thr2 mean absolute and relative threshold
+ *
+ * @tparam dType Type of blob data
+ * @param res Pointer to considered blob
+ * @param ref Pointer to reference blob
+ * @param resSize Size of considered blob
+ * @param refSize Size of reference blob
+ * @param compareType Defines an algorithm of comparison
+ * @param thr1 First threshold of difference
+ * @param thr2 Second threshold of difference
+ * @param printData A flag if data printing is demanded
+ */
+ template<typename dType>
+static void inline compareRawBuffers(const dType *res, const dType *ref,
+ size_t resSize, size_t refSize,
+ CompareType compareType, float thr1 = 0.01, float thr2 = 0.01,
+ bool printData = false) {
if (printData) {
std::cout << "Reference results: " << std::endl;
for (size_t i = 0; i < refSize; i++) {
std::cout << std::endl;
}
- for (size_t i = 0; i < refSize; i++) {
- float absDiff = std::abs(res[i] - ref[i]);
- if (absDiff > max_diff) {
- float relDiff = absDiff / std::max(res[i], ref[i]);
- ASSERT_LT(relDiff, max_diff) << "Relative comparison of values ref: " << ref[i] << " and res: "
- << res[i] << " , index in blobs: " << i << " failed!";
- }
+ switch (compareType) {
+ case CompareType::ABS:
+ for (size_t i = 0; i < refSize; i++) {
+ float absDiff = std::abs(res[i] - ref[i]);
+                ASSERT_LT(absDiff, thr1) << "Absolute comparison of values ref: " << ref[i] << " and res: "
+ << res[i] << " , index in blobs: " << i << " failed!";
+ }
+ break;
+ case CompareType::REL:
+ for (size_t i = 0; i < refSize; i++) {
+ float absDiff = std::abs(res[i] - ref[i]);
+ float relDiff = absDiff / std::max(res[i], ref[i]);
+ ASSERT_LT(relDiff, thr2) << "Relative comparison of values ref: " << ref[i] << " and res: "
+ << res[i] << " , index in blobs: " << i << " failed!";
+ }
+ break;
+ case CompareType::ABS_AND_REL:
+ for (size_t i = 0; i < refSize; i++) {
+ float absDiff = std::abs(res[i] - ref[i]);
+ if (absDiff > thr1) {
+ float relDiff = absDiff / std::max(res[i], ref[i]);
+ ASSERT_LT(relDiff, thr2) << "Comparison of values ref: " << ref[i] << " and res: "
+ << res[i] << " , index in blobs: " << i << " failed!";
+ }
+ }
+ break;
}
}
-
+/**
+ * @brief Checks absolute and relative difference of blob values according to given threshold.
+ *
+ * @tparam dType Type of blob data
+ * @param res Pointer to considered blob
+ * @param ref Pointer to reference blob
+ * @param resSize Size of considered blob
+ * @param refSize Size of reference blob
+ * @param thr Threshold of difference, absolute and relative simultaneously
+ * @param printData Flag if data printing is demanded
+ */
template<typename dType>
-void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<dType *> ref,
+static void inline compareRawBuffers(const dType *res, const dType *ref,
+ size_t resSize, size_t refSize,
+ float thr = 0.01,
+ bool printData = false) {
+ compareRawBuffers(res, ref, resSize, refSize, CompareType::ABS_AND_REL, thr, thr, printData);
+}
+/**
+ * @brief Checks values of two blobs according to given algorithm and thresholds.
+ * In ABS and REL cases thr1 corresponds to the single threshold,
+ * In ABS_AND_REL case thr1 and thr2 mean absolute and relative threshold
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param compareType Defines an algorithm of comparison
+ * @param thr1 First threshold of difference
+ * @param thr2 Second threshold of difference
+ * @param printData A flag if data printing is demanded
+ */
+template<typename dType>
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<dType *> ref,
const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
- float max_diff = 0.01, bool printData = false) {
+ CompareType compareType,
+ float thr1 = 0.01, float thr2 = 0.01, bool printData = false) {
ASSERT_TRUE(res.size() == ref.size()) << "Reference and Results vector have to be same length";
ASSERT_TRUE(res.size() == resSizes.size()) << "Results vector and elements count vector have to be same length";
ASSERT_TRUE(ref.size() == refSizes.size()) << "Reference vector and elements count vector have to be same length";
for (size_t i = 0; i < res.size(); i++) {
if (printData) std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl;
- compareRawBuffers(res[i], ref[i], resSizes[i], refSizes[i], max_diff, printData);
+ compareRawBuffers(res[i], ref[i], resSizes[i], refSizes[i], compareType, thr1, thr2, printData);
if (printData) std::cout << "END CHECK BUFFER [" << i << "]" << std::endl;
}
}
-
+/**
+ * @brief Checks absolute and relative difference of blob values according to given threshold.
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param thr Threshold of difference, absolute and relative simultaneously
+ * @param printData A flag if data printing is demanded
+ */
template<typename dType>
-void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<std::shared_ptr<dType *>> ref,
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<dType *> ref,
+ const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
+ float thr = 0.01, bool printData = false) {
+ compareRawBuffers(res, ref, resSizes, refSizes, CompareType::ABS_AND_REL, thr, thr, printData);
+}
+/**
+ * @brief Checks values of two blobs according to given algorithm and thresholds.
+ * In ABS and REL cases thr1 corresponds to the single threshold,
+ * In ABS_AND_REL case thr1 and thr2 mean absolute and relative threshold
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param compareType Defines an algorithm of comparison
+ * @param thr1 First threshold of difference
+ * @param thr2 Second threshold of difference
+ * @param printData A flag if data printing is demanded
+ */
+template<typename dType>
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<std::shared_ptr<dType *>> ref,
const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
- float max_diff = 0.01, bool printData = false) {
+ CompareType compareType,
+ float thr1 = 0.01, float thr2 = 0.01, bool printData = false) {
ASSERT_TRUE(res.size() == ref.size()) << "Reference and Results vector have to be same length";
ASSERT_TRUE(res.size() == resSizes.size()) << "Results vector and elements count vector have to be same length";
ASSERT_TRUE(ref.size() == refSizes.size()) << "Reference vector and elements count vector have to be same length";
for (size_t i = 0; i < res.size(); i++) {
if (printData) std::cout << "BEGIN CHECK BUFFER [" << i << "]" << std::endl;
- compareRawBuffers(res[i], *ref[i], resSizes[i], refSizes[i], max_diff, printData);
+ compareRawBuffers(res[i], *ref[i], resSizes[i], refSizes[i], compareType, thr1, thr2, printData);
if (printData) std::cout << "END CHECK BUFFER [" << i << "]" << std::endl;
}
}
+/**
+ * @brief Checks absolute and relative difference of blob values according to given threshold.
+ *
+ * @tparam dType Type of blob data
+ * @param res Vector of considered blob values
+ * @param ref Vector of reference blob values
+ * @param resSizes Vector of sizes of considered blobs
+ * @param refSizes Vector of sizes of reference blobs
+ * @param thr Threshold of difference, absolute and relative simultaneously
+ * @param printData A flag if data printing is demanded
+ */
+template<typename dType>
+static void inline compareRawBuffers(const std::vector<dType *> res, const std::vector<std::shared_ptr<dType *>> ref,
+ const std::vector<size_t> &resSizes, const std::vector<size_t> &refSizes,
+ float thr = 0.01, bool printData = false) {
+ compareRawBuffers(res, ref, resSizes, refSizes, CompareType::ABS_AND_REL, thr, thr, printData);
+}
template<InferenceEngine::Precision::ePrecision PRC>
void inline
}
}
-float inline GetComparisonThreshold(InferenceEngine::Precision prc) {
+void inline GetComparisonThreshold(InferenceEngine::Precision prc, float &absoluteThreshold, float &relativeThreshold) {
switch (prc) {
case InferenceEngine::Precision::FP32:
- return 1e-4;
+ absoluteThreshold = relativeThreshold = 1e-4;
+ break;
case InferenceEngine::Precision::FP16:
- return 1e-2;
+ absoluteThreshold = relativeThreshold = 1e-2;
+ break;
case InferenceEngine::Precision::I16:
case InferenceEngine::Precision::I8:
case InferenceEngine::Precision::U8:
- return 1;
+ absoluteThreshold = relativeThreshold = 1;
+ break;
default:
THROW_IE_EXCEPTION << "Unhandled precision " << prc << " passed to the GetComparisonThreshold()";
}
}
+float inline GetComparisonThreshold(InferenceEngine::Precision prc) {
+ float res;
+ GetComparisonThreshold(prc, res, res);
+ return res;
+}
+
// Copy from net_pass.h
template<InferenceEngine::Precision::ePrecision PREC_FROM, InferenceEngine::Precision::ePrecision PREC_TO>
void inline convertArrayPrecision(typename InferenceEngine::PrecisionTrait<PREC_TO>::value_type *dst,
return newBlob;
}
+InferenceEngine::Blob::Ptr inline createAndFillBlobFloatNormalDistribution(const InferenceEngine::TensorDesc &td,
+ const float mean,
+ const float stddev,
+ const int32_t seed = 1) {
+ InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
+ blob->allocate();
+ switch (td.getPrecision()) {
+#define CASE(X) case X: CommonTestUtils::fill_data_normal_random_float<X>(blob, mean, stddev, seed); break;
+ CASE(InferenceEngine::Precision::FP32)
+ CASE(InferenceEngine::Precision::FP16)
+ CASE(InferenceEngine::Precision::U8)
+ CASE(InferenceEngine::Precision::U16)
+ CASE(InferenceEngine::Precision::I8)
+ CASE(InferenceEngine::Precision::I16)
+ CASE(InferenceEngine::Precision::I64)
+ CASE(InferenceEngine::Precision::BIN)
+ CASE(InferenceEngine::Precision::I32)
+ CASE(InferenceEngine::Precision::BOOL)
+#undef CASE
+ default:
+ THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name();
+ }
+ return blob;
+}
+
+InferenceEngine::Blob::Ptr inline createAndFillBlobFloat(const InferenceEngine::TensorDesc &td,
+ const uint32_t range = 10,
+ const int32_t start_from = 0,
+ const int32_t resolution = 1,
+ const int32_t seed = 1) {
+ InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
+
+ blob->allocate();
+ switch (td.getPrecision()) {
+#define CASE(X) case X: CommonTestUtils::fill_data_random_float<X>(blob, range, start_from, resolution, seed); break;
+ CASE(InferenceEngine::Precision::FP32)
+ CASE(InferenceEngine::Precision::FP16)
+ CASE(InferenceEngine::Precision::U8)
+ CASE(InferenceEngine::Precision::U16)
+ CASE(InferenceEngine::Precision::I8)
+ CASE(InferenceEngine::Precision::I16)
+ CASE(InferenceEngine::Precision::I64)
+ CASE(InferenceEngine::Precision::BIN)
+ CASE(InferenceEngine::Precision::I32)
+ CASE(InferenceEngine::Precision::BOOL)
+#undef CASE
+ default:
+ THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name();
+ }
+ return blob;
+}
+
+InferenceEngine::Blob::Ptr inline createAndFillBlobWithFloatArray(const InferenceEngine::TensorDesc &td,
+ const float values[],
+ const int size) {
+ InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
+ blob->allocate();
+ switch (td.getPrecision()) {
+#define CASE(X) case X: CommonTestUtils::fill_data_float_array<X>(blob, values, size); break;
+ CASE(InferenceEngine::Precision::FP32)
+ CASE(InferenceEngine::Precision::FP16)
+ CASE(InferenceEngine::Precision::U8)
+ CASE(InferenceEngine::Precision::U16)
+ CASE(InferenceEngine::Precision::I8)
+ CASE(InferenceEngine::Precision::I16)
+ CASE(InferenceEngine::Precision::I64)
+ CASE(InferenceEngine::Precision::BIN)
+ CASE(InferenceEngine::Precision::I32)
+ CASE(InferenceEngine::Precision::BOOL)
+#undef CASE
+ default:
+ THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name();
+ }
+ return blob;
+}
+
InferenceEngine::Blob::Ptr inline createAndFillBlob(const InferenceEngine::TensorDesc &td,
const uint32_t range = 10,
const int32_t start_from = 0,
cnnNetwork = InferenceEngine::CNNNetwork{function};
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice);
+}
+
+void LayerTestsCommon::Infer() {
inferRequest = executableNetwork.CreateInferRequest();
for (const auto &input : cnnNetwork.getInputsInfo()) {
inferRequest.SetBlob(info->name(), blob);
inputs.push_back(blob);
}
-}
-
-void LayerTestsCommon::Infer() {
inferRequest.Infer();
}
return outputs;
}
+void LayerTestsCommon::Compare(const std::vector<std::vector<std::uint8_t>>& expectedOutputs, const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs) {
+ for (std::size_t outputIndex = 0; outputIndex < expectedOutputs.size(); ++outputIndex) {
+ const auto &expected = expectedOutputs[outputIndex];
+ const auto &actual = actualOutputs[outputIndex];
+ Compare(expected, actual);
+ }
+}
+
void LayerTestsCommon::Validate() {
// nGraph interpreter does not support f16
// IE converts f16 to f32
IE_ASSERT(actualOutputs.size() == expectedOutputs.size())
<< "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size();
- for (std::size_t outputIndex = 0; outputIndex < expectedOutputs.size(); ++outputIndex) {
- const auto &expected = expectedOutputs[outputIndex];
- const auto &actual = actualOutputs[outputIndex];
- Compare(expected, actual);
- }
+ Compare(expectedOutputs, actualOutputs);
}
void LayerTestsCommon::SetRefMode(RefMode mode) {
// Run ngraph Interpreter backend to calculate references
auto refOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(fnPtr, inRawData);
// Compare IE infer results vs ngraph Interpreter reference results
- auto thr = FuncTestUtils::GetComparisonThreshold(netPrecision);
- FuncTestUtils::compareRawBuffers(outBlobsRawData, refOutData, outElementsCount, outElementsCount, thr);
-
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(netPrecision, thr1, thr2);
+ FuncTestUtils::compareRawBuffers(outBlobsRawData, refOutData, outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
// Deallocate ngraph::Function pointer
fnPtr.reset();
if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
virtual void Run();
+ virtual void Compare(const std::vector<std::vector<std::uint8_t>>& expectedOutputs, const std::vector<InferenceEngine::Blob::Ptr>& actualOutputs);
+
virtual void Compare(const std::vector<std::uint8_t> &expected, const InferenceEngine::Blob::Ptr &actual);
virtual void SetRefMode(RefMode mode);
return refMode;
}
+ void ConfigurePlugin() const;
+
+ void LoadNetwork();
+
TargetDevice targetDevice;
std::shared_ptr<ngraph::Function> function;
std::map<std::string, std::string> configuration;
InferenceEngine::Precision outPrc = InferenceEngine::Precision::UNSPECIFIED;
InferenceEngine::ExecutableNetwork executableNetwork;
-private:
- void ConfigurePlugin() const;
+ virtual void Validate();
+private:
void ConfigureNetwork() const;
- void LoadNetwork();
-
void Infer();
std::vector<InferenceEngine::Blob::Ptr> GetOutputs();
- void Validate();
-
InferenceEngine::Core *core = nullptr;
InferenceEngine::CNNNetwork cnnNetwork;
InferenceEngine::InferRequest inferRequest;
class MockInferencePluginInternal2 : public InferenceEngine::InferencePluginInternal {
public:
- MOCK_METHOD3(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
- const InferenceEngine::ICore *, const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
+ MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
+ const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
MOCK_METHOD3(LoadNetwork, void(
InferenceEngine::IExecutableNetwork::Ptr &,
const InferenceEngine::ICNNNetwork &,
class MockInferencePluginInternal : public InferenceEngine::InferencePluginInternal {
public:
- MOCK_METHOD3(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
- const InferenceEngine::ICore *, const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
+ MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
+ const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr));
MOCK_METHOD1(SetConfig, void(const std::map <std::string, std::string> &));
class MockInferencePluginInternal3 : public InferenceEngine::InferencePluginInternal {
public:
- MOCK_METHOD3(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
- const InferenceEngine::ICore *, const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
+ MOCK_METHOD2(LoadExeNetworkImpl, std::shared_ptr<InferenceEngine::ExecutableNetworkInternal>(
+ const InferenceEngine::ICNNNetwork &, const std::map<std::string, std::string> &));
MOCK_METHOD1(AddExtension, void(InferenceEngine::IExtensionPtr ext_ptr));
MOCK_METHOD1(SetConfig, void(const std::map <std::string, std::string> &));
};
InferenceEngine::ExecutableNetwork ImportNetwork(const std::istream&, const std::map<std::string, std::string> &) {return {};}
MOCK_QUALIFIED_METHOD0(GetName, const noexcept, std::string(void));
MOCK_QUALIFIED_METHOD1(SetName, noexcept, void(const std::string &));
- MOCK_QUALIFIED_METHOD0(GetCore, const noexcept, const InferenceEngine::ICore*(void));
+ MOCK_QUALIFIED_METHOD0(GetCore, const noexcept, InferenceEngine::ICore*(void));
MOCK_QUALIFIED_METHOD1(SetCore, noexcept, void(InferenceEngine::ICore*));
MOCK_CONST_METHOD2(GetConfig, InferenceEngine::Parameter(const std::string& name,
namespace builder {
ngraph::ParameterVector makeParams(const element::Type &type, const std::vector<std::vector<size_t>> &shapes);
+ngraph::ParameterVector makeParams(const element::Type &type, const std::vector<std::pair<std::string, std::vector<size_t>>> &inputs);
std::shared_ptr<ngraph::Node> makeConstant(const element::Type &type, const std::vector<size_t> &shape,
const std::vector<float> &data, bool random = false);
std::shared_ptr<ngraph::Node> makeUnsqueeze(const ngraph::Output<Node> &in,
const element::Type &type,
const std::vector<size_t> &squeeze_indices);
+
+std::shared_ptr<ngraph::Node> makeProposal(const ngraph::Output<Node> &class_probs,
+ const ngraph::Output<Node> &class_logits,
+ const ngraph::Output<Node> &image_shape,
+ const element::Type &type,
+ size_t base_size,
+ size_t pre_nms_topn,
+ size_t post_nms_topn,
+ float nms_thresh,
+ size_t feat_stride,
+ size_t min_size,
+ const std::vector<float> &ratio,
+ const std::vector<float> &scale,
+ bool clip_before_nms,
+ bool clip_after_nms,
+ bool normalize,
+ float box_size_scale,
+ float box_coordinate_scale,
+ std::string framework);
} // namespace builder
} // namespace ngraph
auto paramNode = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(shape));
outs.push_back(paramNode);
}
+
+ return outs;
+}
+
+ngraph::ParameterVector makeParams(const element::Type &type, const std::vector<std::pair<std::string, std::vector<size_t>>> &inputs) {
+ ngraph::ParameterVector outs;
+ for (const auto &input : inputs) {
+ const auto &name = input.first;
+ const auto &shape = input.second;
+ auto paramNode = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(shape));
+ paramNode->set_friendly_name(name);
+ outs.push_back(paramNode);
+ }
+
return outs;
}
+
} // namespace builder
} // namespace ngraph
\ No newline at end of file
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+//
+
+#include <vector>
+#include <memory>
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+
+std::shared_ptr<Node> makeProposal(const ngraph::Output<Node> &class_probs,
+ const ngraph::Output<Node> &class_logits,
+ const ngraph::Output<Node> &image_shape,
+ const element::Type &type,
+ size_t base_size,
+ size_t pre_nms_topn,
+ size_t post_nms_topn,
+ float nms_thresh,
+ size_t feat_stride,
+ size_t min_size,
+ const std::vector<float> &ratio,
+ const std::vector<float> &scale,
+ bool clip_before_nms,
+ bool clip_after_nms,
+ bool normalize,
+ float box_size_scale,
+ float box_coordinate_scale,
+ std::string framework) {
+ ngraph::op::ProposalAttrs attrs;
+ attrs.base_size = base_size;
+ attrs.pre_nms_topn = pre_nms_topn;
+ attrs.post_nms_topn = post_nms_topn;
+ attrs.nms_thresh = nms_thresh;
+ attrs.feat_stride = feat_stride;
+ attrs.min_size = min_size;
+ attrs.ratio = ratio;
+ attrs.scale = scale;
+ attrs.clip_before_nms = clip_before_nms;
+ attrs.clip_after_nms = clip_after_nms;
+ attrs.normalize = normalize;
+ attrs.box_size_scale = box_size_scale;
+ attrs.box_coordinate_scale = box_coordinate_scale;
+ attrs.framework = framework;
+
+ return std::make_shared<opset1::Proposal>(class_probs, class_logits, image_shape, attrs);
+}
+
+} // namespace builder
+} // namespace ngraph
auto outputTensors = std::vector<std::shared_ptr<runtime::Tensor>>{};
const auto &results = function->get_results();
- std::transform(results.cbegin(), results.cend(), std::back_inserter(outputTensors),
- [&backend](const std::shared_ptr<op::Result> &result) {
- return backend->create_tensor(result->get_element_type(), result->get_shape());
- });
+ for (size_t i = 0; i <results.size(); ++i) {
+ outputTensors.push_back(std::make_shared<HostTensor>());
+ }
auto handle = backend->compile(function);
handle->call_with_validate(outputTensors, inputTensors);
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "graph_transformer_tests.hpp"
+#include <vpu/stages/mx_stage.hpp>
+#include <vpu/middleend/hw/utility.hpp>
+
+using namespace vpu;
+
+class VPU_SplitLargeKernelConvTest : public GraphTransformerTest {
+ protected:
+ PassSet pipeline;
+ Model model;
+
+ public:
+ void InitConvStage(
+ int inputX = 8960,
+ int inputY = 1,
+ bool isOutput4D = true) {
+
+ int kernelx = 16;
+ int kernely = 1;
+ int kernelStrideX = 1;
+ int kernelStrideY = 1;
+ int dilationX = 1;
+ int dilationY = 1;
+ int padx_begin = 7;
+ int pady_begin = 0;
+ int padx_end = 8;
+ int pady_end = 0;
+ model = CreateModel();
+
+ auto input = model->addInputData(
+ "Input",
+ DataDesc(DataType::FP16, DimsOrder::NCHW, {inputX, inputY, 8, 1}));
+ model->attrs().set<int>("numInputs", 1);
+
+ Data output;
+ if (isOutput4D) {
+ output = model->addOutputData(
+ "Output",
+ DataDesc(DataType::FP16,
+ DimsOrder::NCHW,
+ {(inputX + padx_begin + padx_end - kernelx) / kernelStrideX + 1,
+ (inputY + pady_begin + pady_end - kernely) / kernelStrideY + 1, 8, 1}));
+ } else {
+ output = model->addOutputData(
+ "Output",
+ DataDesc(DataType::FP16,
+ DimsOrder::CHW,
+ {(inputX + padx_begin + padx_end - kernelx) / kernelStrideX + 1,
+ (inputY + pady_begin + pady_end - kernely) / kernelStrideY + 1, 8}));
+ }
+
+ auto conv = std::make_shared<ie::ConvolutionLayer>(ie::LayerParams{"conv", "Convolution", ie::Precision::FP16});
+ conv->_kernel_x = kernelx;
+ conv->_kernel_y = kernely;
+ conv->_stride_x = kernelStrideX;
+ conv->_stride_y = kernelStrideY;
+ conv->_dilation_x = dilationX;
+ conv->_dilation_x = dilationY;
+
+ conv->_padding.insert(0, padx_begin);
+ conv->_padding.insert(1, pady_begin);
+ conv->_pads_end.insert(0, padx_end);
+ conv->_pads_end.insert(1, pady_end);
+ conv->_auto_pad = "same_upper";
+
+ conv->_weights = ie::make_shared_blob<short>({ ie::Precision::FP16, {static_cast<size_t>(kernelx * kernely * 8 * 8)}, ie::Layout::C });
+ conv->_weights->allocate();
+
+ frontEnd->parseConvolution(model, conv, {input}, {output});
+
+ pipeline.addPass(passManager->dumpModel("initial"));
+
+ pipeline.addPass(passManager->hwPadding());
+ pipeline.addPass(passManager->dumpModel("hwPadding"));
+
+        // if the large kernel conv is converted to a conv that can run on HW, then hwConvTiling will succeed; if not, an exception is thrown
+ pipeline.addPass(passManager->splitLargeKernelConv());
+ pipeline.addPass(passManager->dumpModel("splitLargeKernelConv"));
+
+ pipeline.addPass(passManager->hwConvTiling());
+ pipeline.addPass(passManager->dumpModel("hwConvTiling"));
+
+ pipeline.addPass(passManager->adjustDataLayout());
+ pipeline.addPass(passManager->dumpModel("adjustDataLayout"));
+
+ pipeline.addPass(passManager->processSpecialStages());
+ pipeline.addPass(passManager->dumpModel("processSpecialStages"));
+
+ pipeline.addPass(passManager->adjustDataLocation());
+ pipeline.addPass(passManager->dumpModel("adjustDataLocation"));
+
+ pipeline.addPass(passManager->finalCheck());
+ }
+};
+
+TEST_F(VPU_SplitLargeKernelConvTest, splitLargeKernelConvIfKernelSizeIs1x16) {
+ InitCompileEnv();
+ InitConvStage();
+
+ ASSERT_NO_THROW(pipeline.run(model));
+}
\ No newline at end of file
unitTestUtils
ADD_CPPLINT
LABELS
- INFERENCE_ENGINE
+ IE
)
\ No newline at end of file
_testModel = CreateTestModel();
}
+ void checkShapeConnection(const Data& parent, const Data& child) {
+ ASSERT_NE(child->parentDataToShapeEdge(), nullptr);
+ ASSERT_EQ(child->childDataToShapeEdges().size(), 0);
+ const auto& parentDataToShapeEdge = child->parentDataToShapeEdge();
+ ASSERT_EQ(parentDataToShapeEdge->parent(), parent);
+ ASSERT_EQ(parentDataToShapeEdge->child(), child);
+
+ ASSERT_EQ(parent->parentDataToShapeEdge(), nullptr);
+
+ const auto& childDataToShapeEdges = parent->childDataToShapeEdges();
+
+ const auto& it = std::find(childDataToShapeEdges.begin(), childDataToShapeEdges.end(), parentDataToShapeEdge);
+ ASSERT_NE(it, childDataToShapeEdges.end());
+ }
+
ie::CNNLayerPtr createDSRLayer() {
return std::make_shared<ie::CNNLayer>(ie::LayerParams{"DSR", "DynamicShapeResolver", ie::Precision::I32});
}
{inputStage->output(0), inputStage->output(1)}, _testModel.getOutputs()));
}
-class DSRParsingFromNgraphTests : public DSRParsingTests {
-protected:
- void checkShapeConnection(const Data& parent, const Data& child) {
- ASSERT_NE(child->parentDataToShapeEdge(), nullptr);
- ASSERT_EQ(child->childDataToShapeEdges().size(), 0);
- const auto& parentDataToShapeEdge = child->parentDataToShapeEdge();
- ASSERT_EQ(parentDataToShapeEdge->parent(), parent);
- ASSERT_EQ(parentDataToShapeEdge->child(), child);
+TEST_F(DSRParsingTests, DSRParserPreservesConnectionsOnOutputDSR) {
+ _testModel.createInputs({_dataDesc});
+ _testModel.createOutputs({_dataDesc});
- ASSERT_EQ(parent->parentDataToShapeEdge(), nullptr);
+ const auto& model = _testModel.getBaseModel();
- const auto& childDataToShapeEdges = parent->childDataToShapeEdges();
- ASSERT_EQ(childDataToShapeEdges.size(), 1);
+ const auto& inputStage = _testModel.addStage({InputInfo::fromNetwork(0)},
+ {OutputInfo::intermediate(_dataDesc), OutputInfo::intermediate(_correstShapeDesc)});
- ASSERT_EQ(childDataToShapeEdges.front(), parentDataToShapeEdge);
- }
-};
+ model->connectDataWithShape(inputStage->output(1), inputStage->output(0));
+
+ checkShapeConnection(inputStage->output(1), inputStage->output(0));
+
+ const auto& outputStage = _testModel.addStage({InputInfo::fromPrevStage(0)},
+ {OutputInfo::intermediate(_dataDesc)});
+
+ const auto& dsrLayer = createDSRLayer();
+
+ ASSERT_NO_THROW(frontEnd->parseDSR(_testModel.getBaseModel(), dsrLayer,
+ {outputStage->output(0), inputStage->output(1)}, _testModel.getOutputs()));
+
+ checkShapeConnection(inputStage->output(1), inputStage->output(0));
+ checkShapeConnection(inputStage->output(1), outputStage->output(0));
+}
-TEST_F(DSRParsingFromNgraphTests, DSRParserCreatesTwoOutputsOnOutputDSR) {
+typedef DSRParsingTests DSRParsingFromNgraphTests;
+
+TEST_F(DSRParsingFromNgraphTests, DSRParserCreatesAndConnectsTwoOutputsOnOutputDSR) {
const auto& inPrecision = ::ngraph::element::Type(::ngraph::element::Type_t::i32);
const auto& tensor = std::make_shared<ngraph::opset3::Parameter>(inPrecision, ngraph::Shape{1, 800});
}
}
ASSERT_EQ(numOutputs, 2);
+
+ const auto& it = std::find_if(model->getStages().begin(), model->getStages().end(), [](const Stage& stage) {
+ return stage->type() == StageType::NonZero;
+ });
+
+ ASSERT_NE(it, model->getStages().end());
+ const auto& nonZeroStage = *it;
+
+ checkShapeConnection(nonZeroStage->output(1), nonZeroStage->output(0));
}
TEST_F(DSRParsingFromNgraphTests, DSRWithSingleProducerCreatesConnectionBetweenDataAndShape) {
ModelPtr model;
ASSERT_NO_THROW(model = frontEnd->buildInitialModel(cnnNet));
- Stage nonZeroStage = nullptr;
+ const auto& it = std::find_if(model->getStages().begin(), model->getStages().end(), [](const Stage& stage) {
+ return stage->type() == StageType::NonZero;
+ });
- for (const auto& stage : model->getStages()) {
- if (stage->type() != StageType::NonZero) {
- continue;
- }
- nonZeroStage = stage;
- }
-
- ASSERT_NE(nonZeroStage, nullptr);
+ ASSERT_NE(it, model->getStages().end());
+ const auto& nonZeroStage = *it;
checkShapeConnection(nonZeroStage->output(1), nonZeroStage->output(0));
}
ModelPtr model;
ASSERT_NO_THROW(model = frontEnd->buildInitialModel(cnnNet));
- Stage nonZeroStage = nullptr;
-
- for (const auto& stage : model->getStages()) {
- if (stage->type() != StageType::NonZero) {
- continue;
- }
- nonZeroStage = stage;
- }
+ const auto& it = std::find_if(model->getStages().begin(), model->getStages().end(), [](const Stage& stage) {
+ return stage->type() == StageType::NonZero;
+ });
- ASSERT_NE(nonZeroStage, nullptr);
+ ASSERT_NE(it, model->getStages().end());
+ const auto& nonZeroStage = *it;
const auto& stageReluData = nonZeroStage->output(0)->singleConsumer();
const auto& stageReluShape = nonZeroStage->output(1)->singleConsumer();
PassSet::Ptr _middleEnd = nullptr;
};
-TEST_F(StageDependencyEdgeProcessingTests, AddStageDependencyAssertsOnOutputData) {
+TEST_F(StageDependencyEdgeProcessingTests, AddStageDependencyDoesNotAssertOnOutputData) {
//
// -> [Data] -> (Stage) -> [Output]
// [Input] -> (Stage) |
auto model = _testModel.getBaseModel();
- ASSERT_ANY_THROW(model->addStageDependency(dependentStage, dependencyProducer->output(0)));
+ ASSERT_NO_THROW(model->addStageDependency(dependentStage, dependencyProducer->output(0)));
}
TEST_F(StageDependencyEdgeProcessingTests, NetWithTwoStagesHasCorrectExecOrder) {
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
// Disabled due to a bug on CentOS that leads to segmentation fault of application on exit
// when perf counters are enabled
//INSTANTIATE_TEST_CASE_P(
-// BehaviorTest,
+// smoke_BehaviorTest,
// BehaviorPluginTestExecGraphInfo,
// ValuesIn(supportedValues),
#include "behavior_test_plugin_infer_request.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "behavior_test_plugin_infer_request_callback.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
// FIXME
//#if (defined INSTANTIATE_TESTS)
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, MemoryLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MemoryLayerTest,
// ::testing::ValuesIn(memory_test_cases),
// getTestName<memory_test_params>);
//#endif
layout_test_params("GPU", "FP32", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 1, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadPower,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadPower,
::testing::ValuesIn(power_test_cases), getTestName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadConv,
::testing::ValuesIn(conv_test_cases), getTestName);
// Disabled due to a bug on CentOS that leads to segmentation fault of application on exit
// when perf counters are enabled
//INSTANTIATE_TEST_CASE_P(
-// BehaviorTest,
+// smoke_BehaviorTest,
// BehaviorPluginTestPerfCounters,
// ValuesIn(supportedValues),
#include "behavior_test_plugin_set_preprocess.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
BehaviorPluginTestPreProcess,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_unsupported.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "cldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.weights_blob, \
Precision::FP32)
+// for multi-device we are testing the fp16 (as it is supported by all device combos we are considering for testing,
+// e.g. GPU and VPU); for CPU the network is automatically (internally) converted to fp32.
+const std::map<std::string, std::string> multi_device_conf = {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}};
+#define BEH_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32, \
+ multi_device_conf)
// all parameters are unsupported - reversed
#define BEH_US_ALL_CLDNN BehTestParams("GPU", \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
Precision::Q78)
+#define BEH_US_ALL_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
+ Precision::Q78, \
+ multi_device_conf)
const BehTestParams supportedValues[] = {
BEH_CLDNN,
+ BEH_MULTI,
};
const BehTestParams requestsSupportedValues[] = {
BEH_CLDNN,
+ BEH_MULTI,
};
const BehTestParams allInputSupportedValues[] = {
BEH_CLDNN.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
BEH_CLDNN.withIn(Precision::I16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
BEH_CLDNN.withIn(Precision::I32).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI, BEH_MULTI.withIn(Precision::FP16), BEH_MULTI.withIn(Precision::U8), BEH_MULTI.withIn(Precision::I16),
+ BEH_MULTI.withIn(Precision::I32),
+ BEH_MULTI.withIn(Precision::U8).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
+ BEH_MULTI.withIn(Precision::I16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
+ BEH_MULTI.withIn(Precision::I32).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
};
const BehTestParams allOutputSupportedValues[] = {
BEH_CLDNN, BEH_CLDNN.withOut(Precision::FP16),
BEH_CLDNN.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI, BEH_MULTI.withOut(Precision::FP16),
+ BEH_MULTI.withIn(Precision::FP16).withConfig({{KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO},
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"}}),
};
const BehTestParams typeUnSupportedValues[] = {
BEH_CLDNN.withIn(Precision::Q78), BEH_CLDNN.withIn(Precision::I8),
+ BEH_MULTI.withIn(Precision::Q78), BEH_MULTI.withIn(Precision::I8),
};
const BehTestParams allUnSupportedValues[] = {
BEH_US_ALL_CLDNN,
+ BEH_US_ALL_MULTI,
};
const std::vector<BehTestParams> withCorrectConfValues = {
BEH_CLDNN.withConfig({{KEY_TUNING_MODE, TUNING_CREATE},
{KEY_TUNING_FILE, "tfile"}}),
BEH_CLDNN.withConfig({{KEY_DEVICE_ID, "0"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, GPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, "2"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_PERF_COUNT, NO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DUMP_KERNELS, NO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DUMP_KERNELS, YES}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_TUNING_MODE, TUNING_DISABLED}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_TUNING_MODE, TUNING_CREATE},
+ {KEY_TUNING_FILE, "tfile"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DEVICE_ID, "0"}}),
};
const BehTestParams withIncorrectConfValues[] = {
// FIXME: [IE clDNN] The plugin doesn't throw GENERAL_ERROR if use non-exist tuning file. CVS-8593
//BEH_CLDNN.withConfig({ { KEY_TUNING_MODE, TUNING_USE_EXISTING },
// { KEY_TUNING_FILE, "unknown_file" } }),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_GPU_THROUGHPUT_STREAMS, "OFF"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_PERF_COUNT, "ON"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_CONFIG_FILE, "unknown_file"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DUMP_KERNELS, "ON"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_TUNING_MODE, "TUNING_UNKNOWN_MODE"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "GPU"},
+ {KEY_DEVICE_ID, "DEVICE_UNKNOWN"}})
};
const std::vector<BehTestParams> withCorrectConfValuesNetworkOnly = {
const BehTestParams withIncorrectConfKeys[] = {
BEH_CLDNN.withIncorrectConfigItem(),
+ BEH_MULTI.withIncorrectConfigItem(),
};
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "behavior_test_plugin_config.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(withIncorrectConfKeys),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
#include "gna_test_data.hpp"
INSTANTIATE_TEST_CASE_P(
- BehaviorTest,
+ smoke_BehaviorTest,
BehaviorPluginTestExecGraphInfo,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "gna_test_data.hpp"
// TODO: support InferRequestCallback in GNAPlugin
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
// getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfig,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfig,
ValuesIn(withCorrectConfValues),
getConfigTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
bool CheckGnaHw() {
}
}
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestWithGnaHw,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestWithGnaHw,
ValuesIn(withGnaHwConfValue),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
conv_test_params(CommonTestUtils::DEVICE_GNA, conv_case)
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, DeconvolutionLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, DeconvolutionLayerTest,
::testing::ValuesIn(deconv_test_cases),
getTestName<conv_test_params>);
};
// TODO: fix this
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, PoolingLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, PoolingLayerTest,
// ::testing::Values(pool_test_params("GNAPlugin", "FP32", pool_case)),
// getTestName<pool_test_params>);
//
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, ReLULayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ReLULayerTest,
// ::testing::Values(activ_test_params("GNAPlugin", "FP32", activation_case)),
// getTestName<activ_test_params>);
// FIXME
//#if (defined INSTANTIATE_TESTS)
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, MemoryLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MemoryLayerTest,
// ::testing::ValuesIn(memory_test_cases),
// getTestName<memory_test_params>);
//#endif
layout_test_params(CommonTestUtils::DEVICE_GNA, "FP32", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadActiv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadActiv,
::testing::ValuesIn(activ_test_cases), getTestName);
#include "behavior_test_plugin_unsupported.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
- INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "gna_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "behavior_test_plugin_config.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTest,
ValuesIn(BehTestParams::concat(withCorrectConfValues, withCorrectConfValuesPluginOnly)),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(withIncorrectConfKeys),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
#include "mkldnn_test_data.hpp"
INSTANTIATE_TEST_CASE_P(
- BehaviorTest,
+ smoke_BehaviorTest,
BehaviorPluginTestExecGraphInfo,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "behavior_test_plugin_infer_request_callback.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfig,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfig,
ValuesIn(BehTestParams::concat(withCorrectConfValues, withCorrectConfValuesNetworkOnly)),
getConfigTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
pool_test_params(CommonTestUtils::DEVICE_CPU, "FP32", pool_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ROIPoolingLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ROIPoolingLayerTest,
::testing::ValuesIn(roi_pool_test_cases),
getTestName<pool_test_params>);
activ_test_params(CommonTestUtils::DEVICE_CPU, "FP32", activation_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ActivationLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ActivationLayerTest,
::testing::ValuesIn(activ_test_cases),
getTestName<activ_test_params>);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ReLULayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ReLULayerTest,
::testing::Values(activ_test_params("CPU", "FP32", activation_case)),
getTestName<activ_test_params>);
norm_test_params(CommonTestUtils::DEVICE_CPU, "FP32", norm_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, NormalizeLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, NormalizeLayerTest,
::testing::ValuesIn(norm_test_cases),
getTestName<norm_test_params>);
layout_test_params(CommonTestUtils::DEVICE_CPU, "FP32", Layout::CHW, power_params({ { 3, 32, 16 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadPower,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadPower,
::testing::ValuesIn(power_test_cases), getTestName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadConv,
::testing::ValuesIn(conv_test_cases), getTestName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanNotLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanNotLoadConv,
::testing::ValuesIn(conv_neg_test_cases), getTestName);
#include "behavior_test_plugin_set_preprocess.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
BehaviorPluginTestPreProcess,
ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_unsupported.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "mkldnn_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.weights_blob, \
Precision::FP32)
+// for multi-device we are testing the fp16 (as it is supported by all device combos we are considering for testing,
+// e.g. GPU and VPU); for CPU the network is automatically (internally) converted to fp32.
+// Yet the input precision FP16 is not supported by the CPU yet
+const std::map<std::string, std::string> multi_device_conf = {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"}};
+#define BEH_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32, \
+ multi_device_conf)
// all parameters are unsupported - reversed
#define BEH_US_ALL_MKLDNN BehTestParams("CPU", \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
Precision::Q78)
+#define BEH_US_ALL_MULTI BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
+ Precision::Q78, \
+ multi_device_conf)
const BehTestParams supportedValues[] = {
BEH_MKLDNN,
+ BEH_MULTI,
};
const BehTestParams requestsSupportedValues[] = {
BEH_MKLDNN,
// the following adds additional test the MKLDNNGraphlessInferRequest (explicitly created for streams)
BEH_MKLDNN.withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MKLDNN.withConfig({{CONFIG_KEY(CPU_THROUGHPUT_STREAMS),"0"},
+ {CONFIG_KEY(CPU_THREADS_NUM), "1"}}),
+ BEH_MULTI,
};
const BehTestParams allInputSupportedValues[] = {
BEH_MKLDNN_FP16.withIn(Precision::U8).withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
BEH_MKLDNN_FP16.withIn(Precision::U16).withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
BEH_MKLDNN_FP16.withIn(Precision::I16).withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI,
+ BEH_MULTI.withIn(Precision::U8),
+ BEH_MULTI.withIn(Precision::U16),
+ BEH_MULTI.withIn(Precision::I16),
+ BEH_MULTI.withIn(Precision::U8).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withIn(Precision::U16).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withIn(Precision::I16).withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
};
const BehTestParams allOutputSupportedValues[] = {
BEH_MKLDNN,
// the following withConfig test checks the MKLDNNGraphlessInferRequest (explicitly created for streams)
BEH_MKLDNN.withConfig({{KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withOut(Precision::FP32),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
};
const BehTestParams typeUnSupportedValues[] = {
BEH_MKLDNN.withIn(Precision::Q78),
BEH_MKLDNN_FP16,
+ BEH_MULTI.withIn(Precision::Q78),
};
const BehTestParams allUnSupportedValues[] = {
BEH_US_ALL_MKLDNN,
+ BEH_US_ALL_MULTI,
};
const std::vector<BehTestParams> withCorrectConfValues = {
BEH_MKLDNN.withConfig({{KEY_CPU_BIND_THREAD, NO}}),
BEH_MKLDNN.withConfig({{KEY_CPU_BIND_THREAD, YES}}),
BEH_MKLDNN.withConfig({{KEY_DYN_BATCH_LIMIT, "10"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_NUMA}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, CPU_THROUGHPUT_AUTO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, "8"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_BIND_THREAD, NO}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_BIND_THREAD, YES}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_DYN_BATCH_LIMIT, "10"}}),
};
const BehTestParams withIncorrectConfValues[] = {
BEH_MKLDNN.withConfig({{KEY_CPU_THROUGHPUT_STREAMS, "OFF"}}),
BEH_MKLDNN.withConfig({{KEY_CPU_BIND_THREAD, "OFF"}}),
BEH_MKLDNN.withConfig({{KEY_DYN_BATCH_LIMIT, "NAN"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_THROUGHPUT_STREAMS, "OFF"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_CPU_BIND_THREAD, "OFF"}}),
+ BEH_MULTI.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"},
+ {KEY_DYN_BATCH_LIMIT, "NAN"}}),
};
const std::vector<BehTestParams> withCorrectConfValuesPluginOnly;
const std::vector<BehTestParams> withCorrectConfValuesNetworkOnly = {
BEH_MKLDNN.withConfig({}),
+ BEH_MULTI
};
const BehTestParams withIncorrectConfKeys[] = {
BEH_MKLDNN.withIncorrectConfigItem(),
+ BEH_MULTI.withIncorrectConfigItem(),
};
${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC})
-add_dependencies(${TARGET_NAME} inference_engine_preproc)
+add_dependencies(${TARGET_NAME} MultiDevicePlugin inference_engine_preproc)
target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/plugin_tests")
#include <gna/gna_config.hpp>
#include <multi-device/multi_device_config.hpp>
#include <cpp_interfaces/exception2status.hpp>
-#include <tests_utils.hpp>
+#include <common_test_utils/test_assertions.hpp>
#include <memory>
#include <fstream>
// for multi-device the number of Executors is not known (defined by the devices configuration)
} else {
ASSERT_EQ(0u, ExecutorManager::getInstance()->getExecutorsNumber());
- ASSERT_GE(1u, ExecutorManager::getInstance()->getIdleCPUStreamsExecutorsNumber());
+ ASSERT_GE(2u, ExecutorManager::getInstance()->getIdleCPUStreamsExecutorsNumber());
}
}
if (GetParam().device == CommonTestUtils::DEVICE_CPU) {
namespace {
std::string getTestCaseName(testing::TestParamInfo<BehTestParams> obj) {
- return obj.param.device + "_" + obj.param.input_blob_precision.name()
- + (obj.param.config.size() ? "_" + obj.param.config.begin()->second : "");
+ std::string config;
+ for (auto&& cfg : obj.param.config) {
+ config += "_" + cfg.first + "_" + cfg.second;
+ }
+ return obj.param.device + "_" + obj.param.input_blob_precision.name() + config;
}
}
TEST_P(BehaviorPluginTestInferRequest, returnDeviceBusyOnSetBlobAfterAsyncInfer) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
+ auto&& config = GetParam().config;
+ auto itConfig = config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
+ if (itConfig != config.end()) {
+ if (itConfig->second != "CPU_THROUGHPUT_AUTO") {
+ if (std::stoi(itConfig->second) == 0) {
+ GTEST_SKIP() << "Not applicable with disabled streams";
+ }
+ }
+ }
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
Blob::Ptr input;
namespace {
std::string getTestCaseName(testing::TestParamInfo<BehTestParams> obj) {
- return obj.param.device + "_" + obj.param.input_blob_precision.name()
- + (obj.param.config.size() ? "_" + obj.param.config.begin()->second : "");
+ std::string config;
+ for (auto&& cfg : obj.param.config) {
+ config += "_" + cfg.first + "_" + cfg.second;
+ }
+ return obj.param.device + "_" + obj.param.input_blob_precision.name() + config;
}
}
//
#include "behavior_test_plugin.h"
-#include <test_assertions.hpp>
using namespace std;
using namespace ::testing;
class BehaviorPluginTestInput : public BehaviorPluginTest { };
class BehaviorPluginTestOutput : public BehaviorPluginTest { };
-TEST_F(BehaviorPluginTest, AllocateNullBlob) {
+TEST_F(BehaviorPluginTest, smoke_llocateNullBlob) {
TensorDesc tdesc = TensorDesc(Precision::FP32, NCHW);
InferenceEngine::TBlob<float> blob(tdesc);
ASSERT_NO_THROW(blob.allocate());
target_link_libraries(${TARGET_NAME} PRIVATE mvnc)
endif()
- add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+ add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+ set_property(TEST ${TARGET_NAME} PROPERTY LABELS VPU MYRIAD)
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
endfunction(enable_vpu)
BEH_MYRIAD,
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, AOTBehaviorTests, ValuesIn(vpuValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, AOTBehaviorTests, ValuesIn(vpuValues), getTestCaseName);
#endif
BEH_MYRIAD,
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, MYRIADBoot, ValuesIn(vpuValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MYRIADBoot, ValuesIn(vpuValues), getTestCaseName);
}
};
-TEST_F(VPUGetMetric, GetThermalStatsFromNetwork) {
+TEST_F(VPUGetMetric, smoke_GetThermalStatsFromNetwork) {
const auto exe_network = loadNetworkOnDevice("MYRIAD");
auto result = Parameter{};
ASSERT_GT(result.as<float>(), 0);
}
-TEST_F(VPUGetMetric, GetThermalStatsFromPlugin) {
+TEST_F(VPUGetMetric, smoke_GetThermalStatsFromPlugin) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
}
}
-TEST_F(VPUGetMetric, ThermalStatsFromPluginWithIncorrectID) {
+TEST_F(VPUGetMetric, smoke_ThermalStatsFromPluginWithIncorrectID) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
ASSERT_TRUE(result.empty());
}
-TEST_F(VPUGetMetric, ThermalStatsFromPluginWithoutLoadedNetwork) {
+TEST_F(VPUGetMetric, smoke_ThermalStatsFromPluginWithoutLoadedNetwork) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
ASSERT_TRUE(result.empty());
}
-TEST_F(VPUGetMetric, MyriadGetAvailableDevices) {
+TEST_F(VPUGetMetric, smoke_MyriadGetAvailableDevices) {
std::vector<std::string> availableDevices;
ASSERT_NO_THROW(availableDevices = getAvailableDevices());
ASSERT_TRUE(!availableDevices.empty());
#include "behavior_test_plugin.h"
#include "helpers/myriad_load_network_case.hpp"
-TEST_F(MyriadLoadNetworkTestCase, ReloadPlugin) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_ReloadPlugin) {
ASSERT_NO_THROW(LoadNetwork());
ASSERT_NO_THROW(LoadNetwork());
}
-TEST_F(MyriadLoadNetworkTestCase, SimpleLoading) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_SimpleLoading) {
auto devices = getDevicesList();
ASSERT_TRUE(devices.size());
ASSERT_TRUE(!IsDeviceAvailable(device_to_load));
}
-TEST_F(MyriadLoadNetworkTestCase, LoadingAtTheSameDevice) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_LoadingAtTheSameDevice) {
auto devices = getDevicesList();
ASSERT_TRUE(devices.size());
ie->LoadNetwork(cnnNetwork, "MYRIAD", config));
}
-TEST_F(MyriadLoadNetworkTestCase, ThrowsExeptionWhenNameIsInvalid) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_ThrowsExeptionWhenNameIsInvalid) {
auto device_to_load = "SomeVeryBadName";
std::map<std::string, std::string> config = {
{KEY_DEVICE_ID, device_to_load},
ie->LoadNetwork(cnnNetwork, "MYRIAD", config));
}
-TEST_F(MyriadLoadNetworkTestCase, ThrowsExeptionWhenPlatformConflictWithProtocol) {
+TEST_F(MyriadLoadNetworkTestCase, smoke_ThrowsExeptionWhenPlatformConflictWithProtocol) {
std::string wrong_platform;
auto devices = getDevicesList();
ASSERT_TRUE(devices.size());
ASSERT_EQ(statusCode, StatusCode::OK) << resp.msg;
}
-INSTANTIATE_TEST_CASE_P(VPUConfigProtocolTests,
+INSTANTIATE_TEST_CASE_P(smoke_VPUConfigProtocolTests,
MyriadProtocolTests,
::testing::ValuesIn(myriadProtocols),
MyriadProtocolTests::getTestCaseName);
\ No newline at end of file
BEH_MYRIAD,
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, MYRIADWatchdog, ValuesIn(vpuValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MYRIADWatchdog, ValuesIn(vpuValues), getTestCaseName);
#include "holders_tests.hpp"
-INSTANTIATE_TEST_CASE_P(ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
+INSTANTIATE_TEST_CASE_P(smoke_ReleaseOrderTests, CPP_HoldersTests, testing::Combine(testing::ValuesIn(std::vector<std::vector<int>> {
// 0 - plugin
// 1 - executable_network
// 2 - infer_request
#include "behavior_test_plugins.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTest, ValuesIn(supportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
#include "vpu_test_data.hpp"
INSTANTIATE_TEST_CASE_P(
- BehaviorTest, BehaviorPluginCorrectConfigTest,
+ smoke_BehaviorTest, BehaviorPluginCorrectConfigTest,
ValuesIn(
BehTestParams::concat(
BehTestParams::concat(deviceSpecificConfigurations, deviceAgnosticConfigurations),
getTestCaseName
);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTest, ValuesIn(withIncorrectConfValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginIncorrectConfigTestInferRequestAPI,
ValuesIn(withIncorrectConfKeys),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginCorrectConfigTestInferRequestAPI,
ValuesIn(supportedValues),
getTestCaseName);
// TODO: currently this tests are not applicable to myriadPlugin
#if 0
-INSTANTIATE_TEST_CASE_P(
+INSTANTIATE_TEST_CASE_P(smoke_
BehaviorTest,
BehaviorPluginTestExecGraphInfo,
ValuesIn(supportedValues),
#include "behavior_test_plugin_infer_request.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequest, ValuesIn(requestsSupportedValues), getTestCaseName);
#include "behavior_test_plugin_infer_request_callback.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestCallback, ValuesIn(requestsSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_config.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfig,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfig,
ValuesIn(BehTestParams::concat(deviceAgnosticConfigurations, withCorrectConfValuesNetworkOnly)),
getConfigTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestConfigExclusiveAsync, ValuesIn(supportedValues),
getConfigTestCaseName);
#include "behavior_test_plugin_infer_request_input.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestInput, ValuesIn(allInputSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_infer_request_output.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestInferRequestOutput, ValuesIn(allOutputSupportedValues),
getOutputTestCaseName);
pool_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", pool_case),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, ROIPoolingLayerTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, ROIPoolingLayerTest,
::testing::ValuesIn(roi_pool_test_cases),
getTestName<pool_test_params>);
// FIXME
//#if (defined INSTANTIATE_TESTS)
-//INSTANTIATE_TEST_CASE_P(BehaviorTest, MemoryLayerTest,
+//INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, MemoryLayerTest,
// ::testing::ValuesIn(memory_test_cases),
// getTestName<memory_test_params>);
//#endif
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::CHW, power_params({ { 3, 32, 16 } }, 2, 2, 2)),
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadPower,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadPower,
::testing::ValuesIn(power_test_cases), getTestName);
layout_test_params conv_neg_test_cases[] = {
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::NC, power_params({ { 1, 3 } }, 2, 2, 2)),
};
-INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanNotLoadConv,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanNotLoadConv,
::testing::ValuesIn(conv_neg_test_cases), getTestName);
layout_test_params conv_test_cases[] = {
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::CHW, power_params({ { 3, 32, 16 } }, 2, 2, 2)),
layout_test_params(CommonTestUtils::DEVICE_MYRIAD, "FP16", Layout::NCHW, power_params({ { 1, 3, 16, 16 } }, 2, 2, 2)),
};
- INSTANTIATE_TEST_CASE_P(BehaviorTest, LayoutTestCanLoadConv,
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, LayoutTestCanLoadConv,
::testing::ValuesIn(conv_test_cases), getTestName);
#include "behavior_test_plugin_set_preprocess.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest,
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest,
BehaviorPluginTestPreProcess,
ValuesIn(supportedValues),
getTestCaseName);
#include "behavior_test_plugin_unsupported.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestAllUnsupported, ValuesIn(allUnSupportedValues),
getTestCaseName);
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestTypeUnsupported, ValuesIn(typeUnSupportedValues),
getTestCaseName);
- INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
+ INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestBatchUnsupported, ValuesIn(batchUnSupportedValues),
getTestCaseName);
#include "behavior_test_plugin_version.hpp"
#include "vpu_test_data.hpp"
-INSTANTIATE_TEST_CASE_P(BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, BehaviorPluginTestVersion, ValuesIn(add_element_into_array(supportedValues, BEH_HETERO)), getTestCaseName);
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32.weights_blob, \
Precision::FP32)
+// for multi-device we are testing the fp16 (as it is supported by all device combos we are considering for testing
+// e.g. GPU and VPU, for CPU the network is automatically (internally) converted to fp32.
+#define BEH_MULTI(device) BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32, \
+ {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, #device}})
+#define BEH_MULTI_CONFIG BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelFP16.weights_blob, \
+ Precision::FP32)
// all parameters are unsupported - reversed
#define BEH_US_ALL_MYRIAD BehTestParams("MYRIAD", \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
Precision::Q78)
+#define BEH_US_ALL_MULTI(device) BehTestParams("MULTI", \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.model_xml_str, \
+ FuncTestUtils::TestModel::convReluNormPoolFcModelQ78.weights_blob, \
+ Precision::Q78, \
+ {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, #device}})
const BehTestParams supportedValues[] = {
BEH_MYRIAD,
+ BEH_MULTI(MYRIAD),
};
const BehTestParams requestsSupportedValues[] = {
BEH_MYRIAD,
+ BEH_MULTI(MYRIAD),
};
const BehTestParams allInputSupportedValues[] = {
BEH_MYRIAD, BEH_MYRIAD.withIn(Precision::U8), BEH_MYRIAD.withIn(Precision::FP16),
+ BEH_MULTI(MYRIAD), BEH_MULTI(MYRIAD).withIn(Precision::U8), BEH_MULTI(MYRIAD).withIn(Precision::FP16),
// I16 not supported yet
- // (ISSUE-7979) [IE myriad] The plugin should support I16 format for Input
+ // (CVS-7979) [IE myriad] The plugin should support I16 format for Input
//BEH_MYRIAD.withIn(Precision::I16),
};
const BehTestParams allOutputSupportedValues[] = {
BEH_MYRIAD, BEH_MYRIAD.withOut(Precision::FP16),
+ BEH_MULTI(MYRIAD), BEH_MULTI(MYRIAD).withOut(Precision::FP16),
};
const BehTestParams typeUnSupportedValues[] = {
BEH_MYRIAD.withIn(Precision::Q78), BEH_MYRIAD.withIn(Precision::U16), BEH_MYRIAD.withIn(Precision::I8),
BEH_MYRIAD.withIn(Precision::I16), BEH_MYRIAD.withIn(Precision::I32),
+ BEH_MULTI(MYRIAD).withIn(Precision::Q78), BEH_MULTI(MYRIAD).withIn(Precision::U16),
+ BEH_MULTI(MYRIAD).withIn(Precision::I8),
+ BEH_MULTI(MYRIAD).withIn(Precision::I16), BEH_MULTI(MYRIAD).withIn(Precision::I32),
};
const BehTestParams batchUnSupportedValues[] = {
BEH_MYRIAD.withBatchSize(0),
+ BEH_MULTI(MYRIAD).withBatchSize(0),
};
const BehTestParams allUnSupportedValues[] = {
BEH_US_ALL_MYRIAD,
+ BEH_US_ALL_MULTI(MYRIAD),
};
const std::vector<BehTestParams> deviceSpecificConfigurations = {
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), CONFIG_VALUE(YES)}}),
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), CONFIG_VALUE(NO)}}),
+
+ BEH_MULTI_CONFIG.withConfig({
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_DEBUG)}
+ }),
+ BEH_MULTI_CONFIG.withConfig({
+ {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), CONFIG_VALUE(YES)}
+ }),
};
const std::vector<BehTestParams> withCorrectConfValuesPluginOnly = {
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), "ON"}}),
BEH_MYRIAD.withConfig({{VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME), "OFF"}}),
+
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION),"ON"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {CONFIG_KEY(LOG_LEVEL), "VERBOSE"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_CONFIG_KEY(IGNORE_IR_STATISTIC), "ON"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "-1"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "0"}}),
+ BEH_MULTI_CONFIG.withConfig({{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "MYRIAD"},
+ {VPU_MYRIAD_CONFIG_KEY(PLATFORM), "1"}}),
};
const BehTestParams withIncorrectConfKeys[] = {
BEH_MYRIAD.withIncorrectConfigItem(),
+ BEH_MULTI(MYRIAD).withIncorrectConfigItem(),
};
else()
target_compile_definitions(${TARGET} PRIVATE -DPERF_TEST=0)
endif()
+
+add_test(NAME ${TARGET} COMMAND ${TARGET})
+set_property(TEST ${TARGET} PROPERTY LABELS IE PREPROC)
list(APPEND CLDNN_LIBS
IESharedTests
inference_engine_lp_transformations
- inference_engine_ir_readers
${CLDNN__IOCL_ICD_LIBPATH})
# try to find VA libraries
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS GPU)
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
-
-set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS GNA)
if(GNA_LIBRARY_VERSION STREQUAL "GNA1")
target_compile_definitions(${TARGET_NAME} PRIVATE GNA1_LIB)
INSTANTIATE_TEST_CASE_P(
DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_NETWORK_NAME,
::testing::Values("GNA", "MULTI:GNA", "HETERO:GNA"));
-//
-// TODO: this metric is not supported by the plugin
+
INSTANTIATE_TEST_CASE_P(
- DISABLED_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
- ::testing::Values("GNA", "MULTI:GNA", "HETERO:GNA"));
+ IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
+ ::testing::Values("GNA"/*, "MULTI:GNA", "HETERO:GNA" */));
INSTANTIATE_TEST_CASE_P(
IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported,
list(APPEND MKL_DNN_LIBS
IESharedTests
inference_engine_lp_transformations
- inference_engine_ir_readers
${Boost_REGEX_LIBRARY})
list(APPEND TEST_SRC ${MKL_DNN_TEST_SOURCES})
add_dependencies(${TARGET_NAME} ${DEPENDENCIES})
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS CPU)
inferRequest1.Infer();
float* outRawDataWithConfig = inferRequest1.GetBlob(net.getOutputsInfo().begin()->first)->cbuffer().as<float*>();
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32, thr1, thr2);
size_t outElementsCount = std::accumulate(begin(fnPtr->get_output_shape(0)), end(fnPtr->get_output_shape(0)), 1,
std::multiplies<size_t>());
- FuncTestUtils::compareRawBuffers(outRawData, outRawDataWithConfig, outElementsCount,
- outElementsCount,
- thr);
+ FuncTestUtils::compareRawBuffers(outRawData, outRawDataWithConfig, outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
}
#include <tests_common.hpp>
#include <tests_common_func.hpp>
#include <memory>
-#include <tests_utils.hpp>
#include <multi-device/multi_device_config.hpp>
#include <ie_core.hpp>
#include <ie_plugin_ptr.hpp>
std::shared_ptr<IExtension> ext(new NewTestExtension());
checkNotSharedExtensions(ext, "CPU");
}
-
-TEST_F(smoke_ExtensionTest, MULTI_delete_extension) {
- try {
- InferenceEngine::Core ie;
- ie.GetVersions("MULTI");
- } catch (...) {
- GTEST_SKIP();
- }
- std::shared_ptr<IExtension> ext(new TestExtension());
- checkExtensionRemoved({"MultiDevice", ext, {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"}}});
-}
-
-TEST_F(smoke_ExtensionTest, MULTI_no_delete_extension_from_another_engine) {
- try {
- InferenceEngine::Core ie;
- ie.GetVersions("MULTI");
- } catch (...) {
- GTEST_SKIP();
- }
- std::shared_ptr<IExtension> ext(new TestExtension());
- checkExtensionNotRemovedFromAnotherEngineObject({"MultiDevice", ext, {{MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, "CPU"}}});
-}
-
-TEST_F(smoke_ExtensionTest, MULTI_no_share_extension_between_engines) {
- try {
- InferenceEngine::Core ie;
- ie.GetVersions("MULTI");
- } catch (...) {
- GTEST_SKIP();
- }
- std::shared_ptr<IExtension> ext(new TestExtension());
- checkNotSharedExtensions(ext, "MULTI:CPU");
-}
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_SUPPORTED_CONFIG_KEYS,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_SUPPORTED_METRICS,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_AVAILABLE_DEVICES,
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_FULL_DEVICE_NAME,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES,
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetMetricTest, IEClassGetMetricTest_ThrowUnsupported,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetConfigTest, IEClassGetConfigTest_ThrowUnsupported,
- ::testing::Values("CPU", "HETERO"));
+ ::testing::Values("CPU", "MULTI", "HETERO"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassGetAvailableDevices, IEClassGetAvailableDevices,
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_CONFIG_KEYS,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_SUPPORTED_METRICS,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_NETWORK_NAME,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
INSTANTIATE_TEST_CASE_P(
smoke_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_ThrowsUnsupported,
- ::testing::Values("CPU", "HETERO:CPU"));
+ ::testing::Values("CPU", "MULTI:CPU", "HETERO:CPU"));
//
// Executable Network GetConfig / SetConfig
smoke_Inception,
ModelTransformationsTest,
::testing::Values(
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, false, createParam(), {}, 3ul),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamI8I8(), {}, 0, false),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8I8(), {}, 0),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8U8(), {}, 0),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateLevel)),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateIntervals)),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 1ul, true, false, createParam()),
- TransformationsParams("MKLDNN", getModelParams("inception_v3_tf"), 2ul, true, false, createParam())
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, false, createParam(), {}, 3ul),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamI8I8(), {}, 0, false),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8I8(), {}, 0),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamU8U8(), {}, 0),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateLevel)),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, false, true, createParamCpu().setQuantizedTensorAlignmentOnActivations(LayerTransformation::QuantizedTensorAlignment::UpdateIntervals)),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 1ul, true, false, createParam()),
+ TransformationsParams("CPU", getModelParams("inception_v3_tf"), 2ul, true, false, createParam())
),
TransformationsParams::getLowPrecisionTransformerSingleLayerTestName);
smoke_MobileNet,
ModelTransformationsTest,
::testing::Values(
- TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false),
+ TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false),
// TODO: eshoguli: fix this issue
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamI8I8()),
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8I8()),
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8U8(), {}, 2),
-// TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamCpu(), { "464/Pool", "465/Pool" }),
- TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, true),
- TransformationsParams("MKLDNN", getModelParams("mobilenet_v2_tf_depthwise"), 2ul, true)
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamI8I8()),
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8I8()),
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamU8U8(), {}, 2),
+// TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, false, true, createParamCpu(), { "464/Pool", "465/Pool" }),
+ TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 1ul, true),
+ TransformationsParams("CPU", getModelParams("mobilenet_v2_tf_depthwise"), 2ul, true)
),
TransformationsParams::getLowPrecisionTransformerSingleLayerTestName);
smoke_ResNet,
ModelTransformationsTest,
::testing::Values(
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamI8I8(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false),
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamI8I8(), {
// TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8I8(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8I8(), {
// // TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8U8(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamU8U8(), {
// TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, false, true, createParamCpu(), {
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, false, true, createParamCpu(), {
// TODO: remove when eltwise validation was added
"resnet_v1_50/block1/unit_2/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars",
"resnet_v1_50/block2/unit_3/bottleneck_v1/act_quant/FakeQuantWithMinMaxVars"
}),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 1ul, true),
- TransformationsParams("MKLDNN", getModelParams("resnet_50_tf"), 2ul, true)
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 1ul, true),
+ TransformationsParams("CPU", getModelParams("resnet_50_tf"), 2ul, true)
),
TransformationsParams::getLowPrecisionTransformerSingleLayerTestName);
::testing::Values(
SingleLayerTransformationsTestParams(
"CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, 0)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, 2.89f, 64)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, -32.f, 0)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, -64.f)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
+ PowerTestModel::Ptr(new PowerTestModel(3.5f, 1.f, 0)),
+ { { 1, 3, 299, 299 } },
+ { { 1, 3, 299, 299 } }),
+
+ SingleLayerTransformationsTestParams(
+ "CPU",
SingleLayerTestModel::Ptr(new ResampleTestModel()),
{ { 1, 32, 149, 149 } },
{ { 1, 32, 147, 147 } }),
${NGRAPH_LIBRARIES}
ie_tests
ngraphFunctions
- inference_engine_ir_readers
)
file(GLOB SHARED_TESTS_SRC
)
add_library(${TARGET_NAME} STATIC ${SHARED_TESTS_SRC})
-add_dependencies(${TARGET_NAME} inference_engine_preproc mock_engine)
+add_dependencies(${TARGET_NAME} inference_engine_preproc MultiDevicePlugin mock_engine)
if(ENABLE_MKL_DNN)
add_dependencies(${TARGET_NAME} MKLDNNPlugin)
}
template<int Version = 3>
- static details::CNNNetworkImplPtr
+ static InferenceEngine::CNNNetwork
buildSingleLayerNetwork(const std::string &layerType,
const CommonTestUtils::InOutShapes &inOutShapes,
std::map<std::string, std::string> *params,
size_t weightsSize = 0,
size_t biasesSize = 0,
const TBlob<uint8_t>::Ptr &weights = nullptr) {
- auto *parser = new details::FormatParser(Version);
- return buildSingleLayerNetworkCommon<Version>(parser, layerType, inOutShapes, params, layerDataName, precision,
+ return buildSingleLayerNetworkCommon<Version>(layerType, inOutShapes, params, layerDataName, precision,
weightsSize, biasesSize, weights);
}
auto weights = createWeights(elementSize, weightByteSize, biasByteSize);
- auto networkImplPtr = buildSingleLayerNetwork<3>(layerHelper->getType(), initialShapes, ¶ms, "data",
- pluginParams.precision, weightByteSize, biasByteSize, weights);
+ auto network = buildSingleLayerNetwork<3>(layerHelper->getType(), initialShapes, ¶ms, "data",
+ pluginParams.precision, weightByteSize, biasByteSize, weights);
- CNNNetwork network(networkImplPtr);
std::tie(inputName, inputData) = (*network.getInputsInfo().begin());
inputData->setPrecision(pluginParams.precision);
inputData->setLayout(pluginParams.layout);
using namespace InferenceEngine::details;
using namespace InferenceEngine::PluginConfigParams;
+#define CHECK_MULTI() do { \
+ try { \
+ Core ie; \
+ ie.GetVersions("MULTI"); \
+ } catch (...) { \
+ GTEST_SKIP(); \
+ } \
+ } while(false)\
+
class IEClassBasicTest : public TestsCommon {
public:
void SetUp() override {
GTEST_COUT << "Core created " << testIndex << std::endl;
ASSERT_NO_THROW(ie.RegisterPlugins(wStringtoMBCSstringChar(pluginsXmlW)));
CommonTestUtils::removeFile(pluginsXmlW);
-
+#if defined __linux__ && !defined(__APPLE__)
+ ASSERT_NO_THROW(ie.GetVersions("mock")); // from pluginXML
+#endif
ASSERT_NO_THROW(ie.GetVersions(deviceName));
GTEST_COUT << "Plugin created " << testIndex << std::endl;
}
TEST_F(IEClassBasicTest, smoke_ImportNetworkMultiThrows) {
- Core ie;
-
+ CHECK_MULTI();
+ InferenceEngine::Core ie;
ASSERT_THROW(ie.ImportNetwork("model", "MULTI"), InferenceEngineException);
}
}
TEST_P(IEClassNetworkTestP, QueryNetworkMultiThrows) {
+ CHECK_MULTI();
Core ie;
ASSERT_THROW(ie.QueryNetwork(actualNetwork, "MULTI"), InferenceEngineException);
}
//
TEST_P(IEClassLoadNetworkTest, LoadNetworkHETEROwithMULTINoThrow) {
- Core ie;
+ CHECK_MULTI();
+ Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
std::string devices;
auto availableDevices = ie.GetMetric(deviceName, METRIC_KEY(AVAILABLE_DEVICES)).as<std::vector<std::string>>();
}
TEST_P(IEClassLoadNetworkTest, LoadNetworkMULTIwithHETERONoThrow) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
//
TEST_P(IEClassLoadNetworkTest, QueryNetworkHETEROwithMULTINoThrowv7) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
}
TEST_P(IEClassLoadNetworkTest, QueryNetworkMULTIwithHETERONoThrowv7) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
}
TEST_P(IEClassLoadNetworkTest, QueryNetworkHETEROwithMULTINoThrowv10) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
}
TEST_P(IEClassLoadNetworkTest, DISABLED_QueryNetworkMULTIwithHETERONoThrowv10) {
+ CHECK_MULTI();
Core ie;
if (supportsDeviceID(ie, deviceName) && supportsAvaliableDevices(ie, deviceName)) {
using IEClassLoadNetworkAfterCoreRecreateTest = IEClassLoadNetworkTest;
TEST_P(IEClassLoadNetworkAfterCoreRecreateTest, LoadAfterRecreateCoresAndPlugins) {
+ CHECK_MULTI();
+ Core ie;
{
- Core ie;
auto versions = ie.GetVersions("MULTI:" + deviceName + ",CPU");
ASSERT_EQ(3, versions.size());
}
auto refOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(fnPtr,
{blob->cbuffer().as<float *>()});
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
+ float thr1, thr2;
+ FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32, thr1, thr2);
+
std::vector<size_t> inShapeLimited{size_t(bsz), 4, 20, 20};
size_t outElementsCount = std::accumulate(begin(inShapeLimited), end(inShapeLimited), 1, std::multiplies<size_t>());
- FuncTestUtils::compareRawBuffers(outRawData, *refOutData[0], outElementsCount, outElementsCount, thr);
+ FuncTestUtils::compareRawBuffers(outRawData, *refOutData[0], outElementsCount, outElementsCount,
+ FuncTestUtils::CompareType::ABS_AND_REL,
+ thr1, thr2);
if (GetParam().deviceName.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
PluginCache::get().reset();
}
}
struct network_params {
- std::string pluginName;
+ std::string deviceName;
std::string modelFile;
std::string imageName;
std::string statFile;
result += statFile;
return result;
}
-
- std::string plugin() { return pluginName + "Plugin"; }
-
- std::string deviceName() {
- if (pluginName == "MultiDevice") {
- return "MULTI:CPU";
- }
- if (pluginName == "MKLDNN") {
- return "CPU";
- }
-
- return "";
- }
-
};
static LayerTransformation::Params createParam() {
const size_t classesCanBeChangedIndex = 9999,
const bool compareRawValues = true,
const std::unordered_set<std::string>& removedLayers = {}) :
- pluginName(""),
+ deviceName(""),
modelParams(ModelParams("", "", "", {})),
batchSize(1ul),
transformationsInPluginEnabled(transformationsInPluginEnabled),
removedLayers(removedLayers) {}
TransformationsParams(
- const std::string pluginName,
+ const std::string deviceName,
const ModelParams modelParams,
const size_t batchSize,
const bool transformationsInPluginEnabled = true,
const std::unordered_set<std::string>& removedLayers = {},
const std::vector<std::pair<std::string, std::vector<float>>> inputs = {},
const std::vector<std::pair<std::string, std::shared_ptr<LayerTransformation>>> transformations = {}) :
- pluginName(pluginName),
+ deviceName(deviceName),
modelParams(modelParams),
batchSize(batchSize),
transformationsInPluginEnabled(transformationsInPluginEnabled),
compareRawValues(compareRawValues),
removedLayers(removedLayers) {}
- const std::string pluginName;
+ const std::string deviceName;
const ModelParams modelParams;
const size_t batchSize;
// transformationsParams.transformationsInPluginEnabled ? PluginConfigParams::YES : PluginConfigParams::NO);
usedNetwork = cloneNet(network);
- ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.deviceName(), config);
+ ExecutableNetwork exeNetwork = ie.LoadNetwork(network, p.deviceName, config);
InferRequest inferRequest = exeNetwork.CreateInferRequest();
if (inputs.empty()) {
Blob::Ptr src = readInput(p.image(), batch_size);
}
network_params p{
- "MKLDNN",
+ "CPU",
transformationsParam.modelParams.irFilePath,
transformationsParam.modelParams.dataFilePath,
"",
protected:
virtual void RunTITest(const std::map<std::string, std::string> & config = {}) {
-
try {
ti_test_params p = param();
std::string model = getModel(p);
auto weights = make_shared_blob<uint8_t>(TensorDesc {Precision::U8, {p.precision.size() * p.tensorSize}, C});
weights->allocate();
- auto weights_size = details::product(std::begin(weights->getTensorDesc().getDims()), std::end(weights->getTensorDesc().getDims()));
+
if (p.precision == Precision::FP32) {
- std::vector<float> weights_vector(weights_size, 1.0f);
- ie_memcpy(weights->buffer().as<float *>(), sizeof(float), &weights_vector[0], weights_vector.size() * sizeof(float));
- } else {
+ std::vector<float> weights_vector(p.tensorSize, 1.0f);
+ ie_memcpy(weights->buffer().as<float *>(), p.tensorSize * sizeof(float),
+ &weights_vector[0], p.tensorSize * sizeof(float));
+ } else if (p.precision == Precision::FP16) {
// FP16 case
- std::vector<ie_fp16> weights_vector(weights_size, PrecisionUtils::f32tof16(1.0f));
- ie_memcpy(weights->buffer().as<ie_fp16 *>(), sizeof(ie_fp16), &weights_vector[0], weights_vector.size() * sizeof(ie_fp16));
+ std::vector<ie_fp16> weights_vector(p.tensorSize, PrecisionUtils::f32tof16(1.0f));
+ ie_memcpy(weights->buffer().as<ie_fp16 *>(), p.tensorSize * sizeof(ie_fp16),
+ &weights_vector[0], p.tensorSize * sizeof(ie_fp16));
+ } else {
+ ASSERT_TRUE(false);
}
Core ie;
std::map<std::string, std::string> fake_quantize_params = {
{"levels", "256"}
};
+
std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
+ {"power", "2"}, {"scale", "1"}, {"shift", "0"}
};
std::vector<std::pair<std::string, std::string>> edges = {
std::vector<size_t> biasesConvolutionConstDims = { conv.out_c };
std::map<std::string, std::string> const_params = {};
std::map<std::string, std::string> fakeQuantizeParams = {{ "levels", "256" }};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
+ std::map<std::string, std::string> power_params = {{"power", "2"}, {"scale", "1"}, {"shift", "0"}};
std::map<std::string, std::string> poolingParams = { {"kernel", "7,1"}, { "pool-method", "avg" }, { "strides", "1,1" } };
std::vector<std::pair<std::string, std::string>> edges = {
std::string getModel(SingleLayerTransformationsTestParams& p) const override;
};
+class PowerTestModel : public SingleLayerTestModel {
+public:
+ PowerTestModel(const float& power, const float& scale, const float& shift) : power(power), scale(scale), shift(shift) {}
+ void resetTransformation(CNNNetwork& network) const override;
+ std::string getName() const override;
+ bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
+ std::string getModel(SingleLayerTransformationsTestParams& p) const override;
+
+private:
+ const float power;
+ const float scale;
+ const float shift;
+};
+
class ConvolutionAndQuantizeOnWeightsWithMultiOutputIntervalsTestModel : public SingleLayerTestModel {
public:
std::string getModel(SingleLayerTransformationsTestParams& p) const override;
--- /dev/null
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformer_single_layer_tests.hpp"
+
+std::string PowerTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
+ size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
+ if (p._network_precision == "FP16")
+ type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
+
+ CommonTestUtils::pool_common_params pooling = { {1, 1}, {1, 1}, {0, 0}, {0, 0}, "valid", false, true };
+ std::vector<size_t> poolOutShape(p.inputDimensions[0].size());
+ CommonTestUtils::getPoolOutShape(p.inputDimensions[0], pooling, poolOutShape);
+
+ std::map<std::string, std::string> power_params = {{"power", std::to_string(power)}, {"scale", std::to_string(scale)}, {"shift", std::to_string(shift)}};
+ std::map<std::string, std::string> const_params = {};
+ std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
+
+ std::vector<std::pair<std::string, std::string>> edges = {
+ {"0,0", "1,1"}, {"1,2", "6,7"}, // power
+ {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // const
+ {"6,12", "7,13"}, {"7,14", "8,15"} // pool, power
+ };
+
+ return CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
+ "Conv_ScaleShift_transformations", p.inputDimensions[0], p._network_precision)
+ .addLayer("Power", p._network_precision, &power_params, {{p.inputDimensions[0]}, {p.inputDimensions[0]}})
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("Const", p._network_precision, &const_params, {{}, {{1}}}, type_size, 0)
+ .addLayer("FakeQuantize", p._network_precision, &fake_quantize_params, {{p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}}})
+ .poolingLayer(p._network_precision, {{p.inputDimensions[0]}, {poolOutShape}}, pooling)
+ .addLayer("Power", p._network_precision, &power_params, {{poolOutShape}, {poolOutShape}})
+ .finish(&edges);
+}
+
+void PowerTestModel::resetTransformation(CNNNetwork& network) const {
+ fillData(getLayer(network, "Const2"), 63.5, "custom");
+ fillData(getLayer(network, "Const3"), 127.0, "custom");
+ fillData(getLayer(network, "Const4"), 63.5, "custom");
+ fillData(getLayer(network, "Const5"), 127.0, "custom");
+}
+
+std::string PowerTestModel::getName() const {
+ return std::string("PowerTestModel") +
+ (power == 1.f ? std::string("") : "_power!=1") +
+ (scale == 1.f ? "" : "_scale=" + std::to_string(scale)) +
+ (shift == 0 ? "" : "_shift!=" + std::to_string(shift));
+}
+
+bool PowerTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
+ LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params));
+ transformer.transform(network);
+
+ const Precision precision = params.updatePrecisions ? Precision(Precision::U8) : network.getPrecision();
+
+ CNNLayerPtr fakeQuantize = getLayer(network, "FakeQuantize6");
+ if (fakeQuantize->outData[0]->getPrecision() != precision) {
+ THROW_IE_EXCEPTION << fakeQuantize->name << " precision " << precision << " is not correct";
+ }
+
+ CNNLayerPtr pooling = getLayer(network, "Pooling7");
+ if (pooling->outData[0]->getPrecision() != precision) {
+ THROW_IE_EXCEPTION << pooling->name << " precision " << precision << " is not correct";
+ }
+
+ CNNLayerPtr powerLayer = getLayer(network, "Power8");
+
+ const bool deleteLayer = params.quantizeOutputs && power == 1.f && powerLayer != nullptr && powerLayer->type == "Power";
+
+ if (deleteLayer) {
+ THROW_IE_EXCEPTION << "Power layer is present after transformation";
+ }
+
+ return true;
+}
std::map<std::string, std::string> const_params = {};
std::map<std::string, std::string> fake_quantize_params = {{"levels", "256"}};
- std::map<std::string, std::string> power_params = {{"power", "1"}, {"scale", "1"}, {"shift", "0"}};
+ std::map<std::string, std::string> power_params = {{"power", "2"}, {"scale", "1"}, {"shift", "0"}};
std::vector<std::pair<std::string, std::string>> edges = {
{"0,0", "1,1"}, {"1,2", "6,7"}, // Power
{"axis", "1"}
};
std::map<std::string, std::string> power_params = {
- {"power", "1"}, {"scale", "1"}, {"shift", "0"}
+ {"power", "2"}, {"scale", "1"}, {"shift", "0"}
};
std::vector<std::pair<std::string, std::string>> edges = {
std::map<std::string, std::string> const_params = {};
std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
- std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
+ std::map<std::string, std::string> power_params = { {"power", "2"}, {"scale", "1"}, {"shift", "0"} };
std::vector<std::pair<std::string, std::string>> edges = {
{"0,0", "1,1"}, // Input -> Power
{"1,2", "6,7"}, // Power -> FakeQuantize
const float threshold = p.model->getThreshold(p.device_name, net_precision, param);
const float zeroThreshold = p.model->getZeroThreshold();
- // const float threshold = net_precision == Precision::FP16 ? 0.0005f : 0.0003f;
+
+ const auto outName = transformedOutput.find(name);
+ if (outName == transformedOutput.end()) {
+ THROW_IE_EXCEPTION << "Original output name " + name + " doesn't exist in transformed model";
+ }
+
relative_compare(
- CNNNetworkHelper::getFloatData(transformedOutput.find(name)->second).get(),
+ CNNNetworkHelper::getFloatData(outName->second).get(),
CNNNetworkHelper::getFloatData(originalOutput.second).get(),
outSize,
threshold,
${CMAKE_CURRENT_SOURCE_DIR}/common/regression/helpers
LINK_LIBRARIES
IESharedTests
- inference_engine_ir_readers
vpu_graph_transformer
vpu_custom_kernels
DEFINES
LINK_LIBRARIES
IESharedTests
vpu_custom_kernels
- inference_engine_ir_readers
LINK_LIBRARIES_WHOLE_ARCHIVE
myriadTestData
VPUCommonTests
#include "myriad_layers_CTCDecoder_test.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadCTCDecoderLayerTests_nightly,
- ::testing::Combine(
- ::testing::Values(true, false),
- ::testing::ValuesIn(s_DimsConfig)));
+INSTANTIATE_TEST_CASE_P(
+ accuracy, myriadCTCDecoderLayerTests_smoke,
+ ::testing::Combine(
+ ::testing::Values<Dims>({{1, 88, 1, 71}}),
+ ::testing::Values<HwOptimization>(true, false),
+ ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
+));
#define ERROR_BOUND 0.2f
-typedef struct {
- SizeVector src_dims;
- SizeVector seq_ind_dims;
- SizeVector dst_dims;
- std::string custom_config;
-} dims_config;
+PRETTY_PARAM(CustomConfig, std::string);
+PRETTY_PARAM(HwOptimization, bool);
-PRETTY_PARAM(hwAcceleration, std::string);
-PRETTY_PARAM(dimsConfig, dims_config);
-
-typedef myriadLayerTestBaseWithParam<std::tuple<bool, dims_config>> myriadCTCDecoderLayerTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, HwOptimization, IRVersion, CustomConfig>>
+ myriadCTCDecoderLayerTests_smoke;
void refCTCDecoder(const Blob::Ptr src, const Blob::Ptr seq_ind, Blob::Ptr dst) {
ie_fp16 *src_data = static_cast<ie_fp16*>(src->buffer());
}
}
-TEST_P(myriadCTCDecoderLayerTests_nightly, CTCGreedyDecoder) {
-
- bool HWConfigValue = std::get<0>(GetParam());
- dims_config dimsConfig = std::get<1>(GetParam());
-
- if(!dimsConfig.custom_config.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
+TEST_P(myriadCTCDecoderLayerTests_smoke, CTCGreedyDecoder) {
+ const tensor_test_params dims = std::get<0>(GetParam());
+ const bool hwOptimization = std::get<1>(GetParam());
+ _irVersion = std::get<2>(GetParam());
+ const std::string customConfig = std::get<3>(GetParam());
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = dimsConfig.custom_config;
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
- IN_OUT_desc inputTensors;
- IN_OUT_desc outputTensors;
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- inputTensors.resize(2);
- outputTensors.resize(1);
-
- inputTensors[0] = dimsConfig.src_dims;
- inputTensors[1] = dimsConfig.seq_ind_dims;
- outputTensors[0] = dimsConfig.dst_dims;
+ const auto inputTensors = IN_OUT_desc{{dims.c, dims.h, dims.w}, {dims.h, dims.c}};
+ const auto outputTensors = IN_OUT_desc{{1, 1, dims.h, dims.c}};
SetInputTensors(inputTensors);
SetOutputTensors(outputTensors);
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("CTCGreedyDecoder"), NetworkInitParams().useHWOpt(HWConfigValue)));
+ std::map<std::string, std::string> params;
+ params["ctc_merge_repeated"] = "1";
- auto iter = _inputMap.begin();
- auto first_input = iter->first;
- ++iter;
- auto second_input = iter->first;
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("CTCGreedyDecoder").params(params),
+ NetworkInitParams()
+ .useHWOpt(hwOptimization)
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
- Blob::Ptr data;
- auto dataBlob = _inputMap[first_input];
+ auto dataBlob = _inputMap.begin()->second;
+ auto seqIndBlob = std::next(_inputMap.begin())->second;
- auto seqIndBlob = _inputMap[second_input];
- uint16_t *blobRawSeqFp16 = seqIndBlob->buffer().as<uint16_t *>();
- size_t count = seqIndBlob->size();
- blobRawSeqFp16[0] = PrecisionUtils::f32tof16(0.0);
- for (size_t indx = 1; indx < count; ++indx) {
- blobRawSeqFp16[indx] = PrecisionUtils::f32tof16(1.0);
+ auto seqIndFp16 = seqIndBlob->buffer().as<uint16_t *>();
+ seqIndFp16[0] = PrecisionUtils::f32tof16(0.0);
+ for (size_t i = 1; i < seqIndBlob->size(); ++i) {
+ seqIndFp16[i] = PrecisionUtils::f32tof16(1.0);
}
std::string inputTensorBinary = TestDataHelpers::get_data_path() + "/vpu/InputGreedyDecoderMyriadCHW.bin";
ASSERT_TRUE(Infer());
- auto outputBlob = _outputMap.begin()->second;
-
- _refBlob = make_shared_blob<ie_fp16>(TensorDesc(Precision::FP16, outputBlob->getTensorDesc().getDims(), ANY));
- _refBlob->allocate();
-
refCTCDecoder(dataBlob, seqIndBlob, _refBlob);
- CompareCommonAbsolute(outputBlob, _refBlob, 0.0);
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.0);
}
-static std::vector<dims_config> s_DimsConfig = {
- {{88, 1, 71}, {88, 1}, {1, 88, 1, 1}, ""},
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""},
#ifdef VPU_HAS_CUSTOM_KERNELS
- {{88, 1, 71}, {88, 1}, {1, 88, 1, 1}, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
+ getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
#endif
};
#include "myriad_layers_batch_normalization_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsBatchNormalization_nightly,
+ accuracy, myriadLayersTestsBatchNormalization_smoke,
::testing::Values(
bn_test_params{{1, 1, 16, 8}, 0.001f},
bn_test_params{{1, 4, 8, 16}, 0.00001f},
}
}
-class myriadLayersTestsBatchNormalization_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsBatchNormalization_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<bn_test_params> {
};
-TEST_P(myriadLayersTestsBatchNormalization_nightly, TestsBatchNorm)
+TEST_P(myriadLayersTestsBatchNormalization_smoke, TestsBatchNorm)
{
bn_test_params p = ::testing::WithParamInterface<bn_test_params>::GetParam();
size_t sz_weights = p.in.c;
#include "myriad_layers_bias_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBias_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBias_smoke,
::testing::ValuesIn(s_biasDims)
);
}
}
-class myriadLayersTestsBias_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsBias_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<InferenceEngine::SizeVector> {
};
-TEST_P(myriadLayersTestsBias_nightly, TestsBias) {
+TEST_P(myriadLayersTestsBias_smoke, TestsBias) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto input_dim = GetParam();
InferenceEngine::SizeVector input_dim1;
using namespace InferenceEngine;
using namespace ::testing;
-typedef myriadLayerTestBaseWithParam<std::string> myriadBlobTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::string> myriadBlobTests_smoke;
std::vector<char> readBinFile(std::string filename)
{
return vec;
}
-TEST_P(myriadBlobTests_nightly, CanGetSameBlobsOnSameIR) {
+TEST_P(myriadBlobTests_smoke, CanGetSameBlobsOnSameIR) {
std::string HWConfigValue = GetParam();
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
}
}
-INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobTests_smoke,
::testing::Values(CONFIG_VALUE(YES), CONFIG_VALUE(NO))
);
-using myriadBlobExportTests_nightly = myriadLayersTests_nightly;
+using myriadBlobExportTests_smoke = myriadLayersTests_nightly;
-TEST_F(myriadBlobExportTests_nightly, CanNotDoImportOnNonExistFile)
+TEST_F(myriadBlobExportTests_smoke, CanNotDoImportOnNonExistFile)
{
InferenceEngine::IExecutableNetwork::Ptr importedNetworkPtr;
ASSERT_EQ(StatusCode::NETWORK_NOT_READ, _vpuPluginPtr->ImportNetwork(importedNetworkPtr, "I_dont_exist.blob", {}, nullptr));
}
-TEST_F(myriadBlobExportTests_nightly, CanInferImportedNetworkOnExportedBlob)
+TEST_F(myriadBlobExportTests_smoke, CanInferImportedNetworkOnExportedBlob)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
ASSERT_EQ(StatusCode::OK, inferRequest->Infer(&_resp)) << _resp.msg;
}
-TEST_F(myriadBlobExportTests_nightly, CanGetPerfCountsImportedNetwork)
+TEST_F(myriadBlobExportTests_smoke, CanGetPerfCountsImportedNetwork)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
}
}
-class myriadConfigsWithBlobImportTests_nightly: public myriadLayersTests_nightly {
+class myriadConfigsWithBlobImportTests_smoke: public myriadLayersTests_nightly {
protected:
// use this stream to redirect cout to it,
// needs to be able check output on warnings
};
-TEST_F(myriadConfigsWithBlobImportTests_nightly, TryingToSetCompileOptionPrintsWarning)
+TEST_F(myriadConfigsWithBlobImportTests_smoke, TryingToSetCompileOptionPrintsWarning)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
}
}
-TEST_F(myriadConfigsWithBlobImportTests_nightly, TryingToSetRuntimeOptionDoesNotPrintWarning)
+TEST_F(myriadConfigsWithBlobImportTests_smoke, TryingToSetRuntimeOptionDoesNotPrintWarning)
{
auto fnPtr = ngraph::builder::subgraph::makeSplitConvConcat();
ASSERT_NO_THROW(_cnnNetwork = CNNNetwork(fnPtr));
}
-using myriadBlobExportAccuracyDifferentCountInAndOutTests_nightly = myriadLayerTestBaseWithParam<std::vector<size_t>>;
+using myriadBlobExportAccuracyDifferentCountInAndOutTests_smoke = myriadLayerTestBaseWithParam<std::vector<size_t>>;
-TEST_F(myriadBlobExportAccuracyDifferentCountInAndOutTests_nightly, IsResultOfImportedAndGeneratedModelSame)
+TEST_F(myriadBlobExportAccuracyDifferentCountInAndOutTests_smoke, IsResultOfImportedAndGeneratedModelSame)
{
SetSeed(DEFAULT_SEED_VALUE);
}
-using myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_nightly = myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::Precision, InferenceEngine::Precision>>;
+using myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_smoke = myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::Precision, InferenceEngine::Precision>>;
-TEST_P(myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_nightly, IsResultOfImportedAndGeneratedModelSame)
+TEST_P(myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_smoke, IsResultOfImportedAndGeneratedModelSame)
{
SetSeed(DEFAULT_SEED_VALUE);
InferenceEngine::Precision inputPrecision = std::get<0>(GetParam());
CompareCommonAbsolute(importedOutputBlobPtr, outputBlobPtr, 0.f);
}
-using myriadExtraTests_nightly = myriadLayersTests_nightly;
+using myriadExtraTests_smoke = myriadLayersTests_nightly;
-TEST_F(myriadExtraTests_nightly, ThereIsNoSegfaultOnZeroConvolutionWeights) {
+TEST_F(myriadExtraTests_smoke, ThereIsNoSegfaultOnZeroConvolutionWeights) {
if (!CheckMyriadX()) {
SKIP() << "Non-MyriadX device";
}
static const std::vector<InferenceEngine::Precision> outputPrecisions = {InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP32};
-INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadBlobExportAccuracyDifferentPrecisionOfInAndOutTests_smoke,
::testing::Combine(::testing::ValuesIn(inputPrecisions), ::testing::ValuesIn(outputPrecisions)));
\ No newline at end of file
#include "myriad_layers_clamp_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsClampParams_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsClampParams_smoke,
::testing::Combine(
::testing::ValuesIn(s_clampTensors),
::testing::ValuesIn(s_clampParams))
};
};
-typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, clamp_test_params>> myriadLayersTestsClampParams_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, clamp_test_params>> myriadLayersTestsClampParams_smoke;
-TEST_P(myriadLayersTestsClampParams_nightly, TestsClamp) {
+TEST_P(myriadLayersTestsClampParams_smoke, TestsClamp) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto param = GetParam();
SizeVector tensor = std::get<0>(param);
#include "myriad_layers_concat_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsConcat_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsConcat_smoke,
::testing::Combine(
::testing::ValuesIn(s_concatCores),
::testing::ValuesIn(s_axis),
getTestCaseName
);
-TEST_F(myriadLayersTestsConcat_nightly, ConcatAfterNormalize) {
+TEST_F(myriadLayersTestsConcat_smoke, ConcatAfterNormalize) {
const std::string model = R"V0G0N(
<Net name="ConcatAfterNormalize" version="2" batch="1">
<layers>
using namespace InferenceEngine;
using myriadConcatTestParams = std::tuple<InferenceEngine::SizeVector, int32_t, InferenceEngine::SizeVector, int32_t, int32_t >;
-typedef myriadLayerTestBaseWithParam<myriadConcatTestParams> myriadLayersTestsConcat_nightly;
+typedef myriadLayerTestBaseWithParam<myriadConcatTestParams> myriadLayersTestsConcat_smoke;
void CheckOutput(const InferenceEngine::BlobMap& input, InferenceEngine::Blob::Ptr actual, int32_t axis) {
int32_t OW = 1;
ASSERT_NE(n_checks, 0);
}
-TEST_P(myriadLayersTestsConcat_nightly, Concat) {
+TEST_P(myriadLayersTestsConcat_smoke, Concat) {
auto param = GetParam();
auto core = std::get<0>(param);
auto axis = std::get<1>(param);
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(tricky_ncdhw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_ncdhw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
)
);
-INSTANTIATE_TEST_CASE_P(tricky_ncdhw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_ncdhw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(simple_ncdhw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_ncdhw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
)
);
-INSTANTIATE_TEST_CASE_P(simple_ncdhw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_ncdhw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(tricky_nchw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_nchw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 3}),
)
);
-INSTANTIATE_TEST_CASE_P(tricky_nchw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(tricky_nchw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 3}),
//
//----------------------------------------------------------------------
-INSTANTIATE_TEST_CASE_P(simple_nchw_userpad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_nchw_userpad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
)
);
-INSTANTIATE_TEST_CASE_P(simple_nchw_autopad, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(simple_nchw_autopad, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
// NB: requires 1GB of RAM on device (e.g. ma2085 board)
// Stress test: large image with large depth, large kernel
-INSTANTIATE_TEST_CASE_P(i3d_id6, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id6, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 79, 224, 224}),
Values(KernelShape {7, 7, 7}),
Values(Groups(1))));
// Like `i3d_id6` test but with smaller image (so must fit in Myriad X)
-INSTANTIATE_TEST_CASE_P(i3d_id6_shrink, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id6_shrink, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 3, 39, 112, 112}),
Values(KernelShape {7, 7, 7}),
Values(Groups(1))));
// Average-size image, trivial kernel 1x1x1
-INSTANTIATE_TEST_CASE_P(i3d_id12, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id12, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 64, 40, 56, 56}),
Values(KernelShape {1, 1, 1}),
Values(Groups(1))));
// Average-size image, non-trivial kernel 3x3x3
-INSTANTIATE_TEST_CASE_P(i3d_id17, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id17, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 64, 40, 56, 56}),
Values(KernelShape {3, 3, 3}),
Values(Groups(1))));
// Small image (7x7), trivial kernel
-INSTANTIATE_TEST_CASE_P(i3d_id249, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id249, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 832, 10, 7, 7}),
Values(KernelShape {1, 1, 1}),
Values(Groups(1))));
// Small image (7x7), non-trivial kernel
-INSTANTIATE_TEST_CASE_P(i3d_id301, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id301, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 48, 10, 7, 7}),
Values(KernelShape {3, 3, 3}),
Values(Groups(1))));
// Trivial image (1x1), trivial kernel
-INSTANTIATE_TEST_CASE_P(i3d_id314, myriadLayersConvNDTest_nightly,
+INSTANTIATE_TEST_CASE_P(i3d_id314, myriadLayersConvNDTest_smoke,
Combine(
Values(InputShape {1, 1024, 9, 1, 1}),
Values(KernelShape {1, 1, 1}),
}
};
-class myriadLayersConvNDTest_nightly: public ConvNDTest {};
+class myriadLayersConvNDTest_smoke: public ConvNDTest {};
-TEST_P(myriadLayersConvNDTest_nightly, ConvND) {
+TEST_P(myriadLayersConvNDTest_smoke, ConvND) {
testConvND();
}
#include "myriad_layers_convert_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsIOConvert_nightly,
+ accuracy, myriadLayersTestsIOConvert_smoke,
::testing::Combine(
::testing::ValuesIn(inputsDims),
- ::testing::ValuesIn(precisionsIO)
+ ::testing::ValuesIn(precisionsIO),
+ ::testing::Values("")
)
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsConvertWithFP16_nightly,
+ accuracy_customu8f16, myriadLayersTestsIOConvert_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(inputsDims4D),
+ ::testing::Values(PrecisionPair{Precision::U8, Precision::FP16}),
+ ::testing::Values(s_CustomConfig)
+ )
+);
+
+INSTANTIATE_TEST_CASE_P(
+ accuracy, myriadLayersTestsConvertWithFP16_smoke,
::testing::Combine(
::testing::ValuesIn(inputsDims),
::testing::ValuesIn(withFP16Precisions)
using namespace InferenceEngine;
+PRETTY_PARAM(CustomConfig, std::string);
+
typedef std::pair<Precision, Precision> PrecisionPair;
-typedef std::tuple<InferenceEngine::SizeVector, PrecisionPair> ConvertIOTestParam;
+typedef std::tuple<SizeVector, PrecisionPair, CustomConfig> ConvertIOTestParam;
typedef std::tuple<InferenceEngine::SizeVector, Precision> ConvertWithFP16TestParam;
-class myriadLayersTestsIOConvert_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<ConvertIOTestParam> {
+static CustomConfig s_CustomConfig = {
+#ifdef VPU_HAS_CUSTOM_KERNELS
+ {getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"}
+#endif
};
-TEST_P(myriadLayersTestsIOConvert_nightly, TestsIOConvert)
-{
- const auto& param = ::testing::WithParamInterface<ConvertIOTestParam>::GetParam();
- const auto& inputDims = std::get<0>(param);
- const auto& precisions = std::get<1>(param);
- const auto& inputPrecision = precisions.first;
- const auto& outputPrecision = precisions.second;
+typedef myriadLayerTestBaseWithParam<ConvertIOTestParam> myriadLayersTestsIOConvert_smoke;
- SetInputTensors({inputDims});
- SetOutputTensors({inputDims});
+TEST_P(myriadLayersTestsIOConvert_smoke, TestsIOConvert) {
+ const SizeVector& dims = std::get<0>(GetParam());
+ const PrecisionPair& precision = std::get<1>(GetParam());
+ const std::string& customConfig = std::get<2>(GetParam());
+ const auto& inputPrecision = precision.first;
+ const auto& outputPrecision = precision.second;
- makeSingleLayerNetwork(LayerInitParams("Copy"),
- NetworkInitParams()
- .inputPrecision(inputPrecision)
- .outputPrecision(outputPrecision));
- ASSERT_TRUE(Infer());
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- auto tensorDesc = InferenceEngine::TensorDesc(
- outputPrecision, _outputMap.begin()->second->getTensorDesc().getDims(),
- _outputMap.begin()->second->getTensorDesc().getLayout());
- auto refBlob = make_blob_with_precision(outputPrecision, tensorDesc);
- refBlob->allocate();
+ _config[VPU_CONFIG_KEY(DISABLE_CONVERT_STAGES)] = CONFIG_VALUE(YES);
+
+ SetInputTensors({dims});
+ SetOutputTensors({dims});
+
+ std::map<std::string, std::string> params = {
+ {"precision", std::to_string(outputPrecision)},
+ {"scale", std::to_string(1.0)}, // scale and bias are needed for custom layer
+ {"bias", std::to_string(0.0)}
+ };
- ref_convert(_inputMap.begin()->second, refBlob);
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("Convert")
+ .params(params)
+ .in({dims})
+ .out({dims})
+ .outPrecision(outputPrecision),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .inputPrecision(inputPrecision)
+ .outputPrecision(outputPrecision)
+ .lockLayout(true)));
- CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
+ ASSERT_TRUE(Infer());
+
+ ASSERT_NO_FATAL_FAILURE(ref_convert(_inputMap.begin()->second, _refBlob));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-class myriadLayersTestsConvertWithFP16_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<ConvertWithFP16TestParam> {
-};
+typedef myriadLayerTestBaseWithParam<ConvertWithFP16TestParam> myriadLayersTestsConvertWithFP16_smoke;
-TEST_P(myriadLayersTestsConvertWithFP16_nightly, TestsConvertWithFP16)
+TEST_P(myriadLayersTestsConvertWithFP16_smoke, TestsConvertWithFP16)
{
const auto& param = ::testing::WithParamInterface<ConvertWithFP16TestParam>::GetParam();
const auto& inputDims = std::get<0>(param);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-std::vector<InferenceEngine::SizeVector> inputsDims = {
+std::vector<SizeVector> inputsDims = {
{ 224, 224 },
{ 3, 224, 224 },
{ 1, 1, 224, 224 },
{ 2, 2, 3, 224, 224 },
};
+std::vector<SizeVector> inputsDims4D = {
+ { 1, 1, 224, 224 },
+ { 1, 1, 416, 416 },
+ { 1, 1, 62, 62 },
+ { 1, 1, 227, 227 },
+ { 1, 3, 224, 224 },
+ { 1, 3, 360, 480 },
+};
+
std::vector<PrecisionPair> precisionsIO = {
{Precision::U8, Precision::FP16},
{Precision::FP32, Precision::FP16},
#include "myriad_layers_convolution1x1.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution1x1LayerTests_nightly,
+INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution1x1LayerTests_smoke,
::testing::Combine(
::testing::Values(CONFIG_VALUE(NO)),
::testing::ValuesIn(s_isHWC),
PRETTY_PARAM(dimsConfig, dims_config);
PRETTY_PARAM(isHWC, int);
-typedef myriadLayerTestBaseWithParam<std::tuple<std::string, isHWC, dims_config>> myriadConvolution1x1LayerTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<std::string, isHWC, dims_config>> myriadConvolution1x1LayerTests_smoke;
void refConvolution1x1(const Blob::Ptr src, InferenceEngine::TBlob<uint8_t>::Ptr weights, Blob::Ptr dst, int isHWC) {
ie_fp16 *in = static_cast<ie_fp16*>(src->buffer());
size_t IW = in_width;
size_t IH = in_height;
size_t IC = in_channels;
-
+
const auto& out_dims = dst->getTensorDesc().getDims();
size_t out_width = out_dims[out_dims.size() - 1];
size_t out_height = out_dims[out_dims.size() - 2];
continue;
}
uint32_t indx;
- if(isHWC == 1){
- indx = ic + iw * IC + ih * IC * IW;
+ if (isHWC == 1) {
+ indx = ic + iw * IC + ih * IC * IW;
valYXZ = (valYXZ) + (PrecisionUtils::f16tof32(in[indx]) * PrecisionUtils::f16tof32(w[oc*IC + ic]));
}
- else {
+ else {
indx = iw + ih * IW + ic * IW * IH;
valZYX = PrecisionUtils::f32tof16(PrecisionUtils::f16tof32(valZYX) + PrecisionUtils::f16tof32(PrecisionUtils::f32tof16(PrecisionUtils::f16tof32(in[indx]) * PrecisionUtils::f16tof32(w[oc*IC + ic]))));
}
}
- if(isHWC == 1){
+ if (isHWC == 1) {
out[oc*OH*OW + oh*OW + ow] = PrecisionUtils::f32tof16(valYXZ);
}
else {
}
}
-TEST_P(myriadConvolution1x1LayerTests_nightly, Convolution1x1) {
+TEST_P(myriadConvolution1x1LayerTests_smoke, Convolution1x1) {
std::string model = R"V0G0N(
<net name="Convolution1x1" version="2" batch="1">
<layers>
</output>
</layer>
<layer id="2" name="conv1x1" precision="FP16" type="Convolution">
- <data isHWC="@isHWC@" stride-x="1" stride-y="1" pad-x="0" pad-y="0" kernel-x="1" kernel-y="1" output="48" group="1"/>
+ <data stride="1,1" pad="0,0" kernel="1,1" dilation="1,1" output="48" group="1"/>
<input>
<port id="0">
<dim>@IB@</dim>
std::string HWConfigValue = std::get<0>(GetParam());
int isHWC = std::get<1>(GetParam());
dims_config customConfig = std::get<2>(GetParam());
+ const auto layout = isHWC ? Layout::NHWC : Layout::NCHW;
if(!customConfig.custom_config.empty() && !CheckMyriadX()) {
GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
size_t num_weights = IC * OC;
- model.replace( model.find("@isHWC@"), sizeof("@isHWC@") -1, std::to_string(isHWC));
-
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
model.replace( model.find("@IC@"), sizeof("@IC@") -1, std::to_string(IC));
model.replace( model.find("@size_weights@"), sizeof("@size_weights@") -1, std::to_string(num_weights * sizeof(ie_fp16)));
InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(GenWeights(num_weights));
-
+
StatusCode st;
InferenceEngine::Core ie;
_inputsInfo = network.getInputsInfo();
_inputsInfo["data"]->setPrecision(Precision::FP16);
- (isHWC) ? _inputsInfo["data"]->setLayout(NHWC) : _inputsInfo["data"]->setLayout(NCHW);
+ _inputsInfo["data"]->setLayout(layout);
_outputsInfo = network.getOutputsInfo();
_outputsInfo["conv1x1"]->setPrecision(Precision::FP16);
- _outputsInfo["conv1x1"]->setLayout(NCHW);
+ _outputsInfo["conv1x1"]->setLayout(layout);
ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network,
{{VPU_CONFIG_KEY(CUSTOM_LAYERS), customConfig.custom_config}, {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), HWConfigValue}}, &_resp));
#ifdef VPU_HAS_CUSTOM_KERNELS
{0, 1}
#endif
-};
\ No newline at end of file
+};
#include "myriad_layers_convolution3x3.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution3x3LayerTests_nightly,
+INSTANTIATE_TEST_CASE_P(myriad, myriadConvolution3x3LayerTests_smoke,
::testing::Combine(
::testing::Values(CONFIG_VALUE(NO)),
::testing::ValuesIn(s_DimsConfig)));
PRETTY_PARAM(hwAcceleration, std::string);
PRETTY_PARAM(dimsConfig, dims_config_con3x3);
-typedef myriadLayerTestBaseWithParam<std::tuple<std::string, dims_config_con3x3>> myriadConvolution3x3LayerTests_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<std::string, dims_config_con3x3>> myriadConvolution3x3LayerTests_smoke;
void refConvolution3x3(const Blob::Ptr src, InferenceEngine::TBlob<uint8_t>::Ptr weights, Blob::Ptr dst, int stride_x, int stride_y, int pad_x, int pad_y, int dilation_x, int dilation_y) {
}
}
-TEST_P(myriadConvolution3x3LayerTests_nightly, Convolution3x3) {
+TEST_P(myriadConvolution3x3LayerTests_smoke, Convolution3x3) {
std::string model = R"V0G0N(
<net name="Convolution3x3" version="2" batch="1">
<layers>
</output>
</layer>
<layer id="2" name="conv3x3" precision="FP16" type="Convolution">
- <data stride-x="@stride-x@" stride-y="@stride-y@" pad-x="1" pad-y="1" dilation-x="1" dilation-y="1" output="1" kernel-x="3" kernel-y="3"/>
+ <data stride="@stride-x@,@stride-y@" pads_begin="1,1" pads_end="1,1" dilation="1,1" output="1" kernel="3,3"/>
<input>
<port id="0">
<dim>@IB@</dim>
#include "myriad_layers_convolution_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_chw_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_chw_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 37, 43, 43)
, MAKE_STRUCT(tensor_test_params, 1, 37, 19, 19))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 32, 24})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_0, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_0, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 5, 1, 1})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_2, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_2, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 128, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_3, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_3, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 4, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_4, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_4, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 256, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_5, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_5, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 1024, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_6, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_6, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 192, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_7, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_7, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 160, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_8, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_8, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 224, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_9, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_9, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 1024, 4, 4})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_10, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_10, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 64, 56, 56})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_11, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_11, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 192, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_12, myriadLayers_BatchTest_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_12, myriadLayers_BatchTest_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest2_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_Batch_1, myriadLayers_BatchTest2_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 7, 7})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3X3, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3X3, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 32, 24})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3X1, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3X1, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 32, 24})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1X3, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1X3, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 4, 16, 16})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_3X3X3_ConstInput_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_3X3X3_ConstInput_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 10, 10})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_crossroad_spatialConv, myriadLayerConvolutionTensorFlow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_crossroad_spatialConv, myriadLayerConvolutionTensorFlow_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 3, 1024, 1024))
, ::testing::Values<DimsOutput>(MAKE_STRUCT(tensor_test_params, 1, 3, 512, 512))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_inception_v2, myriadLayerConvolutionTensorFlow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_inception_v2, myriadLayerConvolutionTensorFlow_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 28, 28))
, ::testing::Values<DimsOutput>(MAKE_STRUCT(tensor_test_params, 1, 64, 14, 14))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_inception_v1, myriadLayerConvolutionTensorFlow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_inception_v1, myriadLayerConvolutionTensorFlow_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 3, 224, 224),
MAKE_STRUCT(tensor_test_params, 1, 32, 224, 224)
)
);
-INSTANTIATE_TEST_CASE_P(test_3x3_SSD_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_3x3_SSD_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 512, 19, 19))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(test_TF_Resnet_50, myriadLayers_IR3_ConvTests_nightly,
+INSTANTIATE_TEST_CASE_P(test_TF_Resnet_50, myriadLayers_IR3_ConvTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 38, 38})
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(test_3x3_icvnet_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_3x3_icvnet_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 20, 20))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(test_5x5_with_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_5x5_with_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 5, 5))
)
);
-INSTANTIATE_TEST_CASE_P(test_7x7_with_dilation, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_7x7_with_dilation, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 7, 7))
);
-INSTANTIATE_TEST_CASE_P(test_conv1x1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_conv1x1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 10, 13, 13))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_3x3_convolution, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_3x3_convolution, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 512, 13, 13))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_1x1_convolution, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(test_yolo_tiny_2_512x13x13_use_1x1_convolution, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 4608, 13, 13))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77)
, MAKE_STRUCT(tensor_test_params, 1, 32, 112, 96))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_group_large_input, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_group_large_input, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 192, 336))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_any_group, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_any_group, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 32, 64, 77))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3_with_group, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3_with_group, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 80, 80)
, MAKE_STRUCT(tensor_test_params, 1, 36, 80, 80))
)
);
-INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3s1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(set_optimization_for_3x3s1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 80, 80))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1x1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1x1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 64, 64)
, MAKE_STRUCT(tensor_test_params, 1, 32, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 8, 16, 16)
, MAKE_STRUCT(tensor_test_params, 1, 8, 59, 73))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1x3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1x3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 8, 59, 73))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 3), MAKE_STRUCT(param_size, 3, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_5x5, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_5x5, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 32, 32)
/*, MAKE_STRUCT(tensor_test_params, 1, 8, 511, 399) failed*/)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_7x7, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_7x7, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 8, 32, 32)
/*, MAKE_STRUCT(tensor_test_params, 1, 8, 511, 399) failed*/)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 3, 720, 1280))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_2, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_2, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 357, 637))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3_large_input_3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 16, 359, 639))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1x1_large_input, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1x1_large_input, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 24, 355, 635))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 1, 1))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_0, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_0, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 128, 38, 38))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
, ::testing::Values<layoutPreference>(vpu::LayoutPreference::ChannelMinor)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_1, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_1, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 2, 3))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
, ::testing::Values<layoutPreference>(vpu::LayoutPreference::ChannelMinor)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_2, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_2, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 2, 2))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
, ::testing::Values<layoutPreference>(vpu::LayoutPreference::ChannelMinor)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_small_input_3, myriadLayerConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_small_input_3, myriadLayerConvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 1, 1))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
PRETTY_PARAM(layoutPreference, vpu::LayoutPreference);
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, kernel, stride, pad
- , out_channels, group, dilation_factor, layoutPreference >> myriadLayerConvolution_nightly;
+ , out_channels, group, dilation_factor, layoutPreference >> myriadLayerConvolution_smoke;
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, DimsOutput, kernel, stride, pad
- , group, dilation_factor, layoutPreference >> myriadLayerConvolutionTensorFlow_nightly;
+ , group, dilation_factor, layoutPreference >> myriadLayerConvolutionTensorFlow_smoke;
-TEST_P(myriadLayerConvolution_nightly, Convolution) {
+TEST_P(myriadLayerConvolution_smoke, Convolution) {
tensor_test_params input_dims = get<0>(GetParam());
param_size kernel = get<1>(GetParam());
param_size stride = get<2>(GetParam());
CompareCommonAbsolute(outputBlob, _refBlob, maxerr);
}
-TEST_P(myriadLayerConvolutionTensorFlow_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionTensorFlow_smoke, Convolution) {
tensor_test_params input_dims = get<0>(GetParam());
tensor_test_params output_dims = get<1>(GetParam());
param_size kernel = get<2>(GetParam());
}
}
-class myriadLayers_3X3X3_ConstInput_nightly: public ConvolutionTest<vpu::LayoutPreference>{
+class myriadLayers_3X3X3_ConstInput_smoke: public ConvolutionTest<vpu::LayoutPreference>{
};
-TEST_P(myriadLayers_3X3X3_ConstInput_nightly, Convolution) {
+TEST_P(myriadLayers_3X3X3_ConstInput_smoke, Convolution) {
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, uint32_t, uint32_t, vpu::LayoutPreference>>::GetParam();
const auto layoutPreference = std::get<6>(p);
/* IR version 3 tests, main difference is a changes in padding parameters definitions */
typedef std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, param_size, uint32_t, uint32_t> IR3_params;
-class myriadLayers_IR3_ConvTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_IR3_ConvTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_params> {
};
-TEST_P(myriadLayers_IR3_ConvTests_nightly, Conv) {
+TEST_P(myriadLayers_IR3_ConvTests_smoke, Conv) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayers_BatchTest_ConvTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_BatchTest_ConvTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_params> {
};
-class myriadLayers_BatchTest2_ConvTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_BatchTest2_ConvTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_params> {
};
}
}
-TEST_P(myriadLayers_BatchTest_ConvTests_nightly, Conv) {
+TEST_P(myriadLayers_BatchTest_ConvTests_smoke, Conv) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
</net>
)V0G0N";
-TEST_P(myriadLayers_BatchTest2_ConvTests_nightly, Conv) {
+TEST_P(myriadLayers_BatchTest2_ConvTests_smoke, Conv) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
#include "myriad_layers_copy_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerCopy_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerCopy_smoke,
::testing::Combine(
::testing::Values<NDims>(MAKE_STRUCT(nd_tensor_test_params, {36, 19, 20, 21})
, MAKE_STRUCT(nd_tensor_test_params, {7, 8, 5, 12})
PRETTY_PARAM(NDims, nd_tensor_test_params);
-typedef myriadLayerTestBaseWithParam<tuple<NDims, int>> myriadLayerCopy_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<NDims, int>> myriadLayerCopy_smoke;
-TEST_P(myriadLayerCopy_nightly, Copy) {
+TEST_P(myriadLayerCopy_smoke, Copy) {
nd_tensor_test_params input_dims = get<0>(GetParam());
int ndims = get<1>(GetParam());
#include "myriad_layers_crop_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy_Crop, myriadLayerCropOneInputAndDim_nightly,
+ accuracy_Crop, myriadLayerCropOneInputAndDim_smoke,
::testing::Combine(
::testing::ValuesIn(s_tileTensors1),
::testing::ValuesIn(s_tileTensors2),
);
INSTANTIATE_TEST_CASE_P(
- accuracy_Crop1, myriadLayerCropOneInput_nightly,
+ accuracy_Crop1, myriadLayerCropOneInput_smoke,
::testing::Combine(
::testing::ValuesIn(s_tileTensors1),
::testing::ValuesIn(s_tileTensors2),
);
INSTANTIATE_TEST_CASE_P(
- accuracy_Crop2, myriadLayerCropTwoInputs_nightly,
+ accuracy_Crop2, myriadLayerCropTwoInputs_smoke,
::testing::Combine(
::testing::ValuesIn(s_tileTensors1),
::testing::ValuesIn(s_tileTensors2),
PRETTY_PARAM(crop_begin, InferenceEngine::SizeVector)
PRETTY_PARAM(crop_end, InferenceEngine::SizeVector)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, offset, dim >> myriadLayerCropOneInputAndDim_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, crop_begin, crop_end >> myriadLayerCropOneInput_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, Dims, crop_axis, offset >> myriadLayerCropTwoInputs_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, offset, dim >> myriadLayerCropOneInputAndDim_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, crop_axis, crop_begin, crop_end >> myriadLayerCropOneInput_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dims, Dims, crop_axis, offset >> myriadLayerCropTwoInputs_smoke;
static void ref_crop(const Blob::Ptr src,
Blob::Ptr dst,
}
}
-TEST_P(myriadLayerCropOneInputAndDim_nightly, CropWithOneInputAndDim) {
+TEST_P(myriadLayerCropOneInputAndDim_smoke, CropWithOneInputAndDim) {
auto param = GetParam();
tensor_test_params tensor1 = std::get<0>(param);
tensor_test_params tensor2 = std::get<1>(param);
CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayerCropOneInput_nightly, CropWithOneInput) {
+TEST_P(myriadLayerCropOneInput_smoke, CropWithOneInput) {
auto param = GetParam();
tensor_test_params tensor1 = std::get<0>(param);
tensor_test_params tensor2 = std::get<1>(param);
CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayerCropTwoInputs_nightly, CropWithTwoInputs) {
+TEST_P(myriadLayerCropTwoInputs_smoke, CropWithTwoInputs) {
auto param = GetParam();
tensor_test_params tensor1 = std::get<0>(param);
tensor_test_params tensor2 = std::get<1>(param);
#include "myriad_layers_custom_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsShuffleChannel_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsShuffleChannel_smoke,
::testing::Combine(
::testing::ValuesIn(s_ShuffleChannelTensors),
::testing::ValuesIn(s_ShuffleChannelGroup),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsQuantize_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFakeQuantize_smoke,
::testing::Combine(
::testing::ValuesIn(s_QuantizeTensors),
::testing::ValuesIn(s_QuantizeLevels),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsQuantizeBinarize_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsQuantizeBinarize_smoke,
::testing::Combine(
::testing::ValuesIn(s_QuantizeTensors),
::testing::ValuesIn(s_QuantizeLevels),
::testing::ValuesIn(s_QuantizeSwitchOut),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBinaryConvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsBinaryConvolution_smoke,
::testing::Combine(
::testing::ValuesIn(s_BinaryConvolutionTensors),
::testing::ValuesIn(s_BinaryConvolutionDilations),
::testing::ValuesIn(s_BinaryConvolutionStrides),
::testing::ValuesIn(s_CustomConfig)));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExperimentalDetectronPriorGridGenerator_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExperimentalDetectronPriorGridGenerator_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExperimentalDetectronPriorGridGeneratorImageDims),
::testing::ValuesIn(s_CustomConfig)));
+
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsCorrelate_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_CorrelateParams),
+ ::testing::ValuesIn(s_CustomConfig)));
+
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSpatialTransform_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_SpatialTransformInputs),
+ ::testing::ValuesIn(s_SpatialTransformTheta),
+ ::testing::ValuesIn(s_CustomConfig)));
// SPDX-License-Identifier: Apache-2.0
//
+#pragma once
+
#include "myriad_layers_tests.hpp"
+#include <vector>
+#include <array>
+#include <algorithm>
using namespace InferenceEngine;
int32_t W = 1;
int32_t H = 1;
int32_t C = 1;
+
get_dims(src, W, H, C);
for (int c = 0; c < C; c++) {
float olow = PrecisionUtils::f16tof32(output_low->size() == 1 ? output_low_data[0] : output_low_data[c]);
float ohigh = PrecisionUtils::f16tof32(output_high->size() == 1 ? output_high_data[0] : output_high_data[c]);
+ // emulate half math to be close to half float SHAVE implementation
+ float a = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16((float)(levels - 1) / (ihigh - ilow)));
+ float b = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16((ohigh - olow) / (float)(levels - 1)));
+
for (int h = 0; h < H; h++) {
for (int w = 0; w < W; w++) {
int idx = (isCHW) ? c*W*H + h*W + w : c + h*W*C + w*C;
dst_val = olow;
} else if (src_val > ihigh) {
dst_val = ohigh;
- } else {
- dst_val = round((src_val - ilow) * ((float)(levels - 1) / (ihigh - ilow))) * ((ohigh - olow) / (float)(levels - 1))+ olow;
- //dst_val = round((src_val - ilow) / (ihigh - ilow) * (levels - 1)) / (levels - 1) * (ohigh - olow) + olow;
+ } else {
+ if(!(ihigh - ilow) || !(levels - 1)) {
+ dst_val = olow;
+ } else {
+ // quantization pass
+ float quantized = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16((src_val - ilow) * a));
+ // de-quantization pass
+ dst_val = PrecisionUtils::f16tof32(PrecisionUtils::f32tof16(roundf(quantized) * b)) + olow;
+ }
}
dst_data[idx] = PrecisionUtils::f32tof16(dst_val);
}
}
}
+
+// Repacks a single image from channel-major (CHW) layout 'in' into a
+// channel-minor (HWC) buffer 'out' that has a spatial border of 'padding'
+// pixels on every side. Border cells are never written here, so callers must
+// pass a zero-initialized buffer. 'widthheight' must equal width * height.
+// NOTE(review): 'height' and 'pwidthheight' are accepted for signature parity
+// with the original Caffe kernel but are unused — TODO confirm.
+static void rearrange(const ie_fp16* in, ie_fp16* out, int num, int channels, int width, int height,
+ int widthheight, int padding, int pwidthheight)
+{
+ (void) height;
+ (void) pwidthheight;
+
+ ASSERT_TRUE(num == 1) << "batch is not supported for Myriad";
+
+ for (int xy = 0; xy < widthheight; xy++)
+ {
+ for (int ch = 0; ch < channels; ch++)
+ {
+ ie_fp16 value = in[ch * widthheight + xy];
+
+ // Shift the flat spatial index into the padded coordinate system and
+ // linearize it with the padded row pitch (width + 2 * padding).
+ int xpad = (xy % width + padding);
+ int ypad = (xy / width + padding);
+ int xypad = ypad * (width + 2 * padding) + xpad;
+
+ out[xypad * channels + ch] = value;
+ }
+ }
+}
+
+// Reference correlation (multiply type): for each output position, compares a
+// kernel_size x kernel_size patch of 'bottom0' against displaced patches of
+// 'bottom1' inside the neighborhood grid, emitting one output channel per
+// displacement. Inputs must be in the padded channel-minor layout produced by
+// rearrange(); each result is normalized by the patch element count.
+static void correlate(int nthreads, int num, int topwidth, int topheight, int topchannels, int topcount,
+ int max_displacement, int neighborhood_grid_radius, int neighborhood_grid_width,
+ int kernel_radius, int kernel_size, int stride1, int stride2,
+ int bottomwidth, int bottomheight, int bottomchannels,
+ const ie_fp16* bottom0, const ie_fp16* bottom1, ie_fp16* top)
+{
+ (void) nthreads;
+ (void) kernel_radius;
+ (void) topcount;
+ (void) bottomheight;
+ (void) num;
+
+ // Elements in one comparison patch; used as the normalization divisor below.
+ const int sumelems = kernel_size * kernel_size * bottomchannels;
+
+ auto patch_data = std::vector<ie_fp16>(sumelems);
+
+ for (int blockIdx_y = 0; blockIdx_y < topheight; blockIdx_y++)
+ {
+ for (int blockIdx_x = 0; blockIdx_x < topwidth; blockIdx_x++)
+ {
+ // Top-left corner of the reference patch in the padded bottom0.
+ int x1 = blockIdx_x * stride1 + max_displacement;
+ int y1 = blockIdx_y * stride1 + max_displacement;
+ // Load 3D patch into shared memory
+ for (int j = 0; j < kernel_size; j++)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int idx1 = ( j * kernel_size + i) * bottomchannels;
+ int idx2 = ((y1 + j) * bottomwidth + x1 + i) * bottomchannels;
+
+ for (int ch = 0; ch < bottomchannels; ch++)
+ patch_data[idx1 + ch] = bottom0[idx2 + ch];
+ }
+ }
+
+ for (int top_channel = 0; top_channel < topchannels; top_channel++)
+ {
+ // Pixel displacement encoded by this output channel (row-major walk
+ // over the (2r+1) x (2r+1) neighborhood grid, scaled by stride2).
+ int x2 = x1 + (top_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+ int y2 = y1 + (top_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride2;
+
+ // Accumulate in fp32 to limit rounding error of the fp16 products.
+ float sum = (0.0f);
+ for (int j = 0; j < kernel_size; j++)
+ {
+ for (int i = 0; i < kernel_size; i++)
+ {
+ int idx1 = ( j * kernel_size + i) * bottomchannels;
+ int idx2 = ((y2 + j) * bottomwidth + x2 + i) * bottomchannels;
+
+ for (int ch = 0; ch < bottomchannels; ch++)
+ sum += PrecisionUtils::f16tof32(patch_data[idx1 + ch]) * PrecisionUtils::f16tof32(bottom1[idx2 + ch]);
+ }
+ }
+ // Output is CHW: displacement channel is the outer dimension.
+ top[top_channel * topheight * topwidth + blockIdx_y * topwidth + blockIdx_x]
+ = PrecisionUtils::f32tof16(sum / (float)sumelems);
+ }
+ }
+ }
+}
+
+// CPU reference for the Correlate layer: validates blob shapes, repacks both
+// inputs into padded channel-minor buffers via rearrange(), then runs
+// correlate(). Fails the enclosing test (gtest ASSERT) on any shape mismatch.
+static void refCorrelate(const Blob::Ptr in0,
+ const Blob::Ptr in1,
+ Blob::Ptr out,
+ int kernel_size, int max_displacement, int pad_size,
+ int stride1, int stride2) {
+ // Correlation type = MULTIPLY
+ ASSERT_NE(in0, nullptr);
+ ASSERT_NE(in1, nullptr);
+ ASSERT_NE(out, nullptr);
+
+ const ie_fp16 *in0_data = in0->buffer();
+ const ie_fp16 *in1_data = in1->buffer();
+ ie_fp16 *out_data = out->buffer();
+ ASSERT_NE(in0_data, nullptr);
+ ASSERT_NE(in1_data, nullptr);
+ ASSERT_NE(out_data, nullptr);
+
+ int32_t IW0 = 1;
+ int32_t IH0 = 1;
+ int32_t IC0 = 1;
+ get_dims(in0, IW0, IH0, IC0);
+ int32_t IW1 = 1;
+ int32_t IH1 = 1;
+ int32_t IC1 = 1;
+ get_dims(in1, IW1, IH1, IC1);
+ ASSERT_EQ(IW0, IW1);
+ ASSERT_EQ(IH0, IH1);
+ ASSERT_EQ(IC0, IC1);
+
+ int32_t OW = 1;
+ int32_t OH = 1;
+ int32_t OC = 1;
+ get_dims(out, OW, OH, OC);
+
+ const int bottomchannels = IC0;
+
+ const int paddedbottomwidth = IW0 + 2 * pad_size;
+ const int paddedbottomheight = IH0 + 2 * pad_size;
+
+ const int kernel_radius = kernel_size / 2; //half of the comparison window
+ const int border_size = max_displacement + kernel_radius; //size of unreachable border region (on each side)
+
+ const int top_width = (int)ceilf((float)(paddedbottomwidth - border_size * 2) / (float)stride1);
+ const int top_height = (int)ceilf((float)(paddedbottomheight - border_size * 2) / (float)stride1);
+
+ ASSERT_TRUE(top_width >= 1 && top_height >= 1)
+ << "Correlation cannot be done with current settings. Neighborhood and kernel don't fit in blob";
+
+ // Given a center position in image 1,
+ // how many displaced positions in -x / +x direction do we consider in image 2 (neighborhoodGridWidth):
+ const int neighborhood_grid_radius = max_displacement / stride2;
+ const int neighborhood_grid_width = 2 * neighborhood_grid_radius + 1;
+
+ const int top_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ ASSERT_TRUE(OC == top_channels && OH == top_height && OW == top_width)
+ << "input and output blobs have incompatible shapes";
+
+ // std::vector value-initializes, so the padded borders rearrange() never
+ // touches stay zero.
+ auto rbot1 = std::vector<ie_fp16>(paddedbottomheight * paddedbottomwidth * bottomchannels);
+ auto rbot2 = std::vector<ie_fp16>(paddedbottomheight * paddedbottomwidth * bottomchannels);
+
+ const int bnum = 1;
+ const int topcount = top_width * top_height * top_channels;
+
+ const int pwidthheight = (IW0 + 2 * pad_size) * (IH0 + 2 * pad_size);
+
+ rearrange(in0_data, rbot1.data(), bnum, IC0, IW0, IH0, IW0 * IH0, pad_size, pwidthheight);
+ rearrange(in1_data, rbot2.data(), bnum, IC0, IW0, IH0, IW0 * IH0, pad_size, pwidthheight);
+
+ const int height = IH0 + 2 * pad_size;
+ const int width = IW0 + 2 * pad_size;
+ correlate(topcount, bnum, top_width, top_height, top_channels, topcount,
+ max_displacement, neighborhood_grid_radius, neighborhood_grid_width,
+ kernel_radius, kernel_size, stride1, stride2, width, height, IC0,
+ rbot1.data(), rbot2.data(), out_data);
+}
+
+// Bilinearly samples the H x W fp16 image 'pic' at normalized coordinates
+// (px, py) in [-1, 1]; px selects the row, py the column. The four explicit
+// if-blocks are the four taps of standard bilinear interpolation; taps that
+// fall outside the image contribute zero.
+// NOTE(review): although declared to return float, the final line converts
+// the result to fp16 and implicitly widens the raw half bits back to float;
+// the caller (refSpatialTransform) then narrows that value straight into an
+// ie_fp16 element, so the bit pattern round-trips correctly (all uint16
+// values are exact in float). Ugly, but behaviorally sound — keep in mind if
+// ever calling this from new code.
+static float transform_forward_cpu(const ie_fp16* pic, const float px, const float py, int W, int H) {
+ float res = 0.0f;
+ // Map normalized [-1, 1] coordinates to pixel space.
+ float x = (px + 1) / 2 * H;
+ float y = (py + 1) / 2 * W;
+ int m, n, k, l;
+ float w;
+ // Tap 1: (floor(x), floor(y)). k/l index the pixel; m/n hold the same
+ // values as floats' integer part for the weight computation.
+ k = (floorf(x));
+ l = (floorf(y));
+ m = floorf(x);
+ n = floorf(y);
+ w = 0;
+
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+
+ // Tap 2: (floor(x) + 1, floor(y)).
+ k = (floorf(x) + 1);
+ l = (floorf(y));
+ m = floorf(x) + 1;
+ n = floorf(y);
+
+ w = 0;
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+ // Tap 3: (floor(x), floor(y) + 1).
+ k = (floorf(x));
+ l = (floorf(y) + 1);
+ m = floorf(x);
+ n = floorf(y) + 1;
+ w = 0;
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+ // Tap 4: (floor(x) + 1, floor(y) + 1).
+ k = (floorf(x) + 1);
+ l = (floorf(y) + 1);
+ m = floorf(x) + 1;
+ n = floorf(y) + 1;
+ w = 0;
+
+ if (k >= 0 && k < H && l >= 0 && l < W) {
+ w = fmaxf(0.0f, 1 - fabsf(x - m)) * fmaxf(0.0f, 1 - fabsf(y - n));
+ res += w * PrecisionUtils::f16tof32(pic[k * W + l]);
+ }
+
+ return PrecisionUtils::f32tof16(res);
+}
+
+// C = A * B, or C = A * B^T when 'transposeB' is non-zero.
+// A is m x k; B is k x n (stored n x k when transposed); C is m x n and is
+// fully overwritten. Plain triple loop — adequate for the tiny matrices used
+// by refSpatialTransform (k = 3, n = 2).
+static void matrixMult(const std::vector<float>& A, const std::vector<float>& B, std::vector<float>& C,
+ const int m, const int n, const int k, const int transposeB) {
+ if (transposeB) {
+ for (int rowA = 0; rowA < m; rowA++) {
+ for (int rowB = 0; rowB < n; rowB++) {
+ float sum = 0;
+ for (int colA = 0; colA < k; colA++) {
+ sum += A[rowA * k + colA] * B[rowB * k + colA];
+ }
+ C[rowA * n + rowB] = sum;
+ }
+ }
+ } else {
+ for (int rowA = 0; rowA < m; rowA++) {
+ for (int colB = 0; colB < n; colB++) {
+ float sum = 0;
+ for (int colA = 0; colA < k; colA++) {
+ sum += A[rowA * k + colA] * B[colA * n + colB];
+ }
+ C[rowA * n + colB] = sum;
+ }
+ }
+ }
+}
+
+// CPU reference for the SpatialTransform layer: builds a normalized output
+// sampling grid, maps it through the 2x3 affine matrix 'theta', and
+// bilinearly samples 'src' (NCHW, batch 1 assumed) into 'dst', channel by
+// channel.
+static void refSpatialTransform(const Blob::Ptr& src, const Blob::Ptr& theta, Blob::Ptr dst) {
+ ASSERT_NE(src, nullptr);
+ ASSERT_NE(theta, nullptr);
+ ASSERT_NE(dst, nullptr);
+
+ const ie_fp16 *src_data = src->buffer();
+ const ie_fp16 *theta_data = theta->buffer();
+ ie_fp16 *dst_data = dst->buffer();
+ ASSERT_NE(src_data, nullptr);
+ ASSERT_NE(theta_data, nullptr);
+ ASSERT_NE(dst_data, nullptr);
+ ASSERT_EQ(theta->size(), 6);
+
+ int C = src->getTensorDesc().getDims()[1];
+ int H = src->getTensorDesc().getDims()[2];
+ int W = src->getTensorDesc().getDims()[3];
+
+ // input_grid_data: (x, y) source coordinates per output pixel;
+ // output_grid_data: homogeneous (row_norm, col_norm, 1) per output pixel.
+ auto input_grid_data = std::vector<float>(2*H*W);
+ auto output_grid_data = std::vector<float>(3*H*W);
+ auto theta_float = std::vector<float>(6);
+ for (size_t i = 0; i < 6; i++) {
+ theta_float[i] = PrecisionUtils::f16tof32(theta_data[i]);
+ }
+
+ for (int i = 0; i < H * W; ++i) {
+ output_grid_data[3 * i] = ((i / W) * 1.0f / H * 2.0f - 1.0f);
+ output_grid_data[3 * i + 1] = ((i % W) * 1.0f / W * 2.0f - 1.0f);
+ output_grid_data[3 * i + 2] = 1.0f;
+ }
+ // Actually execute
+ int M_size = H * W;
+ int N_size = 2;
+ int K_size = 3;
+ // (H*W x 3) grid times theta^T (3 x 2) -> (H*W x 2) source coordinates.
+ matrixMult(output_grid_data, theta_float, input_grid_data, M_size, N_size, K_size, 1);
+ for (int j = 0; j < C; ++j) {
+ for (int s = 0; s < H; ++s) {
+ for (int t = 0; t < W; ++t) {
+ int row_idx = W * s + t;
+ float px = input_grid_data[row_idx * 2 + 0];
+ float py = input_grid_data[row_idx * 2 + 1];
+
+ // src_offset selects the start of channel j's H x W plane.
+ size_t dst_offset = (j * H + s) * W + t;
+ size_t src_offset = (j * H + 0) * W + 0;
+ dst_data[dst_offset] = transform_forward_cpu(src_data + src_offset, px, py, W, H);
+ }
+ }
+ }
+}
+
static std::vector<std::string> s_CustomConfig = {
#ifdef VPU_HAS_CUSTOM_KERNELS
getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
PRETTY_PARAM(Kernel, param_size)
PRETTY_PARAM(Strides, int)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Group, std::string>> myriadLayersTestsShuffleChannel_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, std::string>> myriadLayersTestsQuantize_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, SwitchOut, std::string>> myriadLayersTestsQuantizeBinarize_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dilations, Group, Kernel, Strides, std::string>> myriadLayersTestsBinaryConvolution_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<std::vector<size_t>, std::string>>
-myriadLayersTestsExperimentalDetectronPriorGridGenerator_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Group, std::string>> myriadLayersTestsShuffleChannel_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, IRVersion, std::string>> myriadLayersTestsFakeQuantize_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Levels, SwitchOut, std::string>> myriadLayersTestsQuantizeBinarize_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Dilations, Group, Kernel, Strides, std::string>> myriadLayersTestsBinaryConvolution_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<std::vector<size_t>, std::string>> myriadLayersTestsExperimentalDetectronPriorGridGenerator_smoke;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, std::array<float, 6>, std::string>> myriadLayersTestsSpatialTransform_smoke;
+
+struct CorrelateParams {
+ tensor_test_params dims;
+ int kernel_size;
+ int pad_size;
+ int max_displacement;
+ int stride1;
+ int stride2;
+};
-TEST_P(myriadLayersTestsShuffleChannel_nightly, ShuffleChannel) {
- tensor_test_params dims = std::get<0>(GetParam());
+typedef myriadLayerTestBaseWithParam<std::tuple<CorrelateParams, std::string>> myriadLayersTestsCorrelate_smoke;
+
+TEST_P(myriadLayersTestsShuffleChannel_smoke, ShuffleChannel) {
+ tensor_test_params dims = std::get<0>(GetParam());
int group = std::get<1>(GetParam());
std::string customConfig = std::get<2>(GetParam());
2
};
-TEST_P(myriadLayersTestsQuantize_nightly, Quantize) {
+// Verifies the FakeQuantize layer (IR v7 and v10) against refQuantize with
+// randomly chosen per-tensor or per-channel quantization bounds.
+TEST_P(myriadLayersTestsFakeQuantize_smoke, FakeQuantize) {
tensor_test_params dims = std::get<0>(GetParam());
int levels = std::get<1>(GetParam());
- std::string customConfig = std::get<2>(GetParam());
+ _irVersion = std::get<2>(GetParam());
+ std::string customConfig = std::get<3>(GetParam());
- if(!customConfig.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
}
_config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- IN_OUT_desc inpt(5);
- for (int i = 0; i < inpt.size(); ++i) {
- inpt[i].resize(4);
- inpt[i][0] = dims.n;
- inpt[i][1] = 1;
- inpt[i][2] = 1;
- inpt[i][3] = 1;
- }
- inpt[0][1] = dims.c;
- inpt[0][2] = dims.h;
- inpt[0][3] = dims.w;
- for (int i = 1; i < inpt.size(); ++i) {
- if (rand()%2 > 0) {
- inpt[i][1] = dims.c;
- }
- }
+ // Fixed seed keeps the randomly chosen broadcast sizes and quantization
+ // bounds reproducible between runs.
+ srand(42);
+
+ const auto inputFqSize = rand() % 2 ? 1 : dims.c;
+ const auto outputFqSize = rand() % 2 ? 1 : dims.c;
+
+ // Inputs: data tensor plus input low/high and output low/high bounds,
+ // each either per-tensor (size 1) or per-channel (size C).
+ const auto inputDims = IN_OUT_desc{dims.asVector(),
+ {1, inputFqSize, 1, 1},
+ {1, inputFqSize, 1, 1},
+ {1, outputFqSize, 1, 1},
+ {1, outputFqSize, 1, 1}
+ };
- SetInputTensors(inpt);
+ SetInputTensors(inputDims);
SetOutputTensor(dims);
std::map<std::string, std::string> params;
params["levels"] = std::to_string(levels);
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("FakeQuantize").params(params)));
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("FakeQuantize").params(params),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
+
+ auto inputBlobs = std::vector<Blob::Ptr>{};
+ inputBlobs.reserve(5);
+ for (const auto& inputBlob : _inputMap) {
+ inputBlobs.push_back(inputBlob.second);
+ }
- ASSERT_TRUE(Infer());
+ // Fills a (low, high) bound pair with random values in [0, 255] such that
+ // low <= high element-wise.
+ const auto generateQuantBounds = [](const Blob::Ptr& lowBlob, const Blob::Ptr& highBlob) {
+ IE_ASSERT(lowBlob->size() == highBlob->size());
+ IE_ASSERT(lowBlob->getTensorDesc().getDims() == highBlob->getTensorDesc().getDims());
+
+ const auto lowBound = lowBlob->buffer().as<ie_fp16 *>();
+ const auto highBound = highBlob->buffer().as<ie_fp16 *>();
+ for (std::size_t i = 0; i < lowBlob->size(); i++) {
+ const float val1 = rand() % 256;
+ const float val2 = 255.0f - fabs(val1);
+ lowBound[i] = PrecisionUtils::f32tof16(std::min(val1, val2));
+ highBound[i] = PrecisionUtils::f32tof16(std::max(val1, val2));
+ }
+ };
- std::vector<Blob::Ptr> inputBlobs(inpt.size());
- auto inptIter = _inputMap.begin();
- for (int i = 0; i < inpt.size(); i++) {
- inputBlobs[i] = inptIter->second;
- inptIter++;
- }
+ // inputBlobs: [0] data, [1]/[2] input low/high, [3]/[4] output low/high.
+ generateQuantBounds(inputBlobs[1], inputBlobs[2]);
+ generateQuantBounds(inputBlobs[3], inputBlobs[4]);
+
+ ASSERT_TRUE(Infer());
+ // NOTE(review): refQuantize receives blobs [0], [1], [3], [4] — the input
+ // high bound ([2]) is never passed; confirm the reference signature really
+ // expects only four blobs here.
ASSERT_NO_FATAL_FAILURE(refQuantize(inputBlobs[0],
inputBlobs[1],
inputBlobs[3],
inputBlobs[4],
_refBlob,
- levels, false));
+ levels, true));
- CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.01f);
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 1.f);
}
-TEST_P(myriadLayersTestsQuantizeBinarize_nightly, Quantize_Binarization) {
+TEST_P(myriadLayersTestsQuantizeBinarize_smoke, Quantize_Binarization) {
std::string model = R"V0G0N(
<net name="Quantize_Binarization" version="2" batch="1">
<layers>
int OH = dims.h;
int OW = dims.w;
- int input_low_size = (rand()%2>0) ? dims.c : 1;
- int input_high_size = (levels == 2) ? input_low_size : ((rand()%2>0) ? dims.c : 1);
- int output_low_size = (rand()%2>0) ? dims.c : 1;
- int output_high_size = (levels == 2) ? output_low_size : ((rand()%2>0) ? dims.c : 1);
+ int input_low_size = (rand()%2>0) ? dims.c : 1;
+ int input_high_size = (levels == 2) ? input_low_size : ((rand()%2>0) ? dims.c : 1);
+ int output_low_size = (rand()%2>0) ? dims.c : 1;
+ int output_high_size = (levels == 2) ? output_low_size : ((rand()%2>0) ? dims.c : 1);
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
model.replace( model.find("@IB@"), sizeof("@IB@") -1, std::to_string(IB));
1
};
-TEST_P(myriadLayersTestsBinaryConvolution_nightly, BinaryConvolution) {
+TEST_P(myriadLayersTestsBinaryConvolution_smoke, BinaryConvolution) {
tensor_test_params dims = std::get<0>(GetParam());
int dilations = std::get<1>(GetParam());
int group = std::get<2>(GetParam());
1, 2
};
-TEST_P(myriadLayersTestsExperimentalDetectronPriorGridGenerator_nightly,
+TEST_P(myriadLayersTestsExperimentalDetectronPriorGridGenerator_smoke,
ExperimentalDetectronPriorGridGenerator) {
// Setup parameters and configuration.
{1, 128, 30, 30}
};
+// Checks the custom Correlate kernel against refCorrelate() on random input
+// pairs drawn from the empirically observed FlowNet activation range.
+TEST_P(myriadLayersTestsCorrelate_smoke, Correlate) {
+ const auto test = std::get<0>(GetParam());
+ const auto dims = test.dims;
+ const int kernel_size = test.kernel_size;
+ const int pad_size = test.pad_size;
+ const int max_displacement = test.max_displacement;
+ const int stride1 = test.stride1;
+ const int stride2 = test.stride2;
+ const std::string customConfig = std::get<1>(GetParam());
+
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
+
+ // Mirror the output-shape arithmetic of refCorrelate() so the blob
+ // dimensions always satisfy the reference implementation's shape asserts.
+ const int paddedbottomwidth = dims.w + 2 * pad_size;
+ const int paddedbottomheight = dims.h + 2 * pad_size;
+
+ const int kernel_radius = kernel_size / 2; //half of the comparison window
+ const int border_size = max_displacement + kernel_radius; //size of unreachable border region (on each side)
+
+ const int neighborhood_grid_radius = max_displacement / stride2;
+ const int neighborhood_grid_width = 2 * neighborhood_grid_radius + 1;
+
+ const int top_width = (int)ceilf((float) (paddedbottomwidth - border_size * 2) / (float) stride1);
+ const int top_height = (int)ceilf((float)(paddedbottomheight - border_size * 2) / (float)stride1);
+ // One output channel per displacement in the (2r+1) x (2r+1) neighborhood
+ // grid, matching refCorrelate(). The previous (max_displacement + 1)^2
+ // formula only coincided with this for even displacements with stride2 == 2.
+ const int top_channels = neighborhood_grid_width * neighborhood_grid_width;
+
+ const auto inputTensors = IN_OUT_desc{dims.asVector(), dims.asVector()};
+ const auto outputTensors = IN_OUT_desc{{1, (uint32_t)top_channels, (uint32_t)top_height, (uint32_t)top_width}};
+
+ SetInputTensors(inputTensors);
+ SetOutputTensors(outputTensors);
+
+ std::map<std::string, std::string> params = {
+ {"top_width", std::to_string(top_width)},
+ {"top_height", std::to_string(top_height)},
+ {"width", std::to_string(dims.w)},
+ {"height", std::to_string(dims.h)},
+ {"channels", std::to_string(dims.c)},
+ {"displacement", std::to_string(max_displacement)},
+ {"pad", std::to_string(pad_size)},
+ {"neighborhood_grid_radius", std::to_string(neighborhood_grid_radius)},
+ {"neighborhood_grid_width", std::to_string(neighborhood_grid_width)},
+ {"kernel_size", std::to_string(kernel_size)},
+ {"stride", std::to_string(stride1) + "," + std::to_string(stride2)},
+ };
+
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("Correlate").params(params),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
+
+ std::vector<Blob::Ptr> input_blobs{};
+ input_blobs.reserve(_inputMap.size());
+ for (auto& input : _inputMap) {
+ // Fill each input with uniform random values from the range observed
+ // in FlowNet correlation inputs.
+ for (int i = 0; i < dims.c * dims.h * dims.w; i++) {
+ const float corr_min = -1.744443f;
+ const float corr_max = 11.167725f;
+ float val = (corr_min + (float) rand() / ((float) RAND_MAX / (corr_max - corr_min + 1.f) + 1.f));
+
+ auto buf = input.second->buffer().as<ie_fp16*>();
+ buf[i] = PrecisionUtils::f32tof16(val);
+ }
+
+ input_blobs.push_back(input.second);
+ }
+ // Zero both outputs so stale buffer contents cannot mask missing writes.
+ const int output_size = top_width * top_height * top_channels;
+ for (int i = 0; i < output_size; i++) {
+ _outputMap.begin()->second->buffer().as<ie_fp16*>()[i] = 0;
+ _refBlob->buffer().as<ie_fp16*>()[i] = 0;
+ }
+
+ ASSERT_TRUE(Infer());
+
+ refCorrelate(input_blobs[0], input_blobs[1], _refBlob, kernel_size, max_displacement, pad_size, stride1, stride2);
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.1f);
+}
+
+// {dims {N, C, H, W}, kernel_size, pad_size, max_displacement, stride1, stride2}
+static const std::vector<CorrelateParams> s_CorrelateParams = {
+ { {1, 64, 48, 64}, 1, 8, 8, 1, 2 },
+ { {1, 127, 12, 64}, 3, 8, 8, 1, 2 },
+ { {1, 256, 48, 64}, 1, 20, 20, 1, 2 }
+};
+
+// Checks the custom SpatialTransform kernel against refSpatialTransform() for
+// several input shapes and affine matrices. The second network input is the
+// 2x3 theta matrix (declared as a 1x1x2x3 tensor).
+TEST_P(myriadLayersTestsSpatialTransform_smoke, SpatialTransform) {
+ const tensor_test_params dims = std::get<0>(GetParam());
+ const std::array<float, 6> theta = std::get<1>(GetParam());
+ const std::string customConfig = std::get<2>(GetParam());
+
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
+
+ SetInputTensors({dims.asVector(), {1, 1, 2, 3}});
+ SetOutputTensor(dims);
+
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(
+ LayerInitParams("SpatialTransform"),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
+
+ // The second entry of _inputMap is the theta blob; copy the test's affine
+ // coefficients into it as fp16.
+ auto theta_half = std::next(_inputMap.begin())->second;
+ for (int i = 0; i < 6; i++) {
+ theta_half->buffer().as<ie_fp16*>()[i] = PrecisionUtils::f32tof16(theta[i]);
+ }
+
+ ASSERT_TRUE(Infer());
+
+ ASSERT_NO_FATAL_FAILURE(refSpatialTransform(_inputMap.begin()->second,
+ std::next(_inputMap.begin())->second,
+ _refBlob));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0.001f);
+}
+
+// NCHW input shapes covering even and odd spatial sizes plus larger frames.
+static const std::vector<Dims> s_SpatialTransformInputs = {
+ {{ 1, 3, 24, 94 }},
+ {{ 1, 3, 96, 188 }},
+ {{ 1, 3, 97, 189 }},
+ {{ 1, 3, 98, 190 }},
+ {{ 1, 3, 384, 512 }},
+ {{ 1, 3, 24, 640 }},
+};
+
+// 2x3 affine matrices: a scale/shear/translation and the identity transform.
+static const std::vector<std::array<float, 6>> s_SpatialTransformTheta = {
+ {1.2f, 0.2f, -0.2f, 0.2f, 1.2f, -0.2f},
+ {1.f, 0.f, 0.f, 0.0f, 1.f, 0.f}
+};
#include "myriad_layers_deconvolution_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 6, 5, 6))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 1), MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv_2, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_deconv_to_conv_2, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 2, 256, 14, 14))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 2, 2), MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_group, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 384, 4, 2))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 2, 2)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_depthDeconv, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_depthDeconv, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 384, 4, 2))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 2, 2)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 2, 37, 59)
, MAKE_STRUCT(tensor_test_params, 1, 21, 16, 16)
)
);
-INSTANTIATE_TEST_CASE_P(extra3x3s1, myriadLayerDeconvolution_nightly,
+INSTANTIATE_TEST_CASE_P(extra3x3s1, myriadLayerDeconvolution_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 256, 1, 1))
, ::testing::Values<kernel>(MAKE_STRUCT(param_size, 3, 3))
PRETTY_PARAM(hw_optimization, bool)
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, kernel, stride, pad
- , out_channels, group, layoutPreference, hw_optimization >> myriadLayerDeconvolution_nightly;
+ , out_channels, group, layoutPreference, hw_optimization >> myriadLayerDeconvolution_smoke;
typedef myriadLayerTestBaseWithParam<tuple<DimsInput, kernel, stride, pad, pad_end
, out_channels, group, layoutPreference, hw_optimization >> myriadLayerDeconvolution_asymm_pad;
-TEST_P(myriadLayerDeconvolution_nightly, Deconvolution) {
+TEST_P(myriadLayerDeconvolution_smoke, Deconvolution) {
tensor_test_params input_dims = get<0>(GetParam());
param_size kernel = get<1>(GetParam());
param_size stride = get<2>(GetParam());
}
-class myriadDetectionOutputTests_nightly : public myriadLayersTests_nightly {
+class myriadDetectionOutputTests_smoke : public myriadLayersTests_nightly {
public:
std::vector<float> gen_locations;
std::vector<float> gen_confidence;
}
};
-TEST_F(myriadDetectionOutputTests_nightly, NoConst) {
+TEST_F(myriadDetectionOutputTests_smoke, NoConst) {
ASSERT_NO_FATAL_FAILURE(PrepareInput());
ASSERT_NO_FATAL_FAILURE(CalcRefOutput(false));
CheckResults();
}
-TEST_F(myriadDetectionOutputTests_nightly, MxNet) {
+TEST_F(myriadDetectionOutputTests_smoke, MxNet) {
ASSERT_NO_FATAL_FAILURE(PrepareInput());
ASSERT_NO_FATAL_FAILURE(CalcRefOutput(true));
CheckResults();
}
-TEST_F(myriadDetectionOutputTests_nightly, WithConst) {
+TEST_F(myriadDetectionOutputTests_smoke, WithConst) {
ASSERT_NO_FATAL_FAILURE(PrepareInput());
ASSERT_NO_FATAL_FAILURE(CalcRefOutput(false));
#include "myriad_layers_eltwise_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMax_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSum_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSum_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSub_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSub_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMul_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMul_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithCoeff_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithCoeff_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithBroadcast_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSumWithBroadcast_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::Values<int>(4))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithCoeff_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithCoeff_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithBroadcast_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSubWithBroadcast_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::Values<int>(4))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseDiv_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseDiv_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMin_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMin_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSqDiff_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseSqDiff_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwisePow_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwisePow_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseFloorMod_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseFloorMod_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseNotEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseNotEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreater_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreater_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreaterEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseGreaterEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLess_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLess_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLessEqual_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLessEqual_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalNot_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalNot_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyOneInput),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalAnd_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalAnd_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalOr_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalOr_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalXor_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseLogicalXor_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseInputs),
::testing::ValuesIn(s_eltwiseDims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMean_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsEltwiseMean_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseOnlyTwoInputs),
}
};
-class myriadTestsEltwiseMax_nightly: public EltwiseTest<ELTWISE_MAX>
+class myriadTestsEltwiseMax_smoke: public EltwiseTest<ELTWISE_MAX>
{
};
-class myriadTestsEltwiseSum_nightly: public EltwiseTest<ELTWISE_SUM>
+class myriadTestsEltwiseSum_smoke: public EltwiseTest<ELTWISE_SUM>
{
};
-class myriadTestsEltwiseSub_nightly: public EltwiseTest<ELTWISE_SUB>
+class myriadTestsEltwiseSub_smoke: public EltwiseTest<ELTWISE_SUB>
{
};
-class myriadTestsEltwiseMul_nightly: public EltwiseTest<ELTWISE_MUL>
+class myriadTestsEltwiseMul_smoke: public EltwiseTest<ELTWISE_MUL>
{
};
-class myriadTestsEltwiseSumWithCoeff_nightly: public EltwiseTest<ELTWISE_SUM>
+class myriadTestsEltwiseSumWithCoeff_smoke: public EltwiseTest<ELTWISE_SUM>
{
};
-class myriadTestsEltwiseSubWithCoeff_nightly: public EltwiseTest<ELTWISE_SUB>
+class myriadTestsEltwiseSubWithCoeff_smoke: public EltwiseTest<ELTWISE_SUB>
{
};
-class myriadTestsEltwiseSumWithBroadcast_nightly: public EltwiseTest<ELTWISE_SUM>
+class myriadTestsEltwiseSumWithBroadcast_smoke: public EltwiseTest<ELTWISE_SUM>
{
};
-class myriadTestsEltwiseSubWithBroadcast_nightly: public EltwiseTest<ELTWISE_SUB>
+class myriadTestsEltwiseSubWithBroadcast_smoke: public EltwiseTest<ELTWISE_SUB>
{
};
-class myriadTestsEltwiseDiv_nightly: public EltwiseTest<ELTWISE_DIV>
+class myriadTestsEltwiseDiv_smoke: public EltwiseTest<ELTWISE_DIV>
{
};
-class myriadTestsEltwiseMin_nightly: public EltwiseTest<ELTWISE_MIN>
+class myriadTestsEltwiseMin_smoke: public EltwiseTest<ELTWISE_MIN>
{
};
-class myriadTestsEltwiseSqDiff_nightly: public EltwiseTest<ELTWISE_SQDIFF>
+class myriadTestsEltwiseSqDiff_smoke: public EltwiseTest<ELTWISE_SQDIFF>
{
};
-class myriadTestsEltwisePow_nightly: public EltwiseTest<ELTWISE_POW>
+class myriadTestsEltwisePow_smoke: public EltwiseTest<ELTWISE_POW>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseFloorMod_nightly: public EltwiseTest<ELTWISE_FLOOR_MOD>
+class myriadTestsEltwiseFloorMod_smoke: public EltwiseTest<ELTWISE_FLOOR_MOD>
{
};
-class myriadTestsEltwiseEqual_nightly: public EltwiseTest<ELTWISE_EQUAL>
+class myriadTestsEltwiseEqual_smoke: public EltwiseTest<ELTWISE_EQUAL>
{
};
-class myriadTestsEltwiseNotEqual_nightly: public EltwiseTest<ELTWISE_NOT_EQUAL>
+class myriadTestsEltwiseNotEqual_smoke: public EltwiseTest<ELTWISE_NOT_EQUAL>
{
};
-class myriadTestsEltwiseGreater_nightly: public EltwiseTest<ELTWISE_GREATER>
+class myriadTestsEltwiseGreater_smoke: public EltwiseTest<ELTWISE_GREATER>
{
};
-class myriadTestsEltwiseGreaterEqual_nightly: public EltwiseTest<ELTWISE_GREATER_EQUAL>
+class myriadTestsEltwiseGreaterEqual_smoke: public EltwiseTest<ELTWISE_GREATER_EQUAL>
{
};
-class myriadTestsEltwiseLess_nightly: public EltwiseTest<ELTWISE_LESS>
+class myriadTestsEltwiseLess_smoke: public EltwiseTest<ELTWISE_LESS>
{
};
-class myriadTestsEltwiseLessEqual_nightly: public EltwiseTest<ELTWISE_LESS_EQUAL>
+class myriadTestsEltwiseLessEqual_smoke: public EltwiseTest<ELTWISE_LESS_EQUAL>
{
};
-class myriadTestsEltwiseLogicalNot_nightly: public EltwiseTest<ELTWISE_LOGICAL_NOT>
+class myriadTestsEltwiseLogicalNot_smoke: public EltwiseTest<ELTWISE_LOGICAL_NOT>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseLogicalAnd_nightly: public EltwiseTest<ELTWISE_LOGICAL_AND>
+class myriadTestsEltwiseLogicalAnd_smoke: public EltwiseTest<ELTWISE_LOGICAL_AND>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseLogicalOr_nightly: public EltwiseTest<ELTWISE_LOGICAL_OR>
+class myriadTestsEltwiseLogicalOr_smoke: public EltwiseTest<ELTWISE_LOGICAL_OR>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseLogicalXor_nightly: public EltwiseTest<ELTWISE_LOGICAL_XOR>
+class myriadTestsEltwiseLogicalXor_smoke: public EltwiseTest<ELTWISE_LOGICAL_XOR>
{
void SetUp() override {
EltwiseTest::SetUp();
}
};
-class myriadTestsEltwiseMean_nightly: public EltwiseTest<ELTWISE_MEAN>
+class myriadTestsEltwiseMean_smoke: public EltwiseTest<ELTWISE_MEAN>
{
};
-TEST_P(myriadTestsEltwiseMax_nightly, Max)
+TEST_P(myriadTestsEltwiseMax_smoke, Max)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSum_nightly, Sum)
+TEST_P(myriadTestsEltwiseSum_smoke, Sum)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSub_nightly, Sub)
+TEST_P(myriadTestsEltwiseSub_smoke, Sub)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseMul_nightly, Mul)
+TEST_P(myriadTestsEltwiseMul_smoke, Mul)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSumWithCoeff_nightly, Sum)
+TEST_P(myriadTestsEltwiseSumWithCoeff_smoke, Sum)
{
InitBody(true);
}
-TEST_P(myriadTestsEltwiseSubWithCoeff_nightly, Sub)
+TEST_P(myriadTestsEltwiseSubWithCoeff_smoke, Sub)
{
InitBody(true);
}
-TEST_P(myriadTestsEltwiseSumWithBroadcast_nightly, Sum)
+TEST_P(myriadTestsEltwiseSumWithBroadcast_smoke, Sum)
{
InitBody(false, true);
}
-TEST_P(myriadTestsEltwiseSubWithBroadcast_nightly, Sub)
+TEST_P(myriadTestsEltwiseSubWithBroadcast_smoke, Sub)
{
InitBody(false, true);
}
-TEST_P(myriadTestsEltwiseDiv_nightly, Div)
+TEST_P(myriadTestsEltwiseDiv_smoke, Div)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseMin_nightly, Min)
+TEST_P(myriadTestsEltwiseMin_smoke, Min)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseSqDiff_nightly, SqDiff)
+TEST_P(myriadTestsEltwiseSqDiff_smoke, SqDiff)
{
InitBody();
}
-TEST_P(myriadTestsEltwisePow_nightly, Pow)
+TEST_P(myriadTestsEltwisePow_smoke, Pow)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseFloorMod_nightly, FloorMod)
+TEST_P(myriadTestsEltwiseFloorMod_smoke, FloorMod)
{
InitBody();
}
-TEST_P(myriadTestsEltwiseEqual_nightly, Equal)
+TEST_P(myriadTestsEltwiseEqual_smoke, Equal)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseNotEqual_nightly, NotEqual)
+TEST_P(myriadTestsEltwiseNotEqual_smoke, NotEqual)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseGreater_nightly, Greater)
+TEST_P(myriadTestsEltwiseGreater_smoke, Greater)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseGreaterEqual_nightly, GreaterEqual)
+TEST_P(myriadTestsEltwiseGreaterEqual_smoke, GreaterEqual)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLess_nightly, Less)
+TEST_P(myriadTestsEltwiseLess_smoke, Less)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLessEqual_nightly, LessEqual)
+TEST_P(myriadTestsEltwiseLessEqual_smoke, LessEqual)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalNot_nightly, LogicalNot)
+TEST_P(myriadTestsEltwiseLogicalNot_smoke, LogicalNot)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalAnd_nightly, LogicalAnd)
+TEST_P(myriadTestsEltwiseLogicalAnd_smoke, LogicalAnd)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalOr_nightly, LogicalOr)
+TEST_P(myriadTestsEltwiseLogicalOr_smoke, LogicalOr)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseLogicalXor_nightly, LogicalXor)
+TEST_P(myriadTestsEltwiseLogicalXor_smoke, LogicalXor)
{
InitBody(false, false, true);
}
-TEST_P(myriadTestsEltwiseMean_nightly, Mean)
+TEST_P(myriadTestsEltwiseMean_smoke, Mean)
{
InitBody();
}
#include "myriad_layers_elu_test.hpp"
-INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsELUParams,
+INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsELUParams_smoke,
::testing::Combine(
::testing::ValuesIn(s_powerTensors),
::testing::ValuesIn(s_powerParams))
}
}
-typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, alpha>> myriadLayersTestsELUParams;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, alpha>> myriadLayersTestsELUParams_smoke;
-TEST_P(myriadLayersTestsELUParams, TestsELU) {
+TEST_P(myriadLayersTestsELUParams_smoke, TestsELU) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto param = GetParam();
#include "myriad_layers_erf_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsErf_nightly,
+ accuracy, myriadLayersTestsErf_smoke,
::testing::ValuesIn(s_ErfDims));
}
}
-class myriadLayersTestsErf_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsErf_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<SizeVector> {
public:
};
-TEST_P(myriadLayersTestsErf_nightly, TestsErf)
+TEST_P(myriadLayersTestsErf_smoke, TestsErf)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto p = ::testing::WithParamInterface<SizeVector>::GetParam();
{{ 10.0, 10.0, 5.0, 5.0 }, 4.135166645050049, 0.5, 0.05, _MaxDetections, _NumClasses, 2000, 0 },
};
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsExpDetectionOutput_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsExpDetectionOutput_smoke,
::testing::Combine(
::testing::ValuesIn(s_sizeParams_list),
::testing::ValuesIn(s_layerParams_list))
}
};
-class myriadTestsExpDetectionOutput_nightly: public ExpDetectionOutputTest
+class myriadTestsExpDetectionOutput_smoke: public ExpDetectionOutputTest
{
};
-TEST_P(myriadTestsExpDetectionOutput_nightly, ExpDetectionOutput)
+TEST_P(myriadTestsExpDetectionOutput_smoke, ExpDetectionOutput)
{
testExpDetectionOutput();
}
#include "myriad_layers_exp_generateproposals_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpGenerateProposals_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpGenerateProposals_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExpGenerateProposalsLayerScores),
::testing::ValuesIn(s_ExpGenerateProposalsLayerImInfo),
using ExpGenerateProposalsTestParams = std::tuple<Dims, std::vector<int>, GenerateProposalsParam>;
-typedef myriadLayerTestBaseWithParam<ExpGenerateProposalsTestParams> myriadLayersTestsExpGenerateProposals_nightly;
+typedef myriadLayerTestBaseWithParam<ExpGenerateProposalsTestParams> myriadLayersTestsExpGenerateProposals_smoke;
static void genInputs(InferenceEngine::BlobMap inputMap,
const int numProposals,
inputIMinfo[1] = PrecisionUtils::f32tof16( (float) imgW );
}
-TEST_P(myriadLayersTestsExpGenerateProposals_nightly, ExpGenerateProposals) {
+TEST_P(myriadLayersTestsExpGenerateProposals_smoke, ExpGenerateProposals) {
tensor_test_params scoresDims = std::get<0>(GetParam());
std::vector<int> im_info = std::get<1>(GetParam());
GenerateProposalsParam opParams = std::get<2>(GetParam());
#include "myriad_layers_exp_priorgridgenerator_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpPriorGridGenerator_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpPriorGridGenerator_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExpPriorGridGeneratorLayerInputs),
::testing::ValuesIn(s_ExpPriorGridGeneratorLayerParam))
using ExpPriorGridGeneratorTestParams = std::tuple<InputDims, PriorGridGeneratorParam>;
-typedef myriadLayerTestBaseWithParam<ExpPriorGridGeneratorTestParams> myriadLayersTestsExpPriorGridGenerator_nightly;
+typedef myriadLayerTestBaseWithParam<ExpPriorGridGeneratorTestParams> myriadLayersTestsExpPriorGridGenerator_smoke;
static void genPriors(InferenceEngine::Blob::Ptr rois,
const tensor_test_params& params,
}
}
-TEST_P(myriadLayersTestsExpPriorGridGenerator_nightly, ExpPriorGridGenerator) {
+TEST_P(myriadLayersTestsExpPriorGridGenerator_smoke, ExpPriorGridGenerator) {
InputDims inputTensorsDims = std::get<0>(GetParam());
PriorGridGeneratorParam opParams = std::get<1>(GetParam());
#include "myriad_layers_exp_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsExp_nightly,
+ accuracy, myriadLayersTestsExp_smoke,
::testing::ValuesIn(s_expParams));
using namespace InferenceEngine;
-class myriadLayersTestsExp_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsExp_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<Dims> {};
-TEST_P(myriadLayersTestsExp_nightly, TestsExp)
+TEST_P(myriadLayersTestsExp_smoke, TestsExp)
{
auto p = ::testing::WithParamInterface<Dims>::GetParam();
SetInputTensor(p);
#include "myriad_layers_exp_topkrois_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpTopKROIs_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsExpTopKROIs_smoke,
::testing::Combine(
::testing::ValuesIn(s_ExpTopKROIsInputRoisNum),
::testing::ValuesIn(s_ExpTopKROIsMaxRoisNum))
using ExpTopKROIsTestParams = std::tuple<int, TopKROIsParam>;
-typedef myriadLayerTestBaseWithParam<ExpTopKROIsTestParams> myriadLayersTestsExpTopKROIs_nightly;
+typedef myriadLayerTestBaseWithParam<ExpTopKROIsTestParams> myriadLayersTestsExpTopKROIs_smoke;
static void genInputs(InferenceEngine::BlobMap inputMap) {
const std::string INPUT_ROIS = "input0";
}
}
-TEST_P(myriadLayersTestsExpTopKROIs_nightly, ExpTopKROIs) {
+TEST_P(myriadLayersTestsExpTopKROIs_smoke, ExpTopKROIs) {
int inputRoisNum = std::get<0>(GetParam());
TopKROIsParam opParams = std::get<1>(GetParam());
#include "myriad_layers_flatten_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFlatten_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFlatten_smoke,
::testing::Combine(
::testing::ValuesIn(s_flattenTensors),
::testing::ValuesIn(s_flattenAxis)
#include "myriad_layers_tests.hpp"
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, int32_t>> myriadLayersTestsFlatten_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, int32_t>> myriadLayersTestsFlatten_smoke;
static void ref_flatten(const InferenceEngine::Blob::Ptr src,
InferenceEngine::Blob::Ptr dst) {
}
}
-TEST_P(myriadLayersTestsFlatten_nightly, Flatten) {
+TEST_P(myriadLayersTestsFlatten_smoke, Flatten) {
auto input = std::get<0>(GetParam());
int32_t axis_val = std::get<1>(GetParam());
IN_OUT_desc input_tensor;
#include "myriad_layers_floor_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsFloor_nightly,
+ accuracy, myriadLayersTestsFloor_smoke,
::testing::ValuesIn(s_FloorParams));
}
}
-class myriadLayersTestsFloor_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFloor_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<Dims> {
public:
};
-TEST_P(myriadLayersTestsFloor_nightly, TestsFloor)
+TEST_P(myriadLayersTestsFloor_smoke, TestsFloor)
{
auto p = ::testing::WithParamInterface<Dims>::GetParam();
SetInputTensor(p);
#include "myriad_layers_fully_connected_tests.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsFullyConnected_nightly,
+ accuracy, myriadLayersTestsFullyConnected_smoke,
::testing::ValuesIn(s_fcTestParams)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedBatch_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedBatch_smoke,
::testing::Combine(
::testing::ValuesIn(s_fcTestBatchParams)
, ::testing::ValuesIn(s_fcTestBatchOutSizes)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedPVA_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsFullyConnectedPVA_smoke,
::testing::Combine(
::testing::ValuesIn(s_fcTestPVAParams)
, ::testing::ValuesIn(s_fcTestPVAOutSizes)
using namespace InferenceEngine;
-class myriadLayersTestsFullyConnected_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFullyConnected_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<fcon_test_params> {
};
typedef std::tuple<InferenceEngine::SizeVector, uint32_t> IR3_FC_params;
-class myriadLayersTestsFullyConnectedBatch_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFullyConnectedBatch_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<IR3_FC_params> {
};
-TEST_P(myriadLayersTestsFullyConnected_nightly, TestsFullyConnected)
+TEST_P(myriadLayersTestsFullyConnected_smoke, TestsFullyConnected)
{
fcon_test_params p = ::testing::WithParamInterface<fcon_test_params>::GetParam();
}
-TEST_P(myriadLayersTestsFullyConnectedBatch_nightly, TestsFullyConnected)
+TEST_P(myriadLayersTestsFullyConnectedBatch_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<IR3_FC_params>::GetParam();
auto input_tensor = std::get<0>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.02);
}
-class myriadLayersTestsFullyConnectedPVA_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsFullyConnectedPVA_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<IR3_FC_params> {
};
-TEST_P(myriadLayersTestsFullyConnectedPVA_nightly, TestsFullyConnected)
+TEST_P(myriadLayersTestsFullyConnectedPVA_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<IR3_FC_params>::GetParam();
auto input_tensor = std::get<0>(p);
using namespace testing;
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGather_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGather_smoke,
Values(GatherTestParams { {36549, 1024}, {16}, 0, "FP16" },
GatherTestParams { {10}, {10}, 0, "FP16" },
GatherTestParams { {36549, 1024}, {10}, 0, "FP16" },
Axis,
Type>;
-class myriadLayerGather_nightly :
+class myriadLayerGather_smoke :
public myriadLayerTestBaseWithParam<GatherTestParams> {
protected:
}
};
-TEST_P(myriadLayerGather_nightly, Gather) {
+TEST_P(myriadLayerGather_smoke, Gather) {
testGather();
}
#include "myriad_layers_gemm_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGEMM,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerGEMM_smoke,
::testing::Combine(
::testing::Values<gemm_parameters>(
MAKE_STRUCT(gemm_parameters, 4.7f, 2.3f, 5, 7, 11, 1, 2, 3, 4, 5, 6, 7, 8),
}
}
-typedef myriadLayerTestBaseWithParam<tuple<gemm_parameters, layoutPreference, hasThreeInputs, transposeA, transposeB>> myriadLayerGEMM;
+typedef myriadLayerTestBaseWithParam<tuple<gemm_parameters, layoutPreference, hasThreeInputs, transposeA, transposeB>> myriadLayerGEMM_smoke;
-TEST_P(myriadLayerGEMM, GEMM) {
+TEST_P(myriadLayerGEMM_smoke, GEMM) {
gemm_parameters gemm_parameter = get<0>(GetParam());
auto layoutPreference = get<1>(GetParam());
auto hasThreeInputs = get<2>(GetParam());
#include "myriad_layers_grn_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGRN_nightly,
- ::testing::Combine(
- ::testing::ValuesIn(s_GRNTensors),
- ::testing::ValuesIn(s_GRN_bias),
- ::testing::ValuesIn(s_MVNCustomConfig)));
-
-
-TEST_F(myriadLayersTests_nightly, GRN_CHW_Input)
-{
- std::string model = R"V0G0N(
- <net name="GRN" version="2" batch="1">
- <layers>
- <layer name="data" type="Input" precision="FP16" id="1">
- <output>
- <port id="1">
- <dim>1</dim>
- <dim>24</dim>
- <dim>128</dim>
- <dim>224</dim>
- </port>
- </output>
- </layer>
- <layer name="grn" type="GRN" precision="FP16" id="2">
- <data bias="0.5"/>
- <input>
- <port id="2">
- <dim>1</dim>
- <dim>24</dim>
- <dim>128</dim>
- <dim>224</dim>
- </port>
- </input>
- <output>
- <port id="3">
- <dim>1</dim>
- <dim>24</dim>
- <dim>128</dim>
- <dim>224</dim>
- </port>
- </output>
- </layer>
- </layers>
- <edges>
- <edge from-layer="1" from-port="1" to-layer="2" to-port="2"/>
- </edges>
- </net>
- )V0G0N";
-
- StatusCode st;
-
- ASSERT_NO_THROW(readNetwork(model));
-
- const auto& network = _cnnNetwork;
-
- _inputsInfo = network.getInputsInfo();
- _inputsInfo["data"]->setPrecision(Precision::FP16);
-
- _outputsInfo = network.getOutputsInfo();
- _outputsInfo["grn"]->setPrecision(Precision::FP16);
-
- ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network, {}, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
- ASSERT_NE(_exeNetwork, nullptr) << _resp.msg;
-
- ASSERT_NO_THROW(st = _exeNetwork->CreateInferRequest(_inferRequest, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- auto tensorDesc = TensorDesc(Precision::FP16, _inputsInfo["data"]->getTensorDesc().getDims(), Layout::NCHW);
-
- auto inputNCHW = make_shared_blob<ie_fp16>(tensorDesc);
- ASSERT_NO_THROW(inputNCHW->allocate());
-
- auto outputNCHW = make_shared_blob<ie_fp16>(tensorDesc);
- ASSERT_NO_THROW(outputNCHW->allocate());
-
- auto output_ref = make_shared_blob<ie_fp16>(tensorDesc);
- ASSERT_NO_THROW(output_ref->allocate());
-
- ASSERT_NO_THROW(GenRandomData(inputNCHW));
-
- ASSERT_NO_THROW(st = _inferRequest->SetBlob("data", inputNCHW, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- ASSERT_NO_THROW(st = _inferRequest->SetBlob("grn", outputNCHW, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- ASSERT_NO_FATAL_FAILURE(refGRN(inputNCHW, output_ref, 0.5f, true));
-
- CompareCommonAbsolute(outputNCHW, output_ref, 0.003);
-}
+INSTANTIATE_TEST_CASE_P(
+ accuracy, myriadLayersTestsGRN_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_GRNInputs),
+ ::testing::Values<Bias>(0.5f, 10.f, 1.f),
+ ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
+));
}
PRETTY_PARAM(Bias, float)
+PRETTY_PARAM(CustomConfig, std::string)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, Bias, std::string>> myriadLayersTestsGRN_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, Bias, IRVersion, CustomConfig>> myriadLayersTestsGRN_smoke;
-TEST_P(myriadLayersTestsGRN_nightly, GRN) {
- tensor_test_params dims = std::get<0>(GetParam());
- float bias = std::get<1>(GetParam());
- std::string customConfig = std::get<2>(GetParam());
+TEST_P(myriadLayersTestsGRN_smoke, GRN) {
+ const SizeVector dims = std::get<0>(GetParam());
+ const float bias = std::get<1>(GetParam());
+ _irVersion = std::get<2>(GetParam());
+ const std::string customConfig = std::get<3>(GetParam());
- if(!customConfig.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
}
_config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- SetInputTensor(dims);
- SetOutputTensor(dims);
+ SetInputTensors({dims});
+ SetOutputTensors({dims});
std::map<std::string, std::string> params;
params["bias"] = std::to_string(bias);
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("GRN").params(params)));
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("GRN").params(params),
+ NetworkInitParams()
+ .layoutPreference(vpu::LayoutPreference::ChannelMajor)
+ .lockLayout(true)));
ASSERT_TRUE(Infer());
- ASSERT_NO_FATAL_FAILURE(refGRN(_inputMap.begin()->second, _refBlob, bias, false));
+ ASSERT_NO_FATAL_FAILURE(refGRN(_inputMap.begin()->second, _refBlob, bias, true));
CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-static std::vector<Dims> s_GRNTensors = {
- {{1, 3, 16, 224}},
- {{1, 24, 128, 224}},
-};
-
-static std::vector<Bias> s_GRN_bias = {
- 0.5f, 10.f
-};
-
-static std::vector<std::string> s_MVNCustomConfig = {
- "" ,
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""} ,
#ifdef VPU_HAS_CUSTOM_KERNELS
getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
#endif
};
+
+static std::vector<SizeVector> s_GRNInputs = {
+ {1, 3, 16, 224},
+ {1, 24, 128, 224},
+};
\ No newline at end of file
#include "myriad_layers_interp_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsInterp_nightly,
+ accuracy, myriadLayersTestsInterp_smoke,
::testing::Combine(
::testing::Values<SizeInputOutput>(
PRETTY_PARAM(SizeInputOutput, interp_test_params)
PRETTY_PARAM(align_corners, bool)
-typedef myriadLayerTestBaseWithParam<tuple<interp_test_params, layoutPreference, align_corners>> myriadLayersTestsInterp_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<interp_test_params, layoutPreference, align_corners>> myriadLayersTestsInterp_smoke;
void ref_interp(const Blob::Ptr src,
}
}
-TEST_P(myriadLayersTestsInterp_nightly, Interp)
+TEST_P(myriadLayersTestsInterp_smoke, Interp)
{
interp_test_params test_params = get<0>(GetParam());
auto layoutPreference = get<1>(GetParam());
#include "myriad_layers_log_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsLog_nightly,
+ accuracy, myriadLayersTestsLog_smoke,
::testing::ValuesIn(s_logParams));
using namespace InferenceEngine;
-class myriadLayersTestsLog_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsLog_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<Dims> {
public:
};
-TEST_P(myriadLayersTestsLog_nightly, TestsLog)
+TEST_P(myriadLayersTestsLog_smoke, TestsLog)
{
auto p = ::testing::WithParamInterface<Dims>::GetParam();
SetInputTensor(p);
#include "myriad_layers_lrn_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLRN_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLRN_smoke,
::testing::Combine(
::testing::ValuesIn(s_LRNTensors),
::testing::ValuesIn(s_LRNlocal_size),
PRETTY_PARAM(alpha, float)
PRETTY_PARAM(beta, float)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, local_size, k_val, alpha, beta>> myriadLayersTestsLRN_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, local_size, k_val, alpha, beta>> myriadLayersTestsLRN_smoke;
-TEST_P(myriadLayersTestsLRN_nightly, LRN) {
+TEST_P(myriadLayersTestsLRN_smoke, LRN) {
tensor_test_params dims = std::get<0>(GetParam());
uint32_t local_v = std::get<1>(GetParam());
float k = std::get<2>(GetParam());
CompareCommonAbsolute(dst, _refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsLRN_nightly, InnerLRN) {
+TEST_P(myriadLayersTestsLRN_smoke, InnerLRN) {
tensor_test_params dims = std::get<0>(GetParam());
uint32_t local_v = std::get<1>(GetParam());
float k = std::get<2>(GetParam());
CompareCommonAbsolute(output, refOut0, ERROR_BOUND);
}
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLSTMCell_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsLSTMCell_smoke,
::testing::Values<lstmcell_test_params>(MAKE_STRUCT(lstmcell_test_params, 512, 128)),
);
<< ", state size = " << tst.state_size;
};
};
-typedef myriadLayerTestBaseWithParam<lstmcell_test_params> myriadLayersTestsLSTMCell_nightly;
+typedef myriadLayerTestBaseWithParam<lstmcell_test_params> myriadLayersTestsLSTMCell_smoke;
#define f32Tof16 PrecisionUtils::f32tof16
#define f16Tof32 PrecisionUtils::f16tof32
}
}
-TEST_P(myriadLayersTestsLSTMCell_nightly, LSTMCell) {
+TEST_P(myriadLayersTestsLSTMCell_smoke, LSTMCell) {
auto param = GetParam();
lstmcell_test_params test_params = param;
#include "myriad_layers_mvn_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMVN_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMVN_smoke,
::testing::Combine(
::testing::ValuesIn(s_MVNTensors),
::testing::ValuesIn(s_MVN_acrossChannels),
::testing::ValuesIn(s_MVN_normalize),
::testing::ValuesIn(s_MVN_epsilon),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
::testing::ValuesIn(s_MVNCustomConfig)));
TEST_F(myriadLayersTests_nightly, MVN_CHW_Input)
PRETTY_PARAM(Normalize, int)
PRETTY_PARAM(Epsilon, float)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossChannels, Normalize, Epsilon, std::string>> myriadLayersTestsMVN_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossChannels, Normalize, Epsilon, IRVersion, std::string>> myriadLayersTestsMVN_smoke;
-TEST_P(myriadLayersTestsMVN_nightly, MVN)
+TEST_P(myriadLayersTestsMVN_smoke, MVN)
{
tensor_test_params dims = std::get<0>(GetParam());
int acrossChannels = std::get<1>(GetParam());
int normalize = std::get<2>(GetParam());
float eps = std::get<3>(GetParam());
- std::string customConfig = std::get<4>(GetParam());
+ _irVersion = std::get<4>(GetParam());
+ std::string customConfig = std::get<5>(GetParam());
if(!customConfig.empty() && !CheckMyriadX()) {
GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
#include "myriad_layers_nms_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNonMaxSuppression_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNonMaxSuppression_smoke,
::testing::Values(
MAKE_STRUCT(NMS_testParams,
{6, 1, 1}, // {spatial_dimension, num_classes, num_batches}
}
}
-typedef myriadLayerTestBaseWithParam<NMS_testParams> myriadLayersTestsNonMaxSuppression_nightly;
+typedef myriadLayerTestBaseWithParam<NMS_testParams> myriadLayersTestsNonMaxSuppression_smoke;
-TEST_P(myriadLayersTestsNonMaxSuppression_nightly, NonMaxSuppression) {
+TEST_P(myriadLayersTestsNonMaxSuppression_smoke, NonMaxSuppression) {
const auto params = GetParam();
const int spatDim = params.dims[0];
const int numClasses = params.dims[1];
#include "myriad_layers_nonzero_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestNonZero_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestNonZero_smoke,
::testing::ValuesIn(inputDims));
using namespace InferenceEngine;
-class myriadLayerTestNonZero_nightly: public myriadLayersTests_nightly,
+class myriadLayerTestNonZero_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<SizeVector> {
public:
void testNonZero(vpu::LayoutPreference preference, Precision precision);
}
};
-void myriadLayerTestNonZero_nightly::testNonZero(vpu::LayoutPreference preference, Precision precision) {
+void myriadLayerTestNonZero_smoke::testNonZero(vpu::LayoutPreference preference, Precision precision) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const auto& inputDims = GetParam();
CompareNonZero(outputIndicesBlob, refIndicesBlob, outputDimsBlob, refDimsBlob);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZero) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZero) {
testNonZero(vpu::LayoutPreference::ChannelMajor, Precision::FP16);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZeroNHWC) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZeroNHWC) {
testNonZero(vpu::LayoutPreference::ChannelMinor, Precision::FP16);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZeroI32) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZeroI32) {
testNonZero(vpu::LayoutPreference::ChannelMajor, Precision::I32);
}
-TEST_P(myriadLayerTestNonZero_nightly, NonZeroU8) {
+TEST_P(myriadLayerTestNonZero_smoke, NonZeroU8) {
testNonZero(vpu::LayoutPreference::ChannelMajor, Precision::U8);
}
#include "myriad_layers_normalize_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNormalize_nightly, ::testing::Combine(
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsNormalize_smoke, ::testing::Combine(
::testing::Values<Dims>(
// small size, num_channels is not divisible by 8
MAKE_STRUCT(tensor_test_params, 1, 33, 1, 1),
));
-INSTANTIATE_TEST_CASE_P(accuracy_more, myriadLayersTestsNormalize_nightly, ::testing::Combine(
+INSTANTIATE_TEST_CASE_P(accuracy_more, myriadLayersTestsNormalize_smoke, ::testing::Combine(
::testing::Values<Dims>(
//more tests
MAKE_STRUCT(tensor_test_params, 1, 1, 38, 38),
PRETTY_PARAM(ChannelSharedNormalize, bool)
PRETTY_PARAM(EPS, float)
-typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossSpatial, ChannelSharedNormalize, EPS>> myriadLayersTestsNormalize_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<Dims, AcrossSpatial, ChannelSharedNormalize, EPS>> myriadLayersTestsNormalize_smoke;
-TEST_P(myriadLayersTestsNormalize_nightly, Normalize) {
+TEST_P(myriadLayersTestsNormalize_smoke, Normalize) {
tensor_test_params dims = std::get<0>(GetParam());
int across_spatial = std::get<1>(GetParam());
int channel_shared = std::get<2>(GetParam());
#include "myriad_layers_oneHot_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestOneHot_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestOneHot_smoke,
::testing::Values<oneHot_test_params>(
MAKE_STRUCT(OneHotParams, {64}, 2, {0}, {}, {}),
MAKE_STRUCT(OneHotParams, {64}, 2, {-1}, {}, {}),
MAKE_STRUCT(OneHotParams, {4, 8, 16, 32, 64}, 2, {-1}, {}, {})
));
-INSTANTIATE_TEST_CASE_P(accuracy_add, myriadLayerTestOneHot_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_add, myriadLayerTestOneHot_smoke,
::testing::Values<oneHot_test_params>(
MAKE_STRUCT(OneHotParams, {16, 32, 64}, 2, {2}, {}, {}),
MAKE_STRUCT(OneHotParams, {8, 16, 32,64}, 2, {2}, {}, {}),
}
}
-typedef myriadLayerTestBaseWithParam<oneHot_test_params> myriadLayerTestOneHot_nightly;
+typedef myriadLayerTestBaseWithParam<oneHot_test_params> myriadLayerTestOneHot_smoke;
-TEST_P(myriadLayerTestOneHot_nightly, OneHot) {
+TEST_P(myriadLayerTestOneHot_smoke, OneHot) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
OneHotParams testParams = GetParam();
#include "myriad_layers_pad_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerPad,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerPad_smoke,
::testing::Combine(
::testing::Values<DimsInput>(MAKE_STRUCT(tensor_test_params, 1, 64, 16, 16)),
::testing::Values<pad_parameters>(MAKE_STRUCT(pad_parameters, 0, 32, 1, 2, 0, 32, 3, 4)),
PRETTY_PARAM(layoutPreference, vpu::LayoutPreference);
PRETTY_PARAM(pad_mode, std::string);
-typedef myriadLayerTestBaseWithParam<std::tuple<DimsInput, pad_parameters, layoutPreference, pad_mode, IRVersion>> myriadLayerPad;
+typedef myriadLayerTestBaseWithParam<std::tuple<DimsInput, pad_parameters, layoutPreference, pad_mode, IRVersion>> myriadLayerPad_smoke;
const float pad_value = 42.0f;
}
}
-TEST_P(myriadLayerPad, Pad) {
+TEST_P(myriadLayerPad_smoke, Pad) {
tensor_test_params input_dims = get<0>(GetParam());
pad_parameters pad_parameter = get<1>(GetParam());
auto layoutPreference = get<2>(GetParam());
#include "myriad_layers_permute_nd_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_2D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_2D)
, ::testing::ValuesIn(s_permuteTensors_2D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_3D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_3D)
, ::testing::ValuesIn(s_permuteTensors_3D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_4D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_4D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_4D)
, ::testing::ValuesIn(s_permuteTensors_4D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_5D, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_5D, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors_5D)
, ::testing::ValuesIn(s_permuteTensors_5D)
, ::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayersPermuteNDTests_smoke,
::testing::Values(
std::make_tuple(
SizeVector{8, 50, 256, 7, 7},
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_FasterRCNN, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_FasterRCNN, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 24, 14, 14})
,::testing::Values<InferenceEngine::SizeVector>({0, 2, 3, 1})
,::testing::ValuesIn(s_permutePrecisions)
));
-INSTANTIATE_TEST_CASE_P(accuracy_MaskRCNN, myriadLayersPermuteNDTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_MaskRCNN, myriadLayersPermuteNDTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({4, 3, 1, 88, 120})
,::testing::Values<InferenceEngine::SizeVector>({0, 3, 4, 1, 2})
IRVersion,
InferenceEngine::Precision>;
-class myriadLayersPermuteNDTests_nightly:
+class myriadLayersPermuteNDTests_smoke:
public myriadLayersTests_nightly,
public testing::WithParamInterface<PermuteNDParams> {
};
-TEST_P(myriadLayersPermuteNDTests_nightly, Permute) {
+TEST_P(myriadLayersPermuteNDTests_smoke, Permute) {
const auto& testParams = GetParam();
const auto& inputTensorSizes = std::get<0>(testParams);
const auto& permutationVector = std::get<1>(testParams);
#include "myriad_layers_permute_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersPermuteTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersPermuteTests_smoke,
::testing::Combine(
::testing::ValuesIn(s_inTensors)
, ::testing::ValuesIn(s_permuteTensors)
));
-INSTANTIATE_TEST_CASE_P(accuracyFasterRCNN, myriadLayersPermuteTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracyFasterRCNN, myriadLayersPermuteTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 24, 14, 14})
,::testing::Values<InferenceEngine::SizeVector>({0, 2, 3, 1})
}
typedef std::tuple<InferenceEngine::SizeVector, InferenceEngine::SizeVector> PermuteParams;
-class myriadLayersPermuteTests_nightly: public myriadLayersTests_nightly, /*input tensor, order */
+class myriadLayersPermuteTests_smoke: public myriadLayersTests_nightly, /*input tensor, order */
public testing::WithParamInterface<PermuteParams> {
};
}
}
-TEST_P(myriadLayersPermuteTests_nightly, Permute) {
+TEST_P(myriadLayersPermuteTests_smoke, Permute) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
//======================================================================
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_avg_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_ncdhw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 19, 65, 47}),
Values(KernelShape {1, 3, 5}),
//======================================================================
INSTANTIATE_TEST_CASE_P(simple_ncdhw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_ncdhw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_ncdhw_avg_autopad_1,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_ncdhw_avg_autopad_2,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_ncdhw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 20, 64, 48}),
Values(KernelShape {3, 3, 3}),
//======================================================================
INSTANTIATE_TEST_CASE_P(tricky_nchw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_nchw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_nchw_avg_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
);
INSTANTIATE_TEST_CASE_P(tricky_nchw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 65, 47}),
Values(KernelShape {1, 5}),
//======================================================================
INSTANTIATE_TEST_CASE_P(simple_nchw_avg_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_nchw_max_userpad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_nchw_avg_autopad_1,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
);
INSTANTIATE_TEST_CASE_P(simple_nchw_avg_autopad_2,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
//----------------------------------------------------------------------
INSTANTIATE_TEST_CASE_P(simple_nchw_max_autopad,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 3, 64, 48}),
Values(KernelShape {3, 3}),
//======================================================================
INSTANTIATE_TEST_CASE_P(i3d_id10,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 64, 40, 112, 112}),
Values(KernelShape {1, 3, 3}),
Values(ExcludePad(true))));
INSTANTIATE_TEST_CASE_P(i3d_id47,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 192, 40, 28, 28}),
Values(KernelShape {3, 3, 3}),
Values(ExcludePad(true))));
INSTANTIATE_TEST_CASE_P(i3d_id247,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 832, 20, 14, 14}),
Values(KernelShape {2, 2, 2}),
Values(ExcludePad(true))));
INSTANTIATE_TEST_CASE_P(i3d_id312,
- myriadLayersPoolNDTest_nightly,
+ myriadLayersPoolNDTest_smoke,
Combine(
Values(InputShape {1, 1024, 10, 7, 7}),
Values(KernelShape {2, 7, 7}),
}
};
-class myriadLayersPoolNDTest_nightly: public PoolNDTest {};
+class myriadLayersPoolNDTest_smoke: public PoolNDTest {};
-TEST_P(myriadLayersPoolNDTest_nightly, PoolND) {
+TEST_P(myriadLayersPoolNDTest_smoke, PoolND) {
testPoolND();
}
#include "myriad_layers_pooling_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 192, 56, 56})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 576, 14, 14})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
);
-INSTANTIATE_TEST_CASE_P(accuracy_4X4, myriadLayers_IR3_PoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_4X4, myriadLayers_IR3_PoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({10, 1024, 4, 4})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 4, 4)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_1X1, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_1X1, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 3, 5, 7})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0000, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0000, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0001, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0001, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0011, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0011, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p0111, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p0111, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1111, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1111, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1110, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1110, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1100, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1100, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1000, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1000, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1101, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1101, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
, ::testing::ValuesIn(s_poolingMethod)
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_2X2p1011, myriadLayers_IR3_BatchPoolingTests_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_2X2p1011, myriadLayers_IR3_BatchPoolingTests_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 512, 26, 26})
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 2, 2)) /* kernel */
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMax_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsFull),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxOverlappedByKernel_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxOverlappedByKernel_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 1024, 6, 6}),
::testing::Values<param_size>(MAKE_STRUCT(param_size, 7, 7)),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPad4_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPad4_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInputPad4),
::testing::ValuesIn(g_poolingKernelPad4),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPad4_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPad4_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInputPad4),
::testing::ValuesIn(g_poolingKernelPad4),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalMax_smoke,
::testing::ValuesIn(g_GlobalPoolingInput ));
-INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsMax_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(s_poolingLayerParams_k3x3),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvg_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvg_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsFull),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgOverlappedByKernel_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgOverlappedByKernel_smoke,
::testing::Combine(
::testing::Values<InferenceEngine::SizeVector>({1, 1024, 6, 6}),
::testing::Values<param_size>(MAKE_STRUCT(param_size, 7, 7)),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsAvg_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_3x3, myriadLayersTestsAvg_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(s_poolingLayerParams_k3x3),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalAvg_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsGlobalAvg_smoke,
::testing::ValuesIn(g_GlobalPoolingInput));
extern const char POOLING_AVG[] = "avg";
-class myriadLayersTestsMax_nightly: public PoolingTest<POOLING_MAX>
+class myriadLayersTestsMax_smoke: public PoolingTest<POOLING_MAX>
{
};
-class myriadLayersTestsMaxOverlappedByKernel_nightly: public PoolingTestPad4<POOLING_MAX, true>
+class myriadLayersTestsMaxOverlappedByKernel_smoke: public PoolingTestPad4<POOLING_MAX, true>
{
};
-class myriadLayersTestsMaxPad4_nightly: public PoolingTestPad4<POOLING_MAX>
+class myriadLayersTestsMaxPad4_smoke: public PoolingTestPad4<POOLING_MAX>
{
};
-class myriadLayersTestsGlobalMax_nightly: public GlobalPoolingTest<POOLING_MAX>
+class myriadLayersTestsGlobalMax_smoke: public GlobalPoolingTest<POOLING_MAX>
{
};
-class myriadLayersTestsAvg_nightly: public PoolingTest<POOLING_AVG>
+class myriadLayersTestsAvg_smoke: public PoolingTest<POOLING_AVG>
{
};
-class myriadLayersTestsAvgOverlappedByKernel_nightly: public PoolingTestPad4<POOLING_AVG, true>
+class myriadLayersTestsAvgOverlappedByKernel_smoke: public PoolingTestPad4<POOLING_AVG, true>
{
};
-class myriadLayersTestsAvgPad4_nightly: public PoolingTestPad4<POOLING_AVG>
+class myriadLayersTestsAvgPad4_smoke: public PoolingTestPad4<POOLING_AVG>
{
};
-class myriadLayersTestsGlobalAvg_nightly: public GlobalPoolingTest<POOLING_AVG>
+class myriadLayersTestsGlobalAvg_smoke: public GlobalPoolingTest<POOLING_AVG>
{
};
/* input tensor, kernel, stride, pads_begin, pads_end, auto_pad, exclude_pad method */
typedef std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, param_size, const char*, const char*, const char*> IR3_PoolParams;
-class myriadLayers_IR3_PoolingTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_IR3_PoolingTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_PoolParams> {
};
}
-TEST_P(myriadLayers_IR3_PoolingTests_nightly, Pooling) {
+TEST_P(myriadLayers_IR3_PoolingTests_smoke, Pooling) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayers_IR3_BatchPoolingTests_nightly: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
+class myriadLayers_IR3_BatchPoolingTests_smoke: public myriadLayersTests_nightly, /*input tensor, kernel, stride, pads_begin, pads_end, out_channel, group */
public testing::WithParamInterface<IR3_PoolParams> {
};
-TEST_P(myriadLayers_IR3_BatchPoolingTests_nightly, Pooling) {
+TEST_P(myriadLayers_IR3_BatchPoolingTests_smoke, Pooling) {
std::map<std::string, std::string> params;
InferenceEngine::SizeVector output_tensor;
int32_t IW = 0;
"max"
};
-TEST_P(myriadLayersTestsMax_nightly, MaxPooling)
+TEST_P(myriadLayersTestsMax_smoke, MaxPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsMaxOverlappedByKernel_nightly, MaxPooling)
+TEST_P(myriadLayersTestsMaxOverlappedByKernel_smoke, MaxPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsMaxPad4_nightly, MaxPoolingPad4)
+TEST_P(myriadLayersTestsMaxPad4_smoke, MaxPoolingPad4)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
auto refBlob = getReferenceOutput();
CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsAvg_nightly, AvgPooling)
+TEST_P(myriadLayersTestsAvg_smoke, AvgPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsAvgOverlappedByKernel_nightly, AvgPooling)
+TEST_P(myriadLayersTestsAvgOverlappedByKernel_smoke, AvgPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND);
}
-TEST_P(myriadLayersTestsAvgPad4_nightly, AvgPoolingPad4)
+TEST_P(myriadLayersTestsAvgPad4_smoke, AvgPoolingPad4)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(_layout_preference)));
auto refBlob = getReferenceOutput();
CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsGlobalMax_nightly, GlobalMaxPooling)
+TEST_P(myriadLayersTestsGlobalMax_smoke, GlobalMaxPooling)
{
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams()));
auto refBlob = getReferenceOutput();
CompareCommonAbsolute(_outputMap.begin()->second, refBlob, ERROR_BOUND);
}
-TEST_P(myriadLayersTestsGlobalAvg_nightly, GlobalAvgPooling)
+TEST_P(myriadLayersTestsGlobalAvg_smoke, GlobalAvgPooling)
{
if(_pad_val.x != 0 || _pad_val.y != 0) {
GTEST_SKIP() << "paddings should not be exist for GlobalAvgPool";
//
#include "myriad_layers_power_test.hpp"
-INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsPowerParams_nightly,
+INSTANTIATE_TEST_CASE_P( accuracy, myriadLayersTestsPowerParams_smoke,
::testing::Combine(
::testing::ValuesIn(s_powerTensors),
::testing::ValuesIn(s_powerParams))
}
}
-typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, pwr_test_params>> myriadLayersTestsPowerParams_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, pwr_test_params>> myriadLayersTestsPowerParams_smoke;
-TEST_P(myriadLayersTestsPowerParams_nightly, TestsPower) {
+TEST_P(myriadLayersTestsPowerParams_smoke, TestsPower) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto param = GetParam();
SizeVector tensor = std::get<0>(param);
#include "myriad_layers_prelu_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy_PReLU, myriadLayerPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_PReLU, myriadLayerPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(s_PReLUTensors)
, ::testing::Values<ChannelSharedPrelu>(0, 1)
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithPReLU_nightly,
+ accuracy, myriadLayerFullyConnectedWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_PReluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_PReluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<PReLULayerDef>(MAKE_STRUCT(PReLULayerDef, {{{PRELU_PARAM, "0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<PReLULayerDef>(MAKE_STRUCT(PReLULayerDef, {{{PRELU_PARAM, "0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithPReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithPReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
using namespace InferenceEngine;
PRETTY_PARAM(ChannelSharedPrelu, int);
-typedef myriadLayerTestBaseWithParam<tuple<SizeVector, ChannelSharedPrelu >> myriadLayerPReLU_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<SizeVector, ChannelSharedPrelu >> myriadLayerPReLU_smoke;
-TEST_P(myriadLayerPReLU_nightly, PReLU) {
+TEST_P(myriadLayerPReLU_smoke, PReLU) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
SizeVector dims = get<0>(GetParam());
{{{PRELU_PARAM, "1"}}}
};
-class myriadLayerFullyConnectedWithPReLU_nightly: public FCTest<PReLULayerDef>{
+class myriadLayerFullyConnectedWithPReLU_smoke: public FCTest<PReLULayerDef>{
};
#define TEST_BODY \
ref_PReLU_wrap);\
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams()));
-TEST_P(myriadLayerFullyConnectedWithPReLU_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithPReLU_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<std::tuple<fcon_test_params, int32_t, int32_t, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
#define ERROR_BOUND_WITH_RELU (4.e-3f)
-class myriadLayersTestsMaxPoolingWithPReLU_nightly: public PoolingTest<POOLING_MAX, PReLULayerDef>{
+class myriadLayersTestsMaxPoolingWithPReLU_smoke: public PoolingTest<POOLING_MAX, PReLULayerDef>{
};
-class myriadLayersTestsAvgPoolingWithPReLU_nightly: public PoolingTest<POOLING_AVG, PReLULayerDef>{
+class myriadLayersTestsAvgPoolingWithPReLU_smoke: public PoolingTest<POOLING_AVG, PReLULayerDef>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithPReLU_nightly, TestsMaxPoolingWithPReLU)
+TEST_P(myriadLayersTestsMaxPoolingWithPReLU_smoke, TestsMaxPoolingWithPReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-TEST_P(myriadLayersTestsAvgPoolingWithPReLU_nightly, TestsAvgPoolingWithPReLU)
+TEST_P(myriadLayersTestsAvgPoolingWithPReLU_smoke, TestsAvgPoolingWithPReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-class myriadLayerConvolutionWithPReLU_nightly: public ConvolutionTest<PReLULayerDef>{
+class myriadLayerConvolutionWithPReLU_smoke: public ConvolutionTest<PReLULayerDef>{
};
-TEST_P(myriadLayerConvolutionWithPReLU_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithPReLU_smoke, Convolution) {
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, uint32_t, uint32_t, PReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<6>(p);
TEST_BODY;
}
}
-class myriadLayersPriorBoxTests_nightly : public myriadLayersTests_nightly {
+class myriadLayersPriorBoxTests_smoke : public myriadLayersTests_nightly {
public:
Blob::Ptr getFp16Blob(const Blob::Ptr& in) {
if (in->getTensorDesc().getPrecision() == Precision::FP16)
}
};
-TEST_F(myriadLayersPriorBoxTests_nightly, NotLastLayer)
+TEST_F(myriadLayersPriorBoxTests_smoke, NotLastLayer)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
RunOnModel(model, "priorbox_copy");
}
-TEST_F(myriadLayersPriorBoxTests_nightly, LastLayer_FP16)
+TEST_F(myriadLayersPriorBoxTests_smoke, LastLayer_FP16)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
RunOnModel(model, "priorbox", Precision::FP16);
}
-TEST_F(myriadLayersPriorBoxTests_nightly, LastLayer_FP32)
+TEST_F(myriadLayersPriorBoxTests_smoke, LastLayer_FP32)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
CompareCommonAbsolute(_refBlob, outputBlob, 0.0);
}
-TEST_F(myriadLayersPriorBoxTests_nightly, FaceBoxLayer)
+TEST_F(myriadLayersPriorBoxTests_smoke, FaceBoxLayer)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
RunOnModelWithParams(model, "priorbox", params, Precision::FP16);
}
-TEST_F(myriadLayersPriorBoxTests_nightly, TwoPriorBoxLayersWithUnusedInput)
+TEST_F(myriadLayersPriorBoxTests_smoke, TwoPriorBoxLayersWithUnusedInput)
{
std::string model = R"V0G0N(
<net name="PriorBox" version="2" batch="1">
#define OUTPUT_SAMPLING_NUM (20) // Validate only top 20 rois
#define OUTPUT_ROI_MATCH_THRESHOLD (18) // At least 18 rois should be matched
-class myriadLayersTestsProposal_nightly : public myriadLayersTests_nightly {
+class myriadLayersTestsProposal_smoke : public myriadLayersTests_nightly {
protected:
std::string model;
)V0G0N";
}
-TEST_F(myriadLayersTestsProposal_nightly, Caffe) {
+TEST_F(myriadLayersTestsProposal_smoke, Caffe) {
// Verify only 20 ranked proposal output with GT values
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.26f));
}
-TEST_F(myriadLayersTestsProposal_nightly, CaffeNoClipBeforeNms) {
+TEST_F(myriadLayersTestsProposal_smoke, CaffeNoClipBeforeNms) {
// Verify only 20 ranked proposal output with GT values - reference get from MKLDNN plugin
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.26f));
}
-TEST_F(myriadLayersTestsProposal_nightly, CaffeClipAfterNms) {
+TEST_F(myriadLayersTestsProposal_smoke, CaffeClipAfterNms) {
// Verify only 20 ranked proposal output with GT values
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.26f));
}
-TEST_F(myriadLayersTestsProposal_nightly, CaffeNormalizedOutput) {
+TEST_F(myriadLayersTestsProposal_smoke, CaffeNormalizedOutput) {
// Verify only 20 ranked proposal output with GT values
std::vector<float> gt_values = {
ASSERT_NO_FATAL_FAILURE(compareOutputSampleToRef(gt_values, 0.026f));
}
-TEST_F(myriadLayersTestsProposal_nightly, TensorFlow) {
+TEST_F(myriadLayersTestsProposal_smoke, TensorFlow) {
model = R"V0G0N(
<net name="testProposal" version="2" batch="1">
#include "myriad_layers_psroipooling_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsPSROIPooling_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsPSROIPooling_smoke,
::testing::Combine(
::testing::ValuesIn(s_PSROIPoolingLayerInput),
::testing::ValuesIn(s_PSROIPoolingLayerParam),
using PSROIPoolingTestParams = std::tuple<Dims, psroipooling_param, uint32_t>;
-class myriadLayersTestsPSROIPooling_nightly: public myriadLayerTestBaseWithParam<PSROIPoolingTestParams> {
+class myriadLayersTestsPSROIPooling_smoke: public myriadLayerTestBaseWithParam<PSROIPoolingTestParams> {
public:
void genROIs(InferenceEngine::Blob::Ptr rois,
const PSROIPoolingParams& params,
1, 10, 30, 50, 100, 300
};
-TEST_P(myriadLayersTestsPSROIPooling_nightly, PSROIPooling) {
+TEST_P(myriadLayersTestsPSROIPooling_smoke, PSROIPooling) {
#if defined(_WIN32) || defined(WIN32)
SKIP() << "Disabled for Windows. CVS-13239";
#endif
#include "myriad_layers_reduce_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceAnd_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceAnd_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMin_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMin_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMax_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMax_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceSum_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceSum_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMean_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsReduceMean_smoke,
::testing::Combine(
- ::testing::ValuesIn(s_input_dims),
+ ::testing::ValuesIn(s_input_pair),
::testing::ValuesIn(s_axes_list),
::testing::ValuesIn(s_data_precision),
::testing::ValuesIn(s_keep_dims))
-);
+);
\ No newline at end of file
{REDUCE_MEAN, {&refReduceMeanI32, 0.0f, RefReduceMean<int32_t>::generateData}},
};
-using ReduceTestParams = std::tuple<SizeVector, SizeVector, Precision, bool>;
+using ReduceTestParams = std::tuple<std::pair<SizeVector, vpu::LayoutPreference>, SizeVector, Precision, bool>;
static const Precision axesPrecision = Precision::I32;
for (int i : list)
{
if (i < 0) // handle negative indices
- i = ndims - i;
+ i = ndims - std::abs(i);
EXPECT_TRUE((i >= 0) && (i < ndims));
mask |= (1 << i);
}
return mask;
}
- static Layout defaultLayout(int ndims)
- {
- switch (ndims)
- {
- case 5: return NCDHW;
- case 4: return NCHW;
- case 3: return CHW;
- case 2: return NC;
- case 1: return C;
- }
- return ANY;
- }
+
static void getAxesBlob(const SizeVector& axesList, TBlob<uint8_t>::Ptr& weightsBlob, TBlob<int32_t>::Ptr& axesBlob)
{
size_t axes_size = axesList.size();
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const auto params = GetParam();
- const auto inputDims = std::get<0>(params);
- const auto axesList = std::get<1>(params);
+ const auto inputPair = std::get<0>(params);
+ auto axesList = std::get<1>(params);
const auto dataPrecision = std::get<2>(params);
const int keepDims = std::get<3>(params) ? 1 : 0;
+ const auto inputDims = inputPair.first;
+ const auto layoutPreference = inputPair.second;
+
const auto outputDims = ReduceUtils::calcOutputDims(inputDims, axesList, keepDims);
const auto model = ReduceUtils::getModel(inputDims, axesList, outputDims, ReduceType, dataPrecision, keepDims);
TBlob<int32_t>::Ptr axesBlob;
ReduceUtils::getAxesBlob(axesList, weightsBlob, axesBlob);
ASSERT_NE(weightsBlob, nullptr);
-
+
ASSERT_NO_THROW(readNetwork(model, weightsBlob));
const auto& network = _cnnNetwork;
_inputsInfo = network.getInputsInfo();
_inputsInfo["reduce_input"]->setPrecision(dataPrecision);
- _inputsInfo["reduce_input"]->setLayout(ReduceUtils::defaultLayout(inputDims.size()));
+ _inputsInfo["reduce_input"]->setLayout(vpu::deviceLayout(TensorDesc::getLayoutByDims(inputDims), layoutPreference));
_outputsInfo = network.getOutputsInfo();
_outputsInfo["reduce"]->setPrecision(dataPrecision);
- _outputsInfo["reduce"]->setLayout(ReduceUtils::defaultLayout(outputDims.size()));
-
+ _outputsInfo["reduce"]->setLayout(vpu::deviceLayout(TensorDesc::getLayoutByDims(outputDims), layoutPreference));
StatusCode st = OK;
ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network, _config, &_resp));
refBlob = make_shared_blob<ie_fp16>(outputBlob->getTensorDesc());
refBlob->allocate();
- ref_reduce(inputBlob, axesBlob, refBlob, keepDims, reduceOp);
+ ref_reduce(inputBlob, axesBlob, refBlob, keepDims, layoutPreference, reduceOp);
CompareCommonAbsolute(outputBlob, refBlob, compareThreshold);
- } else if (dataPrecision == Precision::I32) {
- auto opIt = refMapI32.find(ReduceType);
- ASSERT_TRUE(opIt != refMapI32.end());
- auto reduceOp = opIt->second.op;
- auto generateData = opIt->second.generateData;
- generateData(inputBlob);
-
- ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
-
- refBlob = make_shared_blob<int32_t>(outputBlob->getTensorDesc());
- refBlob->allocate();
- ref_reduce(inputBlob, axesBlob, refBlob, keepDims, reduceOp);
- CompareCommonExact(outputBlob, refBlob);
- }
+ } else if (dataPrecision == Precision::I32) {
+ auto opIt = refMapI32.find(ReduceType);
+ ASSERT_TRUE(opIt != refMapI32.end());
+ auto reduceOp = opIt->second.op;
+ auto generateData = opIt->second.generateData;
+ generateData(inputBlob);
+
+ ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
+ ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+
+ refBlob = make_shared_blob<int32_t>(outputBlob->getTensorDesc());
+ refBlob->allocate();
+ ref_reduce(inputBlob, axesBlob, refBlob, keepDims, layoutPreference, reduceOp);
+ CompareCommonExact(outputBlob, refBlob);
+ }
}
};
-class myriadTestsReduceAnd_nightly: public ReduceTest<REDUCE_AND>
+class myriadTestsReduceAnd_smoke: public ReduceTest<REDUCE_AND>
{
};
-class myriadTestsReduceMin_nightly: public ReduceTest<REDUCE_MIN>
+class myriadTestsReduceMin_smoke: public ReduceTest<REDUCE_MIN>
{
};
-class myriadTestsReduceMax_nightly: public ReduceTest<REDUCE_MAX>
+class myriadTestsReduceMax_smoke: public ReduceTest<REDUCE_MAX>
{
};
-class myriadTestsReduceSum_nightly: public ReduceTest<REDUCE_SUM>
+class myriadTestsReduceSum_smoke: public ReduceTest<REDUCE_SUM>
{
};
-class myriadTestsReduceMean_nightly: public ReduceTest<REDUCE_MEAN>
+class myriadTestsReduceMean_smoke: public ReduceTest<REDUCE_MEAN>
{
};
// Tests are disabled due to hang: #-28315
-TEST_P(myriadTestsReduceAnd_nightly, And)
+TEST_P(myriadTestsReduceAnd_smoke, And)
{
testReduce();
}
-TEST_P(myriadTestsReduceMin_nightly, Min)
+TEST_P(myriadTestsReduceMin_smoke, Min)
{
testReduce();
}
-TEST_P(myriadTestsReduceMax_nightly, Max)
+TEST_P(myriadTestsReduceMax_smoke, Max)
{
testReduce();
}
-TEST_P(myriadTestsReduceSum_nightly, Sum)
+TEST_P(myriadTestsReduceSum_smoke, Sum)
{
testReduce();
}
-TEST_P(myriadTestsReduceMean_nightly, Mean)
+TEST_P(myriadTestsReduceMean_smoke, Mean)
{
testReduce();
}
-static const std::vector<SizeVector> s_input_dims =
+static const std::vector<std::pair<SizeVector, vpu::LayoutPreference>> s_input_pair =
{
- {1, 3, 2, 14, 32},
- {2, 2, 2, 14, 32},
- {3, 5, 4, 8, 16},
- {4, 2, 16, 16, 8},
-
- {3, 2, 14, 32},
- {2, 2, 14, 32},
- {5, 4, 8, 16},
- {2, 16, 16, 8},
-
- {3, 2, 14},
- {2, 2, 14},
- {5, 4, 8},
- {2, 16, 16},
-
- { 7, 3, 5, 1, 7, 11, 12},
+ {{1, 3, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{1, 3, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 2, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{2, 2, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{3, 5, 4, 8, 16}, vpu::LayoutPreference::ChannelMinor},
+ {{3, 5, 4, 8, 16}, vpu::LayoutPreference::ChannelMajor},
+ {{4, 2, 16, 16, 8}, vpu::LayoutPreference::ChannelMinor},
+ {{4, 2, 16, 16, 8}, vpu::LayoutPreference::ChannelMajor},
+
+ {{3, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{3, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 2, 14, 32}, vpu::LayoutPreference::ChannelMinor},
+ {{2, 2, 14, 32}, vpu::LayoutPreference::ChannelMajor},
+ {{5, 4, 8, 16}, vpu::LayoutPreference::ChannelMinor},
+ {{5, 4, 8, 16}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 16, 16, 8}, vpu::LayoutPreference::ChannelMinor},
+ {{2, 16, 16, 8}, vpu::LayoutPreference::ChannelMajor},
+
+ {{3, 2, 14}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 2, 14}, vpu::LayoutPreference::ChannelMajor},
+ {{5, 4, 8}, vpu::LayoutPreference::ChannelMajor},
+ {{2, 16, 16}, vpu::LayoutPreference::ChannelMajor},
+
+ {{7, 3, 5, 1, 7, 11, 12}, vpu::LayoutPreference::ChannelMajor},
};
static const std::vector<SizeVector> s_axes_list =
#include "myriad_layers_region_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerRegionYolo_nightly,
- ::testing::ValuesIn(s_regionData)
-);
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsRegion_CHW_HW_nightly,
- ::testing::Combine(
- ::testing::Values<InferenceEngine::SizeVector>({1, 125, 13, 13})
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 0, 0))
- , ::testing::Values<uint32_t>(125)
- , ::testing::Values<uint32_t>(1)
- )
-);
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsRegion_CHW_HW_80cl_nightly,
- ::testing::Combine(
- ::testing::Values<InferenceEngine::SizeVector>({1, 425, 13, 13})
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
- , ::testing::Values<param_size>(MAKE_STRUCT(param_size, 0, 0))
- , ::testing::Values<uint32_t>(425)
- , ::testing::Values<uint32_t>(1)
- )
-);
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerRegionYolo_CHW_nightly,
- ::testing::ValuesIn(s_classes)
-);
+ accuracy, myriadLayersTestsRegionYolo_smoke,
+ ::testing::Combine(
+ ::testing::Values<Coords>(4),
+ ::testing::Values<Classes>(20, 80),
+ ::testing::Values<Num>(5, 10),
+ ::testing::Values<MaskSize>(3),
+ ::testing::Values<DoSoftmax>(1, 0),
+ ::testing::Values(vpu::LayoutPreference::ChannelMajor, vpu::LayoutPreference::ChannelMinor),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
+));
#include <gtest/gtest.h>
#include "myriad_layers_tests.hpp"
-#include <math.h>
+#include <cmath>
using namespace InferenceEngine;
-struct region_test_params {
- tensor_test_params in;
- int coords;
- int classes;
- int num;
- int maskSize;
- int doSoftMax;
- std::string customLayers;
- friend std::ostream& operator<<(std::ostream& os, region_test_params const& tst)
- {
- return os << "tensor (" << tst.in
- << "),coords=" << tst.coords
- << ", classes=" << tst.classes
- << ", num=" << tst.num
- << ", maskSize=" << tst.maskSize
- << ", doSoftMax=" << tst.doSoftMax
- << ", by using custom layer=" << (tst.customLayers.empty() ? "no" : "yes");
- };
-};
-
-class myriadLayerRegionYolo_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<region_test_params> {
-};
-
-TEST_P(myriadLayerRegionYolo_nightly, BaseTestsRegion) {
- region_test_params p = ::testing::WithParamInterface<region_test_params>::GetParam();
-
- // TODO: M2 mode is not working for OpenCL compiler
- if(!p.customLayers.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
-
- std::map<std::string, std::string> params;
-
- params["coords"] = std::to_string(p.coords);
- params["classes"] = std::to_string(p.classes);
- params["num"] = std::to_string(p.num);
- params["mask"] = "0,1,2";
- params["do_softmax"] = std::to_string(p.doSoftMax);
-
- InferenceEngine::SizeVector tensor;
- tensor.resize(4);
- tensor[3] = p.in.w;
- tensor[2] = p.in.h;
- tensor[1] = p.in.c;
- tensor[0] = 1;
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = p.customLayers;
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in({tensor})
- .out({tensor}),
- ref_RegionYolo_wrap);
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().layoutPreference(vpu::LayoutPreference::ChannelMinor)));
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.0025);
-}
-
-static std::vector<region_test_params> s_regionData = {
- region_test_params{{1, (4+20+1)*5, 13, 13}, 4, 20, 5, 3, 1, ""},
- region_test_params{{1, (4+80+1)*5, 13, 13}, 4, 80, 5, 3, 1, ""},
- region_test_params{{1, (4+20+1)*3, 13, 13}, 4, 20, 9, 3, 0, ""},
- region_test_params{{1, (4+80+1)*3, 13, 13}, 4, 80, 9, 3, 0, ""},
-
-#ifdef VPU_HAS_CUSTOM_KERNELS
- region_test_params{{1, (4+20+1)*5, 13, 13}, 4, 20, 5, 3, 1, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
- region_test_params{{1, (4+80+1)*5, 13, 13}, 4, 80, 5, 3, 1, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
- region_test_params{{1, (4+20+1)*3, 13, 13}, 4, 20, 9, 3, 0, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
- region_test_params{{1, (4+80+1)*3, 13, 13}, 4, 80, 9, 3, 0, getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"},
-#endif
-};
-
-/* HW network needs to be created to test strides influence to RegionYolo input */
-/* so convolution layer added as the first layer to this test */
-class myriadLayersTestsRegion_CHW_HW_nightly: public ConvolutionTest<>{
-};
-
-/*80 input classes */
-class myriadLayersTestsRegion_CHW_HW_80cl_nightly: public ConvolutionTest<>{
-};
-
-/* to passthrough "original" data */
-template<size_t width>
-void constWeightsRange(uint16_t* ptr, size_t weightsSize) {
- ASSERT_NE(ptr, nullptr);
- ASSERT_EQ(weightsSize, width * width);
- std::memset(ptr, 0, sizeof(uint16_t) * (weightsSize));
- for (int i = 0; i < weightsSize/width; ++i) {
- ptr[i * width + i] = PrecisionUtils::f32tof16(1.0f);
- }
-}
-
-void constBiasesRange(uint16_t* ptr, size_t weightsSize) {
- std::memset(ptr, 0, sizeof(uint16_t) * (weightsSize));
-}
-
-void loadData(InferenceEngine::Blob::Ptr blob) {
- /* input blob has predefined size and CHW layout */
- ASSERT_NE(blob, nullptr);
- auto inDims = blob->getTensorDesc().getDims();
- InferenceEngine::Blob::Ptr inputBlobRef =
- InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, inDims, InferenceEngine::NCHW});
- inputBlobRef->allocate();
- const float* ref_values = inputBlobRef->buffer();
-
- std::string inputTensorBinary = TestDataHelpers::get_data_path();
- inputTensorBinary += "/vpu/InputYoLoV2Tiny.bin";
- ASSERT_TRUE(fromBinaryFile(inputTensorBinary, inputBlobRef));
- uint16_t *inputBlobRawDataFp16 = static_cast<uint16_t *>(blob->buffer());
- ASSERT_NE(inputBlobRawDataFp16, nullptr);
-
- switch(blob->getTensorDesc().getLayout()) {
- case InferenceEngine::NCHW:
- for (int indx = 0; indx < blob->size(); indx++) {
- inputBlobRawDataFp16[indx] = PrecisionUtils::f32tof16(ref_values[indx]);
- }
- break;
- case InferenceEngine::NHWC:
- for (int h = 0 ; h < inDims[2]; ++h) {
- for (int w = 0 ; w < inDims[3]; ++w) {
- for (int c = 0 ; c < inDims[1]; ++c) {
- int src_i = w + inDims[3] * h + inDims[3] * inDims[2] * c;
- int dst_i = c + inDims[1] * w + inDims[3] * inDims[1] * h;
- inputBlobRawDataFp16[dst_i] = PrecisionUtils::f32tof16(ref_values[src_i]);
- }
- }
- }
- break;
- default:
- FAIL() << "unsupported layout: " << blob->getTensorDesc().getLayout();
+#define ERROR_BOUND 0.0005f
+
+PRETTY_PARAM(Coords, int)
+PRETTY_PARAM(Classes, int)
+PRETTY_PARAM(Num, int)
+PRETTY_PARAM(MaskSize, int)
+PRETTY_PARAM(DoSoftmax, int)
+PRETTY_PARAM(CustomConfig, std::string)
+
+typedef myriadLayerTestBaseWithParam<std::tuple<Coords, Classes, Num, MaskSize, DoSoftmax,
+ vpu::LayoutPreference, IRVersion, CustomConfig>> myriadLayersTestsRegionYolo_smoke;
+
+TEST_P(myriadLayersTestsRegionYolo_smoke, RegionYolo) {
+ const int coords = std::get<0>(GetParam());
+ const int classes = std::get<1>(GetParam());
+ const int num = std::get<2>(GetParam());
+ const int maskSize = std::get<3>(GetParam());
+ const int doSoftmax = std::get<4>(GetParam());
+ const auto layoutPreference = std::get<5>(GetParam());
+ _irVersion = std::get<6>(GetParam());
+ const std::string customConfig = std::get<7>(GetParam());
+
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
}
-}
-
-void loadData_80cl(InferenceEngine::Blob::Ptr blob) {
- /* input blob has predefined size and CHW layout */
- ASSERT_NE(blob, nullptr);
- auto inDims = blob->getTensorDesc().getDims();
- InferenceEngine::Blob::Ptr inputBlobRef =
- InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, inDims, InferenceEngine::NCHW});
- inputBlobRef->allocate();
- const float* ref_values = inputBlobRef->buffer();
- std::string inputTensorBinary = TestDataHelpers::get_data_path();
- inputTensorBinary += "/vpu/InputYoLoV2_80cl.bin";
- ASSERT_TRUE(fromBinaryFile(inputTensorBinary, inputBlobRef));
- uint16_t *inputBlobRawDataFp16 = static_cast<uint16_t *>(blob->buffer());
- ASSERT_NE(inputBlobRawDataFp16, nullptr);
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- switch(blob->getTensorDesc().getLayout()) {
- case InferenceEngine::NCHW:
- for (int indx = 0; indx < blob->size(); indx++) {
- inputBlobRawDataFp16[indx] = PrecisionUtils::f32tof16(ref_values[indx]);
+ const auto mask = [&] {
+ std::string mask;
+ for (int i = 0; i < maskSize; i++) {
+ mask += std::to_string(i) + ',';
}
- break;
- case InferenceEngine::NHWC:
- for (int h = 0 ; h < inDims[2]; ++h) {
- for (int w = 0 ; w < inDims[3]; ++w) {
- for (int c = 0 ; c < inDims[1]; ++c) {
- int src_i = w + inDims[3] * h + inDims[3] * inDims[2] * c;
- int dst_i = c + inDims[1] * w + inDims[3] * inDims[1] * h;
- inputBlobRawDataFp16[dst_i] = PrecisionUtils::f32tof16(ref_values[src_i]);
- }
- }
- }
- break;
- default:
- FAIL() << "unsupported layout: " << blob->getTensorDesc().getLayout();
- }
-}
-
-TEST_P(myriadLayersTestsRegion_CHW_HW_nightly, RegionYolo) {
- std::map<std::string, std::string> params;
- params["coords"] = "4";
- params["classes"] = "20";
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in({_output_tensor})
- .out({{1, _output_tensor[0] * _output_tensor[1] * _output_tensor[2] * _output_tensor[3]}}),
- ref_RegionYolo_wrap);
- _testNet.setWeightsCallbackForLayer(0, constWeightsRange<125>);
- _testNet.setBiasesCallbackForLayer(0, constBiasesRange);
- _genDataCallback = loadData;
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt(true)));
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.0035);
-}
-
-TEST_P(myriadLayersTestsRegion_CHW_HW_80cl_nightly, RegionYolol) {
- std::map<std::string, std::string> params;
- params["coords"] = "4";
- params["classes"] = "80";
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in({_output_tensor})
- .out({{1, _output_tensor[0] * _output_tensor[1] * _output_tensor[2] * _output_tensor[3]}}),
- ref_RegionYolo_wrap);
- _testNet.setWeightsCallbackForLayer(0, constWeightsRange<425>);
- _testNet.setBiasesCallbackForLayer(0, constBiasesRange);
- _genDataCallback = loadData_80cl;
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt(true)));
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.0060);
-}
-
-class myriadLayerRegionYolo_CHW_nightly: public myriadLayersTests_nightly,
- public testing::WithParamInterface<int> {
-};
-
-TEST_P(myriadLayerRegionYolo_CHW_nightly, TestsRegion) {
- auto classes = GetParam();
- InferenceEngine::SizeVector input_dims = {1, 125, 13, 13};
- if (classes == 80) {
- input_dims[1] = 425;
- }
- IN_OUT_desc input_tensor;
- input_tensor.push_back(input_dims);
+ if (!mask.empty()) mask.pop_back();
+ return mask;
+ }();
std::map<std::string, std::string> params;
- params["coords"] = "4";
+ params["coords"] = std::to_string(coords);
params["classes"] = std::to_string(classes);
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in(input_tensor)
- .out({{1, input_dims[0] * input_dims[1] * input_dims[2] * input_dims[3]}}),
- ref_RegionYolo_wrap);
- _genDataCallback = loadData;
- if (classes == 80) {
- _genDataCallback = loadData_80cl;
- }
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams()));
- /* bound is too high , set for M2 tests */
- CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), 0.006);
-}
+ params["num"] = std::to_string(num);
+ params["mask"] = mask;
+ params["do_softmax"] = std::to_string(doSoftmax);
+ params["axis"] = "0";
+ params["end_axis"] = "0";
-TEST_P(myriadLayerRegionYolo_CHW_nightly, Test_CHW_HWC_Compare) {
- auto classes = GetParam();
- IN_OUT_desc input_tensor;
- InferenceEngine::SizeVector input_dims = {1, 125, 13, 13};
- if (classes == 80) {
- input_dims[1] = 425;
- }
+ const auto dims = [&] {
+ const auto regions = doSoftmax ? num : maskSize;
+ const uint32_t channels = (coords + classes + 1) * regions;
+ IE_ASSERT(channels > 0);
+ return tensor_test_params{1, channels, 13, 13};
+ }();
- input_tensor.push_back(input_dims);
+ SetInputTensor(dims);
+ SetOutputTensor(dims);
- std::map<std::string, std::string> params;
- params["coords"] = "4";
- params["classes"] = std::to_string(classes);
- params["num"] = "5";
- params["mask"] = std::string("0,1,2");
- params["do_softmax"] = "1";
- _testNet.addLayer(LayerInitParams("RegionYolo")
- .params(params)
- .in(input_tensor)
- .out({{1, input_dims[0] * input_dims[1] * input_dims[2] * input_dims[3]}}),
- ref_RegionYolo_wrap);
- if (classes == 80) {
- _genDataCallback = loadData_80cl;
- }
- _config[VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION)] = CONFIG_VALUE(NO);
- ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt(false).runRefGraph(false)));
- /* create NHWC version */
- /* we cannot use the same generateNetAndInfer call due */
- /* to IE bug. */
- InferenceEngine::InputsDataMap inputsInfo;
- InferenceEngine::BlobMap outputMap;
- InferenceEngine::OutputsDataMap outputsInfo;
- InferenceEngine::IExecutableNetwork::Ptr exeNetwork;
- InferenceEngine::IInferRequest::Ptr inferRequest;
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("RegionYolo").params(params),
+ NetworkInitParams()
+ .layoutPreference(layoutPreference)
+ .lockLayout(true)));
- _inputsInfo.begin()->second->setLayout(NHWC);
- _outputsInfo.begin()->second->setLayout(NC);
+ ASSERT_TRUE(Infer());
- InferenceEngine::StatusCode st = InferenceEngine::StatusCode::GENERAL_ERROR;
- ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(exeNetwork, _cnnNetwork, _config, &_resp));
- ASSERT_NE(exeNetwork, nullptr) << _resp.msg;
- ASSERT_NO_THROW(exeNetwork->CreateInferRequest(inferRequest, &_resp)) << _resp.msg;
- ASSERT_NE(inferRequest, nullptr) << _resp.msg;
- ASSERT_NO_THROW(inputsInfo = _cnnNetwork.getInputsInfo());
- auto inIt = _inputsInfo.begin();
- for (auto in = _inputsInfo.begin(); in != _inputsInfo.end(); in++) {
- Blob::Ptr inpt;
- ASSERT_NO_THROW(_inferRequest->GetBlob(inIt->first.c_str(), inpt, &_resp));
- ASSERT_NO_THROW(inferRequest->SetBlob(inIt->first.c_str(), inpt, &_resp));
- ++inIt;
- }
- ASSERT_NO_THROW(outputsInfo = _cnnNetwork.getOutputsInfo());
- auto outIt = _outputsInfo.begin();
- for (auto outputInfo : outputsInfo) {
- outputInfo.second->setPrecision(outIt->second->getTensorDesc().getPrecision());
- InferenceEngine::SizeVector outputDims = outputInfo.second->getTensorDesc().getDims();
- Blob::Ptr outputBlob = nullptr;
- Layout layout = outIt->second->getTensorDesc().getLayout();
- // work only with NHWC layout if size of the input dimensions == NHWC
- switch (outputInfo.second->getPrecision()) {
- case Precision::FP16:
- outputBlob = InferenceEngine::make_shared_blob<ie_fp16>({Precision::FP16, outputDims, layout});
- break;
- case Precision::FP32:
- outputBlob = InferenceEngine::make_shared_blob<float>({Precision::FP32, outputDims, layout});
- break;
- default:
- THROW_IE_EXCEPTION << "Unsupported precision for output. Supported FP16, FP32";
- }
- outputBlob->allocate();
- st = inferRequest->SetBlob(outputInfo.first.c_str(), outputBlob, &_resp);
- outputMap[outputInfo.first] = outputBlob;
- ASSERT_EQ((int) InferenceEngine::StatusCode::OK, st) << _resp.msg;
- ++outIt;
- }
- ASSERT_EQ(inferRequest->Infer(&_resp), InferenceEngine::OK);
- /* bound is too high !!!! investigation TBD */
- CompareCommonAbsolute(_outputMap.begin()->second, outputMap.begin()->second, 0.001);
+ ASSERT_NO_FATAL_FAILURE(ref_RegionYolo(_inputMap.begin()->second, _refBlob,
+ coords, classes, num, maskSize, doSoftmax));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-const std::vector<int> s_classes = {20, 80};
+std::vector<CustomConfig> s_CustomConfig = {
+ {""},
+#ifdef VPU_HAS_CUSTOM_KERNELS
+ getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
+#endif
+};
\ No newline at end of file
#include "myriad_layers_relu_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReLU_smoke,
::testing::Combine(
::testing::ValuesIn(s_copyTensors),
::testing::ValuesIn(s_reluLayerParams)
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithReLU_nightly,
+ accuracy, myriadLayerFullyConnectedWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_reluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(s_reluLayerParams))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsMaxPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<ReLULayerDef>(MAKE_STRUCT(ReLULayerDef, {{{"negative_slope", "0.0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayersTestsAvgPoolingWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp),
::testing::Values<pooling_layer_params>(MAKE_STRUCT(pooling_layer_params, {3, 3}, {1, 1}, {1, 1})),
::testing::Values<ReLULayerDef>(MAKE_STRUCT(ReLULayerDef, {{{"negative_slope", "0.0"}}})))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithReLU_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy_postop, myriadLayerConvolutionWithReLU_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput_postOp)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
const std::string relu_param = "negative_slope";
-class myriadLayersTestsReLUMergeWithBias_nightly : public myriadLayersTests_nightly {
+class myriadLayersTestsReLUMergeWithBias_smoke : public myriadLayersTests_nightly {
public:
void RunTest(const std::string& model, size_t num_weights, size_t num_bias) {
StatusCode st;
{{{"negative_slope", "0.1"}}},
};
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, ReLULayerDef>> myriadLayerReLU_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, ReLULayerDef>> myriadLayerReLU_smoke;
-TEST_P(myriadLayerReLU_nightly, ReLU) {
+TEST_P(myriadLayerReLU_smoke, ReLU) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto input_dims = std::get<0>(GetParam());
auto extraLayerParams = std::get<1>(GetParam());
},
};
-class myriadLayerFullyConnectedWithReLU_nightly: public FCTest<ReLULayerDef>{
+class myriadLayerFullyConnectedWithReLU_smoke: public FCTest<ReLULayerDef>{
};
-TEST_P(myriadLayerFullyConnectedWithReLU_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithReLU_smoke, TestsFullyConnected)
{
auto p = ::testing::WithParamInterface<std::tuple<fcon_test_params, int32_t, int32_t, ReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
#define ERROR_BOUND_WITH_RELU (4.e-3f)
-class myriadLayersTestsMaxPoolingWithReLU_nightly: public PoolingTest<POOLING_MAX, ReLULayerDef>{
+class myriadLayersTestsMaxPoolingWithReLU_smoke: public PoolingTest<POOLING_MAX, ReLULayerDef>{
};
-class myriadLayersTestsAvgPoolingWithReLU_nightly: public PoolingTest<POOLING_AVG, ReLULayerDef>{
+class myriadLayersTestsAvgPoolingWithReLU_smoke: public PoolingTest<POOLING_AVG, ReLULayerDef>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithReLU_nightly, TestsMaxPoolingWithReLU)
+TEST_P(myriadLayersTestsMaxPoolingWithReLU_smoke, TestsMaxPoolingWithReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, ReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-TEST_P(myriadLayersTestsAvgPoolingWithReLU_nightly, TestsAvgPoolingWithReLU)
+TEST_P(myriadLayersTestsAvgPoolingWithReLU_smoke, TestsAvgPoolingWithReLU)
{
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, pooling_layer_params, vpu::LayoutPreference, ReLULayerDef>>::GetParam();
auto extraLayerParams = std::get<3>(p);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_RELU);
}
-class myriadLayerConvolutionWithReLU_nightly: public ConvolutionTest<ReLULayerDef>{
+class myriadLayerConvolutionWithReLU_smoke: public ConvolutionTest<ReLULayerDef>{
};
-TEST_P(myriadLayerConvolutionWithReLU_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithReLU_smoke, Convolution) {
auto p = ::testing::WithParamInterface<std::tuple<InferenceEngine::SizeVector, param_size, param_size, param_size, uint32_t, uint32_t, ReLULayerDef>>::GetParam();
auto ReLUParam = std::get<6>(p);
_testNet.addLayer(LayerInitParams("ReLU")
#include "myriad_layers_reorg_test.hpp"
-static std::vector<std::string> s_CustomConfig = {
- "",
-#ifdef VPU_HAS_CUSTOM_KERNELS
- getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
-#endif
-};
-
-static std::vector<layoutPreference> layoutPreferences = {
- vpu::LayoutPreference::ChannelMajor,
-#ifndef VPU_HAS_CUSTOM_KERNELS
- vpu::LayoutPreference::ChannelMinor
-#endif
-};
-
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReorg_nightly, ::testing::Combine(
- ::testing::Values<DimsInput>(
- MAKE_STRUCT(tensor_test_params, 1, 64, 26, 26),
- MAKE_STRUCT(tensor_test_params, 1, 192, 6 * 26, 6 * 26),
- MAKE_STRUCT(tensor_test_params, 1, 4, 6, 6)
- ),
- ::testing::Values<ScaleOutput>(2),
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReorg_smoke, ::testing::Combine(
+ ::testing::ValuesIn(s_ReorgInputs),
::testing::Values<Stride>(2),
- ::testing::ValuesIn(layoutPreferences),
- ::testing::ValuesIn(s_CustomConfig),
- ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10)
+ ::testing::Values(vpu::LayoutPreference::ChannelMinor, vpu::LayoutPreference::ChannelMajor),
+ ::testing::Values(IRVersion::v7, IRVersion::v10),
+ ::testing::ValuesIn(s_CustomConfig)
));
#include <gtest/gtest.h>
#include "myriad_layers_tests.hpp"
-using std::tuple;
-using std::get;
-
using namespace InferenceEngine;
-static void reorg_calculate(short *inp, int w, int h, int c, int batch, int stride, float *out)
+static void reorg_calculate(const Blob::Ptr src, Blob::Ptr dst, int stride)
{
- int out_c = c / (stride*stride);
-
- int oc = c * (stride*stride);
- int oh = h / stride;
- int ow = w / stride;
-
- for(int b = 0; b < batch; ++b)
- {
- for(int k = 0; k < c; ++k)
- {
- for(int j = 0; j < h; ++j)
- {
- for(int i = 0; i < w; ++i)
- {
- int in_index = i + w * (j + h * (k + c * b));
-
- int new_z = in_index / (oh*ow);
- int new_y = (in_index %(oh*ow)) / ow;
- int new_x = (in_index %(oh*ow)) % ow;
- int new_index = new_z + new_x * oc + new_y * oc * ow;
-
- int c2 = k % out_c;
- int offset = k / out_c;
- int w2 = i*stride + offset % stride;
- int h2 = j*stride + offset / stride;
- int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b));
-
- out[new_index] = PrecisionUtils::f16tof32(inp[out_index]);
- }
- }
- }
- }
+ ASSERT_NE(src, nullptr);
+ ASSERT_NE(dst, nullptr);
+ const uint16_t *src_data = src->buffer();
+ uint16_t *dst_data = dst->buffer();
+ ASSERT_NE(src_data, nullptr);
+ ASSERT_NE(dst_data, nullptr);
+
+ const auto inputDims = src->getTensorDesc().getDims();
+ const int C = inputDims[1];
+ const int H = inputDims[2];
+ const int W = inputDims[3];
+
+ const auto inputCHW = [&] {
+ auto inputCHW = std::vector<ie_fp16>(C*H*W);
+ if (Layout::NCHW == src->getTensorDesc().getLayout()) {
+ std::copy(src_data, src_data + C*H*W, begin(inputCHW));
+ } else {
+ for (int c = 0; c < C; c++) {
+ for (int h = 0; h < H; h++) {
+ for (int w = 0; w < W; w++) {
+ inputCHW[c*H*W + h*W + w] = src_data[h*W*C + w*C + c];
+ }
+ }
+ }
+ }
+ return inputCHW;
+ }();
+
+ const int C2 = C/(stride*stride);
+ const int H2 = H*stride;
+ const int W2 = W*stride;
+
+ for (int c = 0; c < C; ++c) {
+ for (int h = 0; h < H; ++h) {
+ for (int w = 0; w < W; ++w) {
+ const int offset = c/C2;
+ const int c2 = c - C2*offset;
+ const int h2 = h*stride + offset/stride;
+ const int w2 = w*stride + offset - stride*(offset/stride);
+
+ dst_data[c*H*W + h*W + w] = inputCHW[c2*H2*W2 + h2*W2 + w2];
+ }
+ }
+ }
+
+ dst->getTensorDesc().setLayout(Layout::NCHW);
}
PRETTY_PARAM(Stride, int);
-PRETTY_PARAM(ScaleOutput, int);
PRETTY_PARAM(layoutPreference, vpu::LayoutPreference);
+PRETTY_PARAM(CustomConfig, std::string)
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, Stride, layoutPreference, IRVersion, CustomConfig>>
+ myriadLayersTestsReorg_smoke;
-typedef myriadLayerTestBaseWithParam<tuple<DimsInput, ScaleOutput, Stride, layoutPreference, std::string, IRVersion>> myriadLayersTestsReorg_nightly;
-
-TEST_P(myriadLayersTestsReorg_nightly, TestsReorg) {
+TEST_P(myriadLayersTestsReorg_smoke, TestsReorg) {
+ const SizeVector dimsInput = std::get<0>(GetParam());
+ const int stride = std::get<1>(GetParam());
+ const auto layoutPreference = std::get<2>(GetParam());
+ _irVersion = std::get<3>(GetParam());
+ const std::string customConfig = std::get<4>(GetParam());
- // TODO: M2 mode is not working for OpenCL compiler
- if(!get<4>(GetParam()).empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
+ if(!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- tensor_test_params dimsInput = get<0>(GetParam());
+ const auto dimsOutput = SizeVector{dimsInput[0],
+ dimsInput[1] * (stride * stride),
+ dimsInput[2] / stride,
+ dimsInput[3] / stride};
- int scaleOutput = get<1>(GetParam());
- tensor_test_params dimsOutput = {dimsInput.n, dimsInput.c * (scaleOutput * scaleOutput), dimsInput.h / scaleOutput, dimsInput.w / scaleOutput};
+ SetInputTensors({dimsInput});
+ SetOutputTensors({dimsOutput});
- int stride = get<2>(GetParam());
- auto layoutPreference = get<3>(GetParam());
- _irVersion = get<5>(GetParam());
std::map<std::string, std::string> params;
- std::string type = "ReorgYolo";
-
params["stride"] = std::to_string(stride);
- SetInputTensor(dimsInput);
- SetOutputTensor(dimsOutput);
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = get<4>(GetParam());
- ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams(type)
- .params(params),
- NetworkInitParams().layoutPreference(layoutPreference)
- .outputPrecision(InferenceEngine::Precision::FP32)));
- /* input data preparation */
- SetInputInOrder();
+
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("ReorgYolo").params(params),
+ NetworkInitParams()
+ .layoutPreference(layoutPreference)
+ .lockLayout(true)));
ASSERT_TRUE(Infer());
- InferenceEngine::SizeVector inputDims = _inputsInfo.begin()->second->getTensorDesc().getDims();
- InferenceEngine::Blob::Ptr inputBlobRef =
- InferenceEngine::make_shared_blob<short>({InferenceEngine::Precision::FP16, inputDims, InferenceEngine::NHWC});
- inputBlobRef->allocate();
- short *inputBlobRefRawData = inputBlobRef->buffer();
-
- int c = inputDims[1];
- int h = inputDims[2];
- int w = inputDims[3];
-
- auto inputBlob =_inputMap[_inputsInfo.begin()->first];
- short * inputBlob_data = inputBlob->buffer();
-
- /* Preliminary repacking */
- for(int k = 0; k < c; k++)
- {
- for(int j = 0; j < h; j++)
- {
- for(int i = 0; i < w; i++)
- {
- int dst_index = i + w * j + w * h * k;
- int src_index = k + c * i + c * w * j;
-
- inputBlobRefRawData[dst_index] = inputBlob_data[src_index];
- }
- }
- }
-
- auto outputBlob =_outputMap[_outputsInfo.begin()->first];
- InferenceEngine::SizeVector outputDims = _outputsInfo.begin()->second->getTensorDesc().getDims();
-
- InferenceEngine::TBlob<float>::Ptr outputBlobRef =
- InferenceEngine::make_shared_blob<float>(TensorDesc(InferenceEngine::Precision::FP32, outputDims, InferenceEngine::NCHW));
- outputBlobRef->allocate();
- float *outputBlobRefRawData = outputBlobRef->buffer();
-
- reorg_calculate(inputBlobRefRawData, w, h, c, 1, stride, outputBlobRefRawData);
-
- compare(outputBlob->buffer(), outputBlobRef->buffer(), outputBlob->size(), 0.0);
+
+ ASSERT_NO_FATAL_FAILURE(reorg_calculate(_inputMap.begin()->second, _refBlob, stride));
+
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, 0);
}
+
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""},
+#ifdef VPU_HAS_CUSTOM_KERNELS
+ getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
+#endif
+};
+
+static std::vector<SizeVector> s_ReorgInputs = {
+ {1, 64, 26, 26},
+ {1, 192, 6 * 26, 6 * 26},
+ {1, 4, 6, 6}
+};
\ No newline at end of file
#include "myriad_layers_resample_test.hpp"
-INSTANTIATE_TEST_CASE_P(myriad, myriadResampleLayerTests_nightly,
- ::testing::Combine(
- ::testing::Values(CONFIG_VALUE(NO), CONFIG_VALUE(YES)),
- ::testing::ValuesIn(s_ResampleCustomConfig),
- ::testing::ValuesIn(s_ResampleAntialias)));
+// #-31522
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_accuracy, myriadResampleLayerTests_smoke,
+ ::testing::Combine(
+ ::testing::ValuesIn(s_ResampleInput),
+ ::testing::Values<Factor>(2.0f, 0.5f),
+ ::testing::Values<Antialias>(false, true),
+ ::testing::Values<HwOptimization>(false, true),
+ ::testing::ValuesIn(s_CustomConfig))
+);
#include <cmath>
#include "myriad_layers_tests.hpp"
-// #include <iostream>
using namespace InferenceEngine;
#define ERROR_BOUND 1e-3
+PRETTY_PARAM(Factor, float)
+PRETTY_PARAM(Antialias, int)
+PRETTY_PARAM(HwOptimization, bool);
+PRETTY_PARAM(CustomConfig, std::string);
+
+typedef myriadLayerTestBaseWithParam<std::tuple<SizeVector, Factor, Antialias, HwOptimization, CustomConfig>>
+ myriadResampleLayerTests_smoke;
+
static inline float triangleCoeff(float x)
{
return (1.0f - fabsf(x));
}
+
void refResample(const Blob::Ptr src, Blob::Ptr dst, int antialias) {
ie_fp16 *src_data = static_cast<ie_fp16*>(src->buffer());
ie_fp16 *output_sequences = static_cast<ie_fp16*>(dst->buffer());
if (IH == OH && IW == OW)
{
- int b = 0;
- for (int c = 0; c < C; c++)
- for (int h = 0; h < IH; h++)
- for (int w = 0; w < IW; w++){
- int dst_index = w + IW * h + IW * IH * c;
- int src_index = dst_index;
- output_sequences[dst_index] = src_data[src_index];
- }
+ std::copy(src_data, src_data + C*IH*IW, output_sequences);
return;
}
}
}
-PRETTY_PARAM(hwAcceleration, std::string);
-PRETTY_PARAM(customConfig, std::string);
-PRETTY_PARAM(Antialias, int)
-
-typedef myriadLayerTestBaseWithParam<std::tuple<std::string, std::string, Antialias>> myriadResampleLayerTests_nightly;
-
-TEST_P(myriadResampleLayerTests_nightly, Resample) {
- std::string model = R"V0G0N(
- <net name="Resample" version="2" batch="1">
- <layers>
- <layer id="0" name="data" precision="FP16" type="Input">
- <output>
- <port id="0">
- <dim>1</dim>
- <dim>128</dim>
- <dim>26</dim>
- <dim>26</dim>
- </port>
- </output>
- </layer>
- <layer id="1" name="detector/yolo-v3/ResizeNearestNeighbor" precision="FP16" type="Resample">
- <data antialias="@TEST@" factor="2.0" type="caffe.ResampleParameter.NEAREST" fx="0.5" fy="0.5"/>
- <input>
- <port id="1">
- <dim>1</dim>
- <dim>128</dim>
- <dim>26</dim>
- <dim>26</dim>
- </port>
- </input>
- <output>
- <port id="2">
- <dim>1</dim>
- <dim>128</dim>
- <dim>52</dim>
- <dim>52</dim>
- </port>
- </output>
- </layer>
- </layers>
- <edges>
- <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
- </edges>
- </net>
- )V0G0N";
-
- SetSeed(DEFAULT_SEED_VALUE + 6);
-
- std::string HWConfigValue = std::get<0>(GetParam());
- std::string customConfig = std::get<1>(GetParam());
- int antialias = std::get<2>(GetParam());
-
- model.replace( model.find("@TEST@"), sizeof("@TEST@") -1, std::to_string(antialias));
- if((customConfig != "") || (antialias != 1)){
- if(!customConfig.empty() && !CheckMyriadX()) {
- GTEST_SKIP()<<"Custom layers for MYRIAD2 not supported";
- }
- _config[VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION)] = HWConfigValue;
- _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- StatusCode st;
-
- ASSERT_NO_THROW(readNetwork(model));
-
- const auto& network = _cnnNetwork;
+TEST_P(myriadResampleLayerTests_smoke, Resample) {
+ const SizeVector inputDims = std::get<0>(GetParam());
+ const float factor = std::get<1>(GetParam());
+ const bool antialias = std::get<2>(GetParam());
+ const bool hwOptimization = std::get<3>(GetParam());
+ const std::string customConfig = std::get<4>(GetParam());
- _inputsInfo = network.getInputsInfo();
- _inputsInfo["data"]->setPrecision(Precision::FP16);
- _inputsInfo["data"]->setLayout(NCHW);
+ ASSERT_GT(factor, 0);
- _outputsInfo = network.getOutputsInfo();
- _outputsInfo["detector/yolo-v3/ResizeNearestNeighbor"]->setPrecision(Precision::FP16);
+ if (customConfig.empty() && antialias) {
+ GTEST_SKIP() << "Native Resample with antialiasing is not supported";
+ }
- ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork(_exeNetwork, network,
- {{VPU_CONFIG_KEY(CUSTOM_LAYERS), customConfig}, {VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION), HWConfigValue}}, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
- ASSERT_NE(_exeNetwork, nullptr) << _resp.msg;
+ if (!customConfig.empty() && !CheckMyriadX()) {
+ GTEST_SKIP() << "Custom layers for MYRIAD2 not supported";
+ }
- ASSERT_NO_THROW(st = _exeNetwork->CreateInferRequest(_inferRequest, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ _config[VPU_CONFIG_KEY(CUSTOM_LAYERS)] = customConfig;
- Blob::Ptr data;
- ASSERT_NO_THROW(st = _inferRequest->GetBlob("data", data, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ const auto outputDims = SizeVector{inputDims[0],
+ inputDims[1],
+ (size_t)(inputDims[2] * factor),
+ (size_t)(inputDims[3] * factor)};
- GenRandomData(data);
+ SetInputTensors({inputDims});
+ SetOutputTensors({outputDims});
- ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ std::map<std::string, std::string> params;
+ params["antialias"] = std::to_string((int)antialias);
+ params["factor"] = std::to_string(factor);
- Blob::Ptr outputBlob;
- ASSERT_NO_THROW(_inferRequest->GetBlob("detector/yolo-v3/ResizeNearestNeighbor", outputBlob, &_resp));
- ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
+ ASSERT_NO_FATAL_FAILURE(makeSingleLayerNetwork(LayerInitParams("Resample").params(params),
+ NetworkInitParams()
+ .useHWOpt(hwOptimization)
+ .lockLayout(true)));
- _refBlob = make_shared_blob<ie_fp16>(TensorDesc(Precision::FP16, outputBlob->getTensorDesc().getDims(), NCHW));
- _refBlob->allocate();
+ ASSERT_TRUE(Infer());
- refResample(data, _refBlob, antialias);
+ ASSERT_NO_FATAL_FAILURE(refResample(_inputMap.begin()->second, _refBlob, antialias));
- CompareCommonAbsolute(outputBlob, _refBlob, ERROR_BOUND);
- }
+ CompareCommonAbsolute(_outputMap.begin()->second, _refBlob, ERROR_BOUND);
}
-static std::vector<std::string> s_ResampleCustomConfig = {
- "",
+static std::vector<SizeVector> s_ResampleInput = {
+ {1, 128, 26, 26},
+ {1, 64, 52, 52},
+ {1, 23, 14, 14}
+};
+
+static std::vector<CustomConfig> s_CustomConfig = {
+ {""},
#ifdef VPU_HAS_CUSTOM_KERNELS
getIELibraryPath() + "/vpu_custom_kernels/customLayerBindings.xml"
#endif
};
-static std::vector<Antialias> s_ResampleAntialias = {
- {0, 1}
-};
#include "ngraph_functions/subgraph_builders.hpp"
#include "myriad_layers_reshape_test.hpp"
-TEST_F(myriadEliminateReshapeTests_nightly, SplitConvConcat) {
+TEST_F(myriadEliminateReshapeTests_smoke, SplitConvConcat) {
ASSERT_NO_THROW(_cnnNetwork = InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSplitConvConcat()));
StatusCode st;
EXPECT_EQ(InferenceEngineProfileInfo::NOT_RUN, layerInfo.status);
}
-TEST_F(myriadLayerReshapeFasterRCNN_nightly, Reshape) {
+TEST_F(myriadLayerReshapeFasterRCNN_smoke, Reshape) {
InferenceEngine::SizeVector input_tensor = {1, 14, 14, 24};
InferenceEngine::SizeVector output_tensor = {1, 2352, 2};
std::map<std::string, std::string> layer_params = {
ASSERT_TRUE(generateNetAndInfer(NetworkInitParams().useHWOpt( CheckMyriadX())));
}
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReshape_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReshape_smoke,
::testing::Combine(
::testing::ValuesIn(s_reshapeInParams),
::testing::ValuesIn(s_reshapeOutParams))
);
-INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayerReshape_nightly,
+INSTANTIATE_TEST_CASE_P(fc_to_conv_case, myriadLayerReshape_smoke,
::testing::Values(
std::make_tuple(
SizeVector{400, 12544},
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeBeforeFC_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeBeforeFC_smoke,
::testing::Values(CONFIG_VALUE(YES), CONFIG_VALUE(NO))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeFasterRCNN_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsReshapeFasterRCNN_smoke,
::testing::Combine(
::testing::ValuesIn(s_convTensor)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 1, 1))
using namespace InferenceEngine;
-using myriadEliminateReshapeTests_nightly = myriadLayersTests_nightly;
+using myriadEliminateReshapeTests_smoke = myriadLayersTests_nightly;
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> myriadLayerReshape_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> myriadLayerReshape_smoke;
-TEST_P(myriadLayerReshape_nightly, Reshape) {
+TEST_P(myriadLayerReshape_smoke, Reshape) {
auto input_tensor = std::get<0>(GetParam());
auto output_tensor = std::get<1>(GetParam());
}
-typedef myriadLayersTests_nightly myriadLayerReshapeFasterRCNN_nightly;
+typedef myriadLayersTests_nightly myriadLayerReshapeFasterRCNN_smoke;
static std::vector<InferenceEngine::SizeVector> s_reshapeInParams = {
{{1, 4, 2, 16}},
)V0G0N";
-typedef myriadLayerTestBaseWithParam<std::string> myriadLayersTestsReshapeBeforeFC_nightly;
+typedef myriadLayerTestBaseWithParam<std::string> myriadLayersTestsReshapeBeforeFC_smoke;
-TEST_P(myriadLayersTestsReshapeBeforeFC_nightly, OptimizeReshapeIfItIsPlacedBeforeFC) {
+TEST_P(myriadLayersTestsReshapeBeforeFC_smoke, OptimizeReshapeIfItIsPlacedBeforeFC) {
std::string HWConfigValue = GetParam();
if (!CheckMyriadX() && HWConfigValue == CONFIG_VALUE(YES)) {
std::cout << "Disable for non-MyriadX devices" << std::endl;
EXPECT_EQ(InferenceEngineProfileInfo::NOT_RUN, layerInfo.status);
}
-class myriadLayersTestsReshapeFasterRCNN_nightly: public ConvolutionTest<>{
+class myriadLayersTestsReshapeFasterRCNN_smoke: public ConvolutionTest<>{
};
// FIXME: rewrite the test (it doesn't use Convolution) avoid HWC layout for 3D tensor in reference code
-TEST_P(myriadLayersTestsReshapeFasterRCNN_nightly, DISABLED_Convolution) {
+TEST_P(myriadLayersTestsReshapeFasterRCNN_smoke, DISABLED_Convolution) {
std::map<std::string, std::string> permute_params = {
{"order", "0,2,3,1"}
};
#include "myriad_layers_reverse_sequence_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReverseSequence_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerReverseSequence_smoke,
::testing::Combine(
::testing::Values<ReverseSequence>(
MAKE_STRUCT(reverse_sequence_test_params, {5, 6, 18}, 0, 0)
};
PRETTY_PARAM(ReverseSequence, reverse_sequence_test_params);
-typedef myriadLayerTestBaseWithParam<std::tuple<ReverseSequence, IRVersion>> myriadLayerReverseSequence_nightly;
+typedef myriadLayerTestBaseWithParam<std::tuple<ReverseSequence, IRVersion>> myriadLayerReverseSequence_smoke;
static int nchw_to_nhwc(InferenceEngine::SizeVector dims, int ind)
{
}
}
-TEST_P(myriadLayerReverseSequence_nightly, ReverseSequence) {
+TEST_P(myriadLayerReverseSequence_smoke, ReverseSequence) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
reverse_sequence_test_params input_dims = std::get<0>(GetParam());
}
}
-class myriadLayersRfcnTests_nightly: public myriadLayersTests_nightly {
+class myriadLayersRfcnTests_smoke: public myriadLayersTests_nightly {
public:
void GenROIs(InferenceEngine::Blob::Ptr rois,
const uint32_t in_width, const uint32_t in_height,
Blob::Ptr outputBlob;
};
-TEST_F(myriadLayersRfcnTests_nightly, ReshapeRfcn)
+TEST_F(myriadLayersRfcnTests_smoke, ReshapeRfcn)
{
StatusCode st = GENERAL_ERROR;
CompareCommonAbsolute(outputBlob, prior_network_output, 0.0f);
}
-TEST_F(myriadLayersRfcnTests_nightly, SoftmaxRfcn)
+TEST_F(myriadLayersRfcnTests_smoke, SoftmaxRfcn)
{
StatusCode st = GENERAL_ERROR;
CompareCommonAbsolute(outputBlob, _refBlob, ERROR_BOUND);
}
-TEST_F(myriadLayersRfcnTests_nightly, GlobalAvgPooling7x7Rfcn)
+TEST_F(myriadLayersRfcnTests_smoke, GlobalAvgPooling7x7Rfcn)
{
StatusCode st = GENERAL_ERROR;
#include "myriad_layers_roi_align_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIAlign_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIAlign_smoke,
::testing::Combine(
::testing::ValuesIn(s_ROIAlignLayerInput),
::testing::ValuesIn(s_ROIAlignLayerParam),
PRETTY_PARAM(number_rois, uint32_t);
using ROIAlignTestParams = std::tuple<Dims, roi_align_param, number_rois, roi_align_mode>;
-typedef myriadLayerTestBaseWithParam<ROIAlignTestParams> myriadLayersTestsROIAlign_nightly;
+typedef myriadLayerTestBaseWithParam<ROIAlignTestParams> myriadLayersTestsROIAlign_smoke;
const int roi_cols = 4;
return model;
}
-TEST_P(myriadLayersTestsROIAlign_nightly, ROIAlign) {
+TEST_P(myriadLayersTestsROIAlign_smoke, ROIAlign) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const tensor_test_params dims_layer_in = std::get<0>(GetParam());
#include "myriad_layers_roi_feature_extractor_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIFeatureExtractor_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIFeatureExtractor_smoke,
::testing::Combine(
::testing::ValuesIn(s_ROIFeatureExtractorLayerInput),
::testing::ValuesIn(s_ROIFeatureExtractorLayerParam),
using ROIFeatureExtractorTestParams = std::tuple<Dims, roi_feature_extractor_param, number_rois>;
-typedef myriadLayerTestBaseWithParam<ROIFeatureExtractorTestParams> myriadLayersTestsROIFeatureExtractor_nightly;
+typedef myriadLayerTestBaseWithParam<ROIFeatureExtractorTestParams> myriadLayersTestsROIFeatureExtractor_smoke;
static void genROIs(InferenceEngine::Blob::Ptr rois,
const roi_feature_extractor_param& params,
}
}
-TEST_P(myriadLayersTestsROIFeatureExtractor_nightly, ROIFeatureExtractor) {
+TEST_P(myriadLayersTestsROIFeatureExtractor_smoke, ROIFeatureExtractor) {
tensor_test_params dims_layer_in = std::get<0>(GetParam());
roi_feature_extractor_param test_params = std::get<1>(GetParam());
const uint32_t num_rois = std::get<2>(GetParam());
#include "myriad_layers_roi_pooling_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIPooling_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsROIPooling_smoke,
::testing::Combine(
::testing::ValuesIn(s_ROIPoolingLayerInput),
::testing::ValuesIn(s_ROIPoolingLayerParam),
using ROIPoolingTestParams = std::tuple<Dims, roi_pooling_param, uint32_t, roi_pooling_method, IRVersion>;
-class myriadLayersTestsROIPooling_nightly: public myriadLayerTestBaseWithParam<ROIPoolingTestParams> {
+class myriadLayersTestsROIPooling_smoke: public myriadLayerTestBaseWithParam<ROIPoolingTestParams> {
public:
void genROIs(InferenceEngine::Blob::Ptr rois,
const ROIPoolingParams& params,
}
};
-TEST_P(myriadLayersTestsROIPooling_nightly, ROIPooling) {
+TEST_P(myriadLayersTestsROIPooling_smoke, ROIPooling) {
tensor_test_params dims_layer_in = std::get<0>(GetParam());
ROIPoolingParams test_params = std::get<1>(GetParam());
const uint32_t num_rois = std::get<2>(GetParam());
#include "myriad_layers_scale_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsScale_nightly,
+ accuracy, myriadLayersTestsScale_smoke,
::testing::Combine(
::testing::ValuesIn(s_inputScaleTensors),
::testing::ValuesIn(s_inputBiasScale)));
typedef std::tuple<SizeVector, bool> TestScaleShift;
-class myriadLayersTestsScale_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsScale_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<TestScaleShift> {
};
-TEST_P(myriadLayersTestsScale_nightly, TestsScale)
+TEST_P(myriadLayersTestsScale_smoke, TestsScale)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
{ 16, 512, 56, 56 },
};
-INSTANTIATE_TEST_CASE_P(nd_tensors, myriadLayersScatterElementsUpdateTest_nightly,
+INSTANTIATE_TEST_CASE_P(nd_tensors, myriadLayersScatterElementsUpdateTest_smoke,
Combine(
ValuesIn(dataShapeList_ndTensors),
ValuesIn(dataTypeList)));
-INSTANTIATE_TEST_CASE_P(use_cases, myriadLayersScatterElementsUpdateTest_nightly,
+INSTANTIATE_TEST_CASE_P(use_cases, myriadLayersScatterElementsUpdateTest_smoke,
Combine(
ValuesIn(dataShapeList_useCases),
ValuesIn(dataTypeList)));
using ScatterElementsUpdateTestParams = std::tuple<DataShape,
DataType>;
-class myriadLayersScatterElementsUpdateTest_nightly :
+class myriadLayersScatterElementsUpdateTest_smoke :
public myriadLayerTestBaseWithParam<ScatterElementsUpdateTestParams> {
protected:
std::mt19937 m_gen;
};
-TEST_P(myriadLayersScatterElementsUpdateTest_nightly, accuracy) {
+TEST_P(myriadLayersScatterElementsUpdateTest_smoke, accuracy) {
testScatterElementsUpdate();
}
INSTANTIATE_TEST_CASE_P(
nd_tensors,
- myriadLayersScatterUpdateTest_nightly,
+ myriadLayersScatterUpdateTest_smoke,
Values(
// 1-dimensional `indices`
ScatterUpdateTestParams { { 1000 }, { 100000 } },
INSTANTIATE_TEST_CASE_P(
use_cases,
- myriadLayersScatterUpdateTest_nightly,
+ myriadLayersScatterUpdateTest_smoke,
Values(
// use case from Mask R-CNN: N = 1000, C = 256, HxW = 7x7
ScatterUpdateTestParams { { 32 }, { 1000, 256, 7, 7} },
using ScatterUpdateTestParams = std::tuple<IndicesShape,
InputShape>;
-class myriadLayersScatterUpdateTest_nightly:
+class myriadLayersScatterUpdateTest_smoke:
public myriadLayerTestBaseWithParam<ScatterUpdateTestParams>
{
protected:
}
};
-TEST_P(myriadLayersScatterUpdateTest_nightly, accuracy) {
+TEST_P(myriadLayersScatterUpdateTest_smoke, accuracy) {
testScatterUpdate();
}
#include "myriad_layers_select_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsSelect_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsSelect_smoke,
::testing::Combine(
::testing::ValuesIn(s_eltwiseTensors),
::testing::ValuesIn(s_eltwiseDims))
}
};
-class myriadTestsSelect_nightly: public SelectTest
+class myriadTestsSelect_smoke: public SelectTest
{
void SetUp() override {
SelectTest::SetUp();
}
};
-TEST_P(myriadTestsSelect_nightly, Select)
+TEST_P(myriadTestsSelect_smoke, Select)
{
InitBody();
}
#include "myriad_layers_sigmoid_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsSigmoid_nightly,
+ accuracy, myriadLayersTestsSigmoid_smoke,
::testing::ValuesIn(s_sigmoidParams));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithSigmoid_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithSigmoid_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithSigmoid_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithSigmoid_nightly,
+ accuracy, myriadLayerFullyConnectedWithSigmoid_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
using namespace InferenceEngine;
-class myriadLayersTestsSigmoid_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsSigmoid_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<InferenceEngine::SizeVector> {
public:
};
-TEST_P(myriadLayersTestsSigmoid_nightly, TestsSigmoid)
+TEST_P(myriadLayersTestsSigmoid_smoke, TestsSigmoid)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
{{1, 3, 277, 230}}
};
-class myriadLayersTestsMaxPoolingWithSigmoid_nightly: public PoolingTest<POOLING_MAX>{
+class myriadLayersTestsMaxPoolingWithSigmoid_smoke: public PoolingTest<POOLING_MAX>{
};
-class myriadLayersTestsAvgPoolingWithSigmoid_nightly: public PoolingTest<POOLING_AVG>{
+class myriadLayersTestsAvgPoolingWithSigmoid_smoke: public PoolingTest<POOLING_AVG>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithSigmoid_nightly, TestsMaxPoolingWithSigmoid)
+TEST_P(myriadLayersTestsMaxPoolingWithSigmoid_smoke, TestsMaxPoolingWithSigmoid)
{
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_SIGMOID);
}
-TEST_P(myriadLayersTestsAvgPoolingWithSigmoid_nightly, TestsAvgPoolingWithSigmoid)
+TEST_P(myriadLayersTestsAvgPoolingWithSigmoid_smoke, TestsAvgPoolingWithSigmoid)
{
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_SIGMOID);
}
-class myriadLayerConvolutionWithSigmoid_nightly: public ConvolutionTest<IRVersion>{
+class myriadLayerConvolutionWithSigmoid_smoke: public ConvolutionTest<IRVersion>{
};
-TEST_P(myriadLayerConvolutionWithSigmoid_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithSigmoid_smoke, Convolution) {
_irVersion = std::get<6>(GetParam());
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayerFullyConnectedWithSigmoid_nightly: public FCTest<>{
+class myriadLayerFullyConnectedWithSigmoid_smoke: public FCTest<>{
};
-TEST_P(myriadLayerFullyConnectedWithSigmoid_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithSigmoid_smoke, TestsFullyConnected)
{
_testNet.addLayer(LayerInitParams("Sigmoid")
.in({_output_tensor})
#include "myriad_layers_slice_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSlice_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSlice_smoke,
::testing::Values<SliceTestParams>(
MAKE_STRUCT(SliceParams, {4, 8, 16, 32, 64}, {{4, 8, 16, 10, 64}, {4, 8, 16, 22, 64}}, 3),
MAKE_STRUCT(SliceParams, {4, 8, 16, 32}, {{4, 8, 2, 32}, {4, 8, 14, 32}}, 2))
PRETTY_PARAM(SliceTestParams, SliceParams);
-typedef myriadLayerTestBaseWithParam<SliceTestParams> myriadLayersTestsSlice_nightly;
+typedef myriadLayerTestBaseWithParam<SliceTestParams> myriadLayersTestsSlice_smoke;
-TEST_P(myriadLayersTestsSlice_nightly, Slice) {
+TEST_P(myriadLayersTestsSlice_smoke, Slice) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const SliceParams testParams = GetParam();
#include "myriad_layers_softmax_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsSoftMax_nightly,
+ accuracy, myriadLayersTestsSoftMax_smoke,
::testing::Combine(
::testing::ValuesIn(s_softMaxTensors)
, ::testing::Values<IRVersion>(IRVersion::v7, IRVersion::v10)
*os << "axis=" << p.axis << ", sizes=" << testing::PrintToString(p.sizes);
}
-using myriadLayersTestsSoftMaxParams_nightly = myriadLayerTestBaseWithParam<std::tuple<SoftmaxAxisSizes, IRVersion>>;
+using myriadLayersTestsSoftMaxParams_smoke = myriadLayerTestBaseWithParam<std::tuple<SoftmaxAxisSizes, IRVersion>>;
-class myriadLayersTestsSoftMax_nightly: public myriadLayersTestsSoftMaxParams_nightly {
+class myriadLayersTestsSoftMax_smoke: public myriadLayersTestsSoftMaxParams_smoke {
protected:
SoftmaxAxisSizes _testingInput;
void SetUp() override {
- myriadLayersTestsSoftMaxParams_nightly::SetUp();
+ myriadLayersTestsSoftMaxParams_smoke::SetUp();
_testingInput = std::get<0>(GetParam());
_irVersion = std::get<1>(GetParam());
}
};
-TEST_P(myriadLayersTestsSoftMax_nightly, TestsSoftMax)
+TEST_P(myriadLayersTestsSoftMax_smoke, TestsSoftMax)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
SetInputTensors({_testingInput.sizes});
#include "myriad_layers_split_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSplit_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSplit_smoke,
::testing::Values<SplitTestParams>(
MAKE_STRUCT(SplitParams, {4, 8, 16, 32, 64}, 2, 6),
MAKE_STRUCT(SplitParams, {4, 8, 16, 32}, 2, 6),
PRETTY_PARAM(SplitTestParams, SplitParams);
-typedef myriadLayerTestBaseWithParam<SplitTestParams> myriadLayersTestsSplit_nightly;
+typedef myriadLayerTestBaseWithParam<SplitTestParams> myriadLayersTestsSplit_smoke;
-TEST_P(myriadLayersTestsSplit_nightly, Split) {
+TEST_P(myriadLayersTestsSplit_smoke, Split) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
const SplitParams testParams = GetParam();
#include "myriad_layers_squeeze_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC1,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC1_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC1),
::testing::ValuesIn(s_squeezeIndicesTC1),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC2,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC2_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC2),
::testing::ValuesIn(s_squeezeIndicesTC2),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC3,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC3_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC3),
::testing::ValuesIn(s_squeezeIndicesTC3),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC4,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC4_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC4),
::testing::ValuesIn(s_squeezeIndicesTC4),
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC5,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsSqueezeTC5_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensorsTC5),
::testing::ValuesIn(s_squeezeIndicesTC5),
}
};
-class myriadLayersTestsSqueezeTC1 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC1_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC2 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC2_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC3 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC3_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC4 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC4_smoke : public myriadLayersTestsSqueezeBase
{
};
-class myriadLayersTestsSqueezeTC5 : public myriadLayersTestsSqueezeBase
+class myriadLayersTestsSqueezeTC5_smoke : public myriadLayersTestsSqueezeBase
{
};
-TEST_P(myriadLayersTestsSqueezeTC1, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC1_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC2, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC2_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC3, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC3_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC4, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC4_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
-TEST_P(myriadLayersTestsSqueezeTC5, Squeeze) {
+TEST_P(myriadLayersTestsSqueezeTC5_smoke, Squeeze) {
DISABLE_IF(!CheckMyriadX());
InitBody();
}
#include "myriad_layers_strided_slice_test.h"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsStridedSlice_nightly,
+ accuracy, myriadLayersTestsStridedSlice_smoke,
::testing::ValuesIn(s_stridedSliceParams));
InferenceEngine::SizeVector out_shape;
};
-class myriadLayersTestsStridedSlice_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsStridedSlice_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<strided_slice_test_param> {
public:
std::string model_t = R"V0G0N(
}
};
-TEST_P(myriadLayersTestsStridedSlice_nightly, TestsStridedSlice) {
+TEST_P(myriadLayersTestsStridedSlice_smoke, TestsStridedSlice) {
auto p = ::testing::WithParamInterface<strided_slice_test_param>::GetParam();
std::string model = getModel(p);
#include "myriad_layers_tanh_test.hpp"
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayersTestsTanh_nightly,
+ accuracy, myriadLayersTestsTanh_smoke,
::testing::ValuesIn(s_tanhParams));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithTanH_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerConvolutionWithTanH_smoke,
::testing::Combine(
::testing::ValuesIn(g_convolutionTensors)
, ::testing::Values<param_size>(MAKE_STRUCT(param_size, 3, 3))
)
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithTanh_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsMaxPoolingWithTanh_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
::testing::ValuesIn(g_poolingLayout))
);
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithTanh_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsAvgPoolingWithTanh_smoke,
::testing::Combine(
::testing::ValuesIn(g_poolingInput),
::testing::ValuesIn(g_poolingLayerParamsLite),
);
INSTANTIATE_TEST_CASE_P(
- accuracy, myriadLayerFullyConnectedWithTanH_nightly,
+ accuracy, myriadLayerFullyConnectedWithTanH_smoke,
::testing::Combine(
::testing::ValuesIn(g_fcTestParamsSubset),
::testing::Values(g_dimensionsFC[0]),
#define ERROR_BOUND_WITH_TANH (1.0e-3f)
using namespace InferenceEngine;
-class myriadLayersTestsTanh_nightly: public myriadLayersTests_nightly,
+class myriadLayersTestsTanh_smoke: public myriadLayersTests_nightly,
public testing::WithParamInterface<SizeVector> {
};
-TEST_P(myriadLayersTestsTanh_nightly, TestsTanh)
+TEST_P(myriadLayersTestsTanh_smoke, TestsTanh)
{
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
auto p = ::testing::WithParamInterface<SizeVector>::GetParam();
{{1, 16, 8, 8}, 8, 0.065f}
};
-class myriadLayerConvolutionWithTanH_nightly: public ConvolutionTest<IRVersion>{
+class myriadLayerConvolutionWithTanH_smoke: public ConvolutionTest<IRVersion>{
};
-TEST_P(myriadLayerConvolutionWithTanH_nightly, Convolution) {
+TEST_P(myriadLayerConvolutionWithTanH_smoke, Convolution) {
auto param = GetParam();
_irVersion = std::get<6>(param);
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), maxerr);
}
-class myriadLayersTestsMaxPoolingWithTanh_nightly: public PoolingTest<POOLING_MAX>{
+class myriadLayersTestsMaxPoolingWithTanh_smoke: public PoolingTest<POOLING_MAX>{
};
-class myriadLayersTestsAvgPoolingWithTanh_nightly: public PoolingTest<POOLING_AVG>{
+class myriadLayersTestsAvgPoolingWithTanh_smoke: public PoolingTest<POOLING_AVG>{
};
-TEST_P(myriadLayersTestsMaxPoolingWithTanh_nightly, TestsMaxPoolingWithTanh)
+TEST_P(myriadLayersTestsMaxPoolingWithTanh_smoke, TestsMaxPoolingWithTanh)
{
_testNet.addLayer(LayerInitParams("TanH")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_TANH);
}
-TEST_P(myriadLayersTestsAvgPoolingWithTanh_nightly, TestsAvgPoolingWithTanh)
+TEST_P(myriadLayersTestsAvgPoolingWithTanh_smoke, TestsAvgPoolingWithTanh)
{
_testNet.addLayer(LayerInitParams("TanH")
.in({_output_tensor})
CompareCommonAbsolute(_outputMap.begin()->second, getReferenceOutput(), ERROR_BOUND_WITH_TANH);
}
-class myriadLayerFullyConnectedWithTanH_nightly: public FCTest<>{
+class myriadLayerFullyConnectedWithTanH_smoke: public FCTest<>{
};
-TEST_P(myriadLayerFullyConnectedWithTanH_nightly, TestsFullyConnected)
+TEST_P(myriadLayerFullyConnectedWithTanH_smoke, TestsFullyConnected)
{
_testNet.addLayer(LayerInitParams("TanH")
.in({_output_tensor})
+++ /dev/null
-// Copyright (C) 2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "vpu_layers_tests.hpp"
-#include "vpu_case_params.hpp"
-#include "common/include/vpu/utils/error.hpp"
-
-#include "single_layer_common.hpp"
-
-#include "gtest/gtest.h"
-
-#include <string>
-#include <ngraph_functions/subgraph_builders.hpp>
-#include <common_test_utils/test_common.hpp>
-#include <functional_test_utils/blob_utils.hpp>
-#include <vpu_case_common.hpp>
-
-namespace {
-
-class MyriadLayersTestsTensorIterator : public CommonTestUtils::TestsCommon {
-public:
- void SetUp() override {
- fn_ptr = ngraph::builder::subgraph::makeTIwithLSTMcell();
- }
-protected:
- std::shared_ptr<ngraph::Function> fn_ptr;
-};
-
-// TODO: Issue: 29485
-TEST_F(MyriadLayersTestsTensorIterator, CompareNativeVersionWithUnrolledLoop) {
- DISABLE_IF(!CheckMyriadX () && !CheckMA2085());
- CNNNetwork network(fn_ptr);
- network.getInputsInfo().begin()->second->setPrecision(Precision::FP16);
-
-
- auto ie = PluginCache::get().ie();
-
- ExecutableNetwork exeNetworkWithConfig = ie->LoadNetwork(network, CommonTestUtils::DEVICE_MYRIAD,
- {{VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR), CONFIG_VALUE(NO)},
- {VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING), CONFIG_VALUE(YES)}});
- InferRequest inferRequestWithConfig = exeNetworkWithConfig.CreateInferRequest();
- auto blobWithConfig = FuncTestUtils::createAndFillBlob(network.getInputsInfo().begin()->second->getTensorDesc());
- inferRequestWithConfig.SetBlob(network.getInputsInfo().begin()->first, blobWithConfig);
- inferRequestWithConfig.Infer();
- auto* outRawDataWithConfig = inferRequestWithConfig.GetBlob(network.getOutputsInfo().begin()->first)->cbuffer().as<float*>();
-
- ExecutableNetwork exeNetworkWithoutConfig = ie->LoadNetwork(network, CommonTestUtils::DEVICE_MYRIAD,
- {{VPU_CONFIG_KEY(FORCE_PURE_TENSOR_ITERATOR), CONFIG_VALUE(YES)},
- {VPU_CONFIG_KEY(ENABLE_TENSOR_ITERATOR_UNROLLING), CONFIG_VALUE(NO)}});
- InferRequest inferRequestWithoutConfig = exeNetworkWithoutConfig.CreateInferRequest();
- auto blobWithoutConfig = FuncTestUtils::createAndFillBlob(network.getInputsInfo().begin()->second->getTensorDesc());
- inferRequestWithoutConfig.SetBlob(network.getInputsInfo().begin()->first, blobWithoutConfig);
- inferRequestWithoutConfig.Infer();
- auto* outRawDataWithoutConfig = inferRequestWithoutConfig.GetBlob(network.getOutputsInfo().begin()->first)->cbuffer().as<float*>();
-
- auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP16);
- size_t outElementsCount = std::accumulate(begin(fn_ptr->get_output_shape(0)), end(fn_ptr->get_output_shape(0)), 1,
- std::multiplies<size_t>());
-
- FuncTestUtils::compareRawBuffers(outRawDataWithoutConfig, outRawDataWithConfig, outElementsCount,
- outElementsCount,
- thr);
-
-}
-}
#include "myriad_layers_tile_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracyAdd, myriadLayerTestTile_nightly,
+INSTANTIATE_TEST_CASE_P(accuracyAdd, myriadLayerTestTile_smoke,
::testing::Combine(
::testing::Values<test_params>(
MAKE_STRUCT(tile_test::nd_tensor_test_params, {4, 5, 6}, 0)
, ::testing::Values<tiles>(2, 3, 5)
));
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestTile_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayerTestTile_smoke,
::testing::Combine(
::testing::Values<test_params>(
MAKE_STRUCT(tile_test::nd_tensor_test_params, {4, 5, 6}, 1)
}
}
-typedef myriadLayerTestBaseWithParam<tuple<test_params, tiles>> myriadLayerTestTile_nightly;
+typedef myriadLayerTestBaseWithParam<tuple<test_params, tiles>> myriadLayerTestTile_smoke;
-TEST_P(myriadLayerTestTile_nightly, Tile) {
+TEST_P(myriadLayerTestTile_smoke, Tile) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
tile_test::nd_tensor_test_params input_dims = get<0>(GetParam());
// "none", // currently is not supported by firmware
};
-INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsTopK_nightly,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadTestsTopK_smoke,
::testing::Combine(
::testing::ValuesIn(s_geometries_list),
::testing::ValuesIn(s_modes_list),
class TopKTest: public myriadLayerTestBaseWithParam<TopKTestParams>
{
protected:
- std::set<std::string> getExecutedStagesTypes() const {
- std::set<std::string> result;
- std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
- _inferRequest->GetPerformanceCounts(perfMap, nullptr);
-
- for (const auto& perf : perfMap)
- result.emplace(perf.second.exec_type);
-
- return result;
- }
-
void testTopK(const IRVersion irVersion, const bool outputValues, const bool outputIndices) {
_config[VPU_CONFIG_KEY(DETECT_NETWORK_BATCH)] = CONFIG_VALUE(NO);
_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
ASSERT_NO_THROW(st = _inferRequest->Infer(&_resp));
ASSERT_EQ(StatusCode::OK, st) << _resp.msg;
- const auto executedTypes = getExecutedStagesTypes();
-
- // This logic must be synchronized with TopKStage class.
- const bool useArgMaxOptimization = (!outputValues || !outputIndices)
- && mode == "max"
- && ((sort == "value" && outputValues) || (sort == "index" && outputIndices));
-
- ASSERT_EQ(executedTypes.count("ArgMax"), useArgMaxOptimization);
- ASSERT_EQ(executedTypes.count("TopK"), !useArgMaxOptimization);
-
Blob::Ptr outputValuesBlob, outputIndicesBlob;
if (outputValues) {
ASSERT_NO_THROW(st = _inferRequest->GetBlob("topk.0", outputValuesBlob, &_resp));
}
};
-class myriadTestsTopK_nightly: public TopKTest
+class myriadTestsTopK_smoke: public TopKTest
{
};
-TEST_P(myriadTestsTopK_nightly, TopKv7)
+TEST_P(myriadTestsTopK_smoke, TopKv7)
{
testTopK(IRVersion::v7, true, true);
}
-TEST_P(myriadTestsTopK_nightly, TopKv10_All)
+TEST_P(myriadTestsTopK_smoke, TopKv10_All)
{
testTopK(IRVersion::v10, true, true);
}
-TEST_P(myriadTestsTopK_nightly, TopKv10_ArgMaxValues)
+TEST_P(myriadTestsTopK_smoke, TopKv10_ArgMaxValues)
{
testTopK(IRVersion::v10, true, false);
}
-TEST_P(myriadTestsTopK_nightly, TopKv10_ArgMaxIndices)
+TEST_P(myriadTestsTopK_smoke, TopKv10_ArgMaxIndices)
{
testTopK(IRVersion::v10, false, true);
}
#include "myriad_layers_unsqueeze_test.hpp"
-INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsUnsqueeze,
+INSTANTIATE_TEST_CASE_P(accuracy, myriadLayersTestsUnsqueeze_smoke,
::testing::Combine(
::testing::ValuesIn(s_squeezeTensors),
::testing::ValuesIn(s_squeezeIndices)
using namespace InferenceEngine;
typedef std::vector<int32_t> IndicesVector;
-typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, IndicesVector>> myriadLayersTestsUnsqueeze;
+typedef myriadLayerTestBaseWithParam<std::tuple<InferenceEngine::SizeVector, IndicesVector>> myriadLayersTestsUnsqueeze_smoke;
static void ref_unsqueeze(const InferenceEngine::Blob::Ptr src,
InferenceEngine::Blob::Ptr dst) {
ie_memcpy(dst_data, dst_size * sizeof(ie_fp16), src_data, src_size * sizeof(ie_fp16));
}
-TEST_P (myriadLayersTestsUnsqueeze, Unsqueeze){
+TEST_P(myriadLayersTestsUnsqueeze_smoke, Unsqueeze){
auto input = std::get<0>(GetParam());
auto indices = std::get<1>(GetParam());
#include "pool_ref.hpp"
#include "ie_memcpy.h"
#include <single_layer_common.hpp>
+#include <vpu/model/data_desc.hpp>
#include "common_test_utils/common_layers_params.hpp"
#include "vpu/utils/error.hpp"
for (int i = 1; i < ndims; ++i)
offset = offset * dims[i] + indices[i];
}
-
return data[offset];
}
}
}
-template<>
-void ref_reduce<ie_fp16>(const Blob::Ptr& in,
- const Blob::Ptr& axes,
- Blob::Ptr& out,
- int keep_dims,
- IReduceKernel<ie_fp16>* op)
+template void ref_reduce(const Blob::Ptr& in,
+ const Blob::Ptr& axes,
+ Blob::Ptr& out,
+ int keep_dims,
+ vpu::LayoutPreference layoutPreference,
+ IReduceKernel<ie_fp16>* op);
+
+template void ref_reduce(const Blob::Ptr& in,
+ const Blob::Ptr& axes,
+ Blob::Ptr& out,
+ int keep_dims,
+ vpu::LayoutPreference layoutPreference,
+ IReduceKernel<int32_t>* op);
+
+template<typename DataType>
+void ref_reduce(const Blob::Ptr& in,
+ const Blob::Ptr& axes,
+ Blob::Ptr& out,
+ int keep_dims,
+ vpu::LayoutPreference layoutPreference,
+ IReduceKernel<DataType>* op)
{
ASSERT_NE(in, nullptr);
ASSERT_NE(axes, nullptr);
ASSERT_NE(out, nullptr);
- const int16_t* inData = in->cbuffer().as<const int16_t*>();
- int16_t* outData = out->buffer().as<int16_t*>();
-
- ASSERT_NE(inData, nullptr);
- ASSERT_NE(outData, nullptr);
-
const auto axesDims = axes->getTensorDesc().getDims();
ASSERT_EQ(axesDims.size(), 1);
const auto axesSize = axesDims[0];
- const int32_t* axesData = axes->cbuffer().as<const int32_t*>();
- ASSERT_TRUE(!(axesSize > 0) || (axesData != nullptr));
+ int32_t* axesData = axes->cbuffer().as<int32_t*>();
- reduceImpl::refReduce(in, out, axesSize, axesData, keep_dims, op);
-}
+ if (layoutPreference == vpu::LayoutPreference::ChannelMinor) {
+ auto inDims = in->getTensorDesc().getDims();
+ const auto ndims = inDims.size();
+ auto newDims = inDims;
-template<>
-void ref_reduce<int32_t>(const Blob::Ptr& in,
- const Blob::Ptr& axes,
- Blob::Ptr& out,
- int keep_dims,
- IReduceKernel<int32_t>* op)
-{
- ASSERT_NE(in, nullptr);
- ASSERT_NE(axes, nullptr);
- ASSERT_NE(out, nullptr);
+ const auto dimsOrder = vpu::DimsOrder::fromLayout(in->getTensorDesc().getLayout());
+ const auto defPerm = vpu::DimsOrder::fromNumDims(ndims).toPermutation();
- const int32_t* inData = in->cbuffer().as<const int32_t*>();
- int32_t* outData = out->buffer().as<int32_t*>();
+ for (int i = 0; i < ndims; ++i) {
+ auto newInd = ndims - 1 - dimsOrder.dimInd(defPerm[ndims - i - 1]);
+ newDims[newInd] = inDims[i];
+ }
- ASSERT_NE(inData, nullptr);
- ASSERT_NE(outData, nullptr);
+ in->getTensorDesc().setDims(newDims);
- const auto axesDims = axes->getTensorDesc().getDims();
- ASSERT_EQ(axesDims.size(), 1);
+ for (int i = 0; i < axesSize; ++i) {
+ axesData[i] = ndims - 1 - dimsOrder.dimInd(defPerm[ndims - axesData[i] - 1]);
+ newDims[axesData[i]] = keep_dims ? 1 : 0;
+ }
+
+ if (!keep_dims) {
+ newDims.erase(std::remove(newDims.begin(), newDims.end(), 0), newDims.end());
+ }
+
+ out->getTensorDesc().setDims(newDims);
+ }
- const auto axesSize = axesDims[0];
- const int32_t* axesData = axes->cbuffer().as<const int32_t*>();
ASSERT_TRUE(!(axesSize > 0) || (axesData != nullptr));
reduceImpl::refReduce(in, out, axesSize, axesData, keep_dims, op);
#include <iomanip> // std::setw
+#include <vpu/utils/ie_helpers.hpp>
+#include <graph_transformer/include/vpu/model/data_desc.hpp>
+
typedef std::map<std::string, std::string> ParamsStruct;
typedef float (*eltwise_kernel)(float a, float b, float c);
const InferenceEngine::Blob::Ptr& axes,
InferenceEngine::Blob::Ptr& dst,
int keep_dims,
+ vpu::LayoutPreference layoutPreference,
IReduceKernel<DataType>* op);
void ref_topk(const InferenceEngine::Blob::Ptr& srcValues,
}
}
+bool vpuLayersTests::wasCustomLayerInferred() const {
+ auto perfMap = std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>{};
+ _inferRequest->GetPerformanceCounts(perfMap, nullptr);
+ const auto isCustomLayer = [&](const std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>& info) {
+ return !strcmp(info.second.exec_type, "Custom");
+ };
+ return std::any_of(begin(perfMap), end(perfMap), isCustomLayer);
+}
+
namespace {
template<class TensorDescriptor>
return false;
const auto st = _inferRequest->Infer(&_resp);
EXPECT_EQ(InferenceEngine::StatusCode::OK, st) << _resp.msg;
- //dumpPerformance();
+// dumpPerformance();
+ if (!_config[VPU_CONFIG_KEY(CUSTOM_LAYERS)].empty()) {
+ EXPECT_TRUE(wasCustomLayerInferred())
+ << "CustomBindings.xml has been provided but Custom layer was not inferred";
+ }
return true;
}
void TearDown() override;
bool CheckMyriadX();
void dumpPerformance();
+ bool wasCustomLayerInferred() const;
// For historical reasons, gen-blob functions use to 'hack' blob layout:
// replace NCHW with NHWC even if you explicitly setup layout preference
endfunction()
add_helpers(${TARGET_NAME})
+
target_link_libraries(${TARGET_NAME} PUBLIC commonTestUtils)
add_helpers(${TARGET_NAME}_s USE_STATIC_IE)
+
target_link_libraries(${TARGET_NAME}_s PUBLIC commonTestUtils_s)
+
+if (ENABLE_DATA)
+ add_dependencies(${TARGET_NAME} data)
+ add_dependencies(${TARGET_NAME}_s data)
+endif()
#pragma once
#include <ie_blob.h>
+#include <ie_core.hpp>
#include <ie_layers_property.hpp>
#include <precision_utils.h>
#include <common_test_utils/xml_net_builder/xml_net_builder.hpp>
};
template<int Version = 3>
-inline InferenceEngine::details::CNNNetworkImplPtr
-buildSingleLayerNetworkCommon(InferenceEngine::details::IFormatParser *parser,
- const std::string &layerType,
+inline InferenceEngine::CNNNetwork
+buildSingleLayerNetworkCommon(const std::string &layerType,
const CommonTestUtils::InOutShapes &inOutShapes,
std::map<std::string, std::string> *params,
const std::string &layerDataName = "data",
size_t weightsSize = 0,
size_t biasesSize = 0,
const InferenceEngine::TBlob<uint8_t>::Ptr &weights = nullptr) {
- IE_ASSERT(parser);
- testing::XMLHelper xmlHelper(parser);
std::string precisionStr = precision.name();
auto netBuilder = CommonTestUtils::XmlNetBuilder<Version>::buildNetworkWithOneInput("Mock", inOutShapes.inDims[0],
precisionStr);
} else {
testContent = netBuilder.finish();
}
- xmlHelper.loadContent(testContent);
- auto result = xmlHelper.parseWithReturningNetwork();
- if (weights) xmlHelper.setWeights(weights);
- return result;
+
+ InferenceEngine::Core ie;
+ return ie.ReadNetwork(testContent, weights);
}
void GenRandomDataCommon(InferenceEngine::Blob::Ptr blob);
+++ /dev/null
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-/**
-* \brief TODO: short file description
-* \file file_utils.h
-*/
-#pragma once
-
-#include <string>
-#include <gtest/gtest.h>
-
-namespace {
- bool strContains(const std::string & str, const std::string & substr) {
- return str.find(substr) != std::string::npos;
- }
- bool strDoesnotContain(const std::string & str, const std::string & substr) {
- (void)strDoesnotContain; // to overcome unused warning
- return !strContains(str, substr);
- }
-}
-
-#define ASSERT_STR_CONTAINS(str, substr) ASSERT_PRED2(&strContains, str, substr)
-#define ASSERT_STR_DOES_NOT_CONTAIN(str, substr) ASSERT_PRED2 (&strDoesnotContain, str, substr)
-#define EXPECT_STR_CONTAINS(str, substr) EXPECT_PRED2(&strContains, str, substr)
\ No newline at end of file
}
void MockPlugin::GetVersion(const Version *&versionInfo) noexcept {
+ versionInfo = &version;
}
StatusCode MockPlugin::AddExtension(IExtensionPtr extension, InferenceEngine::ResponseDesc *resp) noexcept {
IE_SUPPRESS_DEPRECATED_START
class MockPlugin : public InferenceEngine::IInferencePlugin {
InferenceEngine::IInferencePlugin * _target = nullptr;
+ InferenceEngine::Version version;
public:
explicit MockPlugin(InferenceEngine::IInferencePlugin*target);
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
endif()
-add_test(NAME ${TARGET_NAME}
- COMMAND ${TARGET_NAME})
+add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME})
+set_property(TEST ${TARGET_NAME} PROPERTY LABELS IE)
add_dependencies(${TARGET_NAME} mock_engine)
//
#include <gtest/gtest.h>
-#include <parsers.h>
#include <ie_cnn_net_reader_impl.h>
#include <gmock/gmock-more-actions.h>
#include "cnn_network_impl.hpp"
ASSERT_EQ(hwOutput->parentData(), outputCopy1);
ASSERT_EQ(hwOutput->numConsumers(), 2);
- ASSERT_TRUE(contains(hwOutput->consumers(), [](const Stage& stage) { return stage->type() == StageType::Concat; }));
+ ASSERT_TRUE(contains(hwOutput->consumers(), [](const Stage& stage) { return stage->type() == StageType::StubConcat; }));
ASSERT_TRUE(contains(hwOutput->consumers(), [](const Stage& stage) { return stage->type() == StageType::Copy; }));
}
#include <gtest/gtest.h>
#include <graph_tools.hpp>
-#include "test_assertions.hpp"
+#include <common_test_utils/test_assertions.hpp>
#include <unordered_set>
#include <gmock/gmock-generated-function-mockers.h>
#include <gmock/gmock-generated-matchers.h>
using namespace InferenceEngine;
-class CNNNetworkTests : public ::testing::Test {
-protected:
- virtual void TearDown() {
- }
-
- virtual void SetUp() {
- }
-
-public:
-
-};
+using CNNNetworkTests = ::testing::Test;
TEST_F(CNNNetworkTests, throwsOnInitWithNull) {
std::shared_ptr<ICNNNetwork> nlptr = nullptr;
ASSERT_THROW(CNNNetwork network(nlptr), InferenceEngine::details::InferenceEngineException);
}
+
+TEST_F(CNNNetworkTests, throwsOnInitWithNullNgraph) {
+ std::shared_ptr<const ngraph::Function> nlptr = nullptr;
+ ASSERT_THROW(CNNNetwork network(nlptr), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetPrecision) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getPrecision(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetOutputsInfo) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getOutputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetInputsInfo) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getInputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedLayerCount) {
+ CNNNetwork network;
+ ASSERT_THROW(network.layerCount(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetName) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getName(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedCastToICNNNetwork) {
+ CNNNetwork network;
+ ASSERT_THROW(auto & net = static_cast<ICNNNetwork&>(network), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedCastToICNNNetwork) {
+ const CNNNetwork network;
+ ASSERT_THROW(const auto & net = static_cast<const ICNNNetwork&>(network), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnUninitializedGetFunction) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getFunction(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedGetFunction) {
+ const CNNNetwork network;
+ ASSERT_THROW(network.getFunction(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedBegin) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getFunction(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CNNNetworkTests, throwsOnConstUninitializedGetInputShapes) {
+ CNNNetwork network;
+ ASSERT_THROW(network.getInputShapes(), InferenceEngine::details::InferenceEngineException);
+}
auto exceptionMessage = getExceptionMessage([&]() { InferRequest->SetInput(blobMap); });
ASSERT_EQ(_inputDataIsEmptyError, exceptionMessage.substr(0, _inputDataIsEmptyError.size()));
}
+
+using InferRequestCPPTests = ::testing::Test;
+
+TEST_F(InferRequestCPPTests, throwsOnInitWithNull) {
+ IInferRequest::Ptr nlptr = nullptr;
+ ASSERT_THROW(InferRequest req(nlptr), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetBlob) {
+ InferRequest req;
+ ASSERT_THROW(req.SetBlob({}, {}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedGetBlob) {
+ InferRequest req;
+ ASSERT_THROW(req.GetBlob({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetBlobPreproc) {
+ InferRequest req;
+ ASSERT_THROW(req.SetBlob({}, {}, {}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedGetPreProcess) {
+ InferRequest req;
+ ASSERT_THROW(req.GetPreProcess({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedInfer) {
+ InferRequest req;
+ ASSERT_THROW(req.Infer(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedGetPerformanceCounts) {
+ InferRequest req;
+ ASSERT_THROW(req.GetPerformanceCounts(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetInput) {
+ InferRequest req;
+ ASSERT_THROW(req.SetInput({{}}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetOutput) {
+ InferRequest req;
+ ASSERT_THROW(req.SetOutput({{}}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetBatch) {
+ InferRequest req;
+ ASSERT_THROW(req.SetBatch({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedStartAsync) {
+ InferRequest req;
+ ASSERT_THROW(req.StartAsync(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedWait) {
+ InferRequest req;
+ ASSERT_THROW(req.Wait({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedSetCompletionCallback) {
+ InferRequest req;
+ std::function<void(InferRequest, StatusCode)> f;
+ ASSERT_THROW(req.SetCompletionCallback(f), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(InferRequestCPPTests, throwsOnUninitializedCast) {
+ InferRequest req;
+ ASSERT_THROW(auto & ireq = static_cast<IInferRequest::Ptr&>(req), InferenceEngine::details::InferenceEngineException);
+}
EXPECT_CALL(*mock_impl.get(), Export(_)).WillOnce(Throw(5));
ASSERT_EQ(UNEXPECTED, exeNetwork->Export({}, nullptr));
}
+
+using ExecutableNetworkTests = ::testing::Test;
+
+TEST_F(ExecutableNetworkTests, throwsOnInitWithNull) {
+ std::shared_ptr<IExecutableNetwork> nlptr = nullptr;
+ ASSERT_THROW(ExecutableNetwork exec(nlptr), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetOutputsInfo) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetOutputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetInputsInfo) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetInputsInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedExport) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.Export(std::string()), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedExportStream) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.Export(std::cout), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, nothrowsOnUninitializedCast) {
+ ExecutableNetwork exec;
+ ASSERT_NO_THROW(auto & enet = static_cast<IExecutableNetwork::Ptr&>(exec));
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetExecGraphInfo) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetExecGraphInfo(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedQueryState) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.QueryState(), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedSetConfig) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.SetConfig({{}}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetConfig) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetConfig({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetMetric) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetMetric({}), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ExecutableNetworkTests, throwsOnUninitializedGetContext) {
+ ExecutableNetwork exec;
+ ASSERT_THROW(exec.GetContext(), InferenceEngine::details::InferenceEngineException);
+}
mockNotEmptyNet.getOutputsInfo(outputsInfo);
mockInferRequestInternal = make_shared<MockInferRequestInternal>(inputsInfo, outputsInfo);
mockExeNetworkTS = make_shared<MockExecutableNetworkThreadSafe>();
- EXPECT_CALL(*mock_plugin_impl.get(), LoadExeNetworkImpl(_, _, _)).WillOnce(Return(mockExeNetworkTS));
+ EXPECT_CALL(*mock_plugin_impl.get(), LoadExeNetworkImpl(_, _)).WillOnce(Return(mockExeNetworkTS));
EXPECT_CALL(*mockExeNetworkTS.get(), CreateInferRequestImpl(_, _)).WillOnce(Return(mockInferRequestInternal));
sts = plugin->LoadNetwork(exeNetwork, mockNotEmptyNet, {}, &dsc);
ASSERT_EQ((int) StatusCode::OK, sts) << dsc.msg;
#include <gtest/gtest.h>
#include <gmock/gmock-spec-builders.h>
#include <ie_version.hpp>
+#include <cpp/ie_plugin_cpp.hpp>
#include "cpp_interfaces/base/ie_plugin_base.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/mock_plugin_impl.hpp"
const std::map <std::string, std::string> config;
ASSERT_EQ(UNEXPECTED, plugin->SetConfig(config, nullptr));
}
+
+using InferencePluginTests = testing::Test;
+
+TEST_F(InferencePluginTests, throwsOnNullptrCreation) {
+ InferenceEnginePluginPtr nulptr;
+ InferencePlugin plugin;
+ ASSERT_THROW(plugin = InferencePlugin(nulptr), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedGetVersion) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.GetVersion(), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedLoadNetwork) {
+ InferencePlugin plg;
+ QueryNetworkResult r;
+ ASSERT_THROW(plg.LoadNetwork(CNNNetwork(), {}), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedImportNetwork) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.ImportNetwork({}, {}), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedAddExtension) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.AddExtension(IExtensionPtr()), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, throwsOnUninitializedSetConfig) {
+ InferencePlugin plg;
+ ASSERT_THROW(plg.SetConfig({{}}), details::InferenceEngineException);
+}
+
+TEST_F(InferencePluginTests, nothrowsUninitializedCast) {
+ InferencePlugin plg;
+ ASSERT_NO_THROW(auto plgPtr = static_cast<InferenceEnginePluginPtr>(plg));
+}
#include "tests_common.hpp"
#include "unit_test_utils/mocks/mock_allocator.hpp"
-using namespace std;
-
-class LockedMemoryTest : public TestsCommon {
-protected:
- unique_ptr<MockAllocator> createMockAllocator() {
- return unique_ptr<MockAllocator>(new MockAllocator());
- }
-};
-
using namespace InferenceEngine;
using namespace ::testing;
+using LockedMemoryTest = testing::Test;
TEST_F(LockedMemoryTest, canUnlockMemoryAfterUsage) {
-
- auto allocator = createMockAllocator();
-
+ std::unique_ptr<MockAllocator> allocator(new MockAllocator());
char array [] = {1,2,3};
EXPECT_CALL(*allocator.get(), lock((void*)1, _)).WillRepeatedly(Return((void*)array));
}
}
-
TEST_F(LockedMemoryTest, canReadFromLockedMemory) {
-
- auto allocator = createMockAllocator();
-
+ std::unique_ptr<MockAllocator> allocator(new MockAllocator());
char array [] = {1,2,3,4,5};
EXPECT_CALL(*allocator.get(), lock((void*)1, _)).WillRepeatedly(Return((void*)array));
}
}
-
TEST_F(LockedMemoryTest, canWriteToLockedMemory) {
-
- auto allocator = createMockAllocator();
-
+ std::unique_ptr<MockAllocator> allocator(new MockAllocator());
char array [] = {1,2,3,4,5};
EXPECT_CALL(*allocator.get(), lock((void*)1, _)).WillRepeatedly(Return((void*)array));
g_os_iyx_osv16, ///< format used for weights for 2D convolution
g_os_iyx_osv32, ///< format used for weights for 2D convolution
gs_oiyx_gsv16, ///< format used for weights for 2D convolution
+ gs_oizyx_gsv16, ///< format used for weights for 3D convolution
gs_oiyx_gsv32, ///< format used for weights for 2D convolution
g_is_os_zyx_osv16_isv16, ///< format used for grouped weights for blocked 3D deconvolution
g_os_is_yx_osv16_isv4,
g_os_is_zyx_isv8_osv16_isv2,
g_os_is_yx_isv8_osv16_isv2,
g_os_is_zyx_isv16_osv16,
+ g_os_zyx_is_osv16_isv4, ///< format for imad deconvolution
+ g_os_zyx_is_osv16_isv16, ///< format for imad deconvolution
+ g_os_zyx_is_osv16_isv32, ///< format for imad deconvolution
+ g_os_zyx_is_osv32_isv4, ///< format for imad deconvolution
+ g_os_zyx_is_osv32_isv16, ///< format for imad deconvolution
+ g_os_zyx_is_osv32_isv32, ///< format for imad deconvolution
format_num, ///< number of format types
any = -1
{ os_is_yx_osv32_isv32p, { 1, 1, 1, 0, 0, "bfxy", "bfxy?", {}}},
{ os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ is_os_zyx_osv16_isv16, { 1, 1, 3, 0, 0, "fbzyx", "bfxyz", {{0, 16}, {1, 16}}}},
+ { is_os_yx_osv16_isv16, { 1, 1, 2, 0, 0, "fbyx", "bfxyz", {{0, 16}, {1, 16}}}},
{ os_is_osv32_isv32_swizzled_by_4, { 1, 1, 0, 0, 0, "bfxy", "bfxy?", {{0, 32}, {1, 32}}}},
{ os_is_zyx_isv8_osv16_isv2, { 1, 1, 3, 0, 0, "bfzyx", "bfxyz", {{1, 8}, {0, 16}, {1, 2}}}},
{ os_zyxi_osv16, { 1, 1, 3, 0, 0, "bzyxf", "bfxyz", {{0, 16}}}},
{ g_os_iyx_osv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 16}}}},
{ g_os_iyx_osv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{0, 32}}}},
{ gs_oiyx_gsv16, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 16}}}},
+ { gs_oizyx_gsv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{8, 16}}}},
{ gs_oiyx_gsv32, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{8, 32}}}},
{ gyxio, { 1, 1, 2, 0, 1, "gyxfb", "bfxy????g", {}}},
{ g_is_os_zyx_osv16_isv16, { 1, 1, 3, 0, 1, "gfbzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
{ g_os_is_yx_isv8_osv16_isv2, { 1, 1, 2, 0, 1, "gbfyx", "bfxy????g", {{1, 8}, {0, 16}, {1, 2}}}},
{ g_os_is_zyx_isv16_osv16, { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g", {{0, 16}, {1, 16}}}},
{ g_os_is_yx_osv16_isv4, { 1, 1, 2, 0, 1, "gbfxy", "bfxy????g", {{0, 16}, {1, 4}}}},
+ { g_os_zyx_is_osv16_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 4}}}},
+ { g_os_zyx_is_osv16_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 16}}}},
+ { g_os_zyx_is_osv16_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 16}, {1, 32}}}},
+ { g_os_zyx_is_osv32_isv4, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 4}}}},
+ { g_os_zyx_is_osv32_isv16, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 16}}}},
+ { g_os_zyx_is_osv32_isv32, { 1, 1, 3, 0, 1, "gbzyxi", "bfxyz???g", {{0, 32}, {1, 32}}}},
};
return traits.at(fmt);
}
{ WeightsLayout::gs_oi_yxs_gsv16_yxsv4, { 0, 1, -1, 2, 3, -1, -1, 4 } },
{ WeightsLayout::gs_oi_yxs_gsv32_yxsv4, { 0, 1, -1, 2, 3, -1, -1, 4 } },
{ WeightsLayout::g_os_is_yx_osv16_isv4, { 0, 1, -1, 2, 3, -1, -1, 4 } },
+ { WeightsLayout::g_os_zyx_is_osv16_isv4, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv16_isv16, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv16_isv32, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv32_isv4, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv32_isv16, { 1, 2, 3, 0, 4, -1, -1, 5 } },
+ { WeightsLayout::g_os_zyx_is_osv32_isv32, { 1, 2, 3, 0, 4, -1, -1, 5 } },
}};
NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l) {
newDims[2] = RoundUp(newDims[2], 4);
newDims[3] = RoundUp(newDims[3], 16);
break;
+ case g_os_zyx_is_osv16_isv4:
+ newDims[0] = RoundUp(newDims[0], 4);
+ newDims[4] = RoundUp(newDims[4], 16);
+ break;
+ case g_os_zyx_is_osv16_isv16:
+ newDims[0] = RoundUp(newDims[0], 16);
+ newDims[4] = RoundUp(newDims[4], 16);
+ break;
+ case g_os_zyx_is_osv16_isv32:
+ newDims[0] = RoundUp(newDims[0], 32);
+ newDims[4] = RoundUp(newDims[4], 16);
+ break;
+ case g_os_zyx_is_osv32_isv4:
+ newDims[0] = RoundUp(newDims[0], 4);
+ newDims[4] = RoundUp(newDims[4], 32);
+ break;
+ case g_os_zyx_is_osv32_isv16:
+ newDims[0] = RoundUp(newDims[0], 16);
+ newDims[4] = RoundUp(newDims[4], 32);
+ break;
+ case g_os_zyx_is_osv32_isv32:
+ newDims[0] = RoundUp(newDims[0], 32);
+ newDims[4] = RoundUp(newDims[4], 32);
+ break;
default:
break;
}
vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
vec[Channelndex(l, WeightsChannelName::LX)] = LX().v;
vec[Channelndex(l, WeightsChannelName::LY)] = LY().v;
+ } else if (src_channels == 4 && dst_channels == 5) {
+ vec[Channelndex(l, WeightsChannelName::X)] = X().v;
+ vec[Channelndex(l, WeightsChannelName::Y)] = Y().v;
+ vec[Channelndex(l, WeightsChannelName::Z)] = 1;
+ vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
+ vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
} else {
assert(0);
}
gs_oi_yxs_gsv32_yxsv4, // grouped weights for depthwise IMAD convolution (b_fs_yx_fsv32 format)
g_os_is_yx_osv16_isv4,
+
+ g_os_zyx_is_osv16_isv4,
+ g_os_zyx_is_osv16_isv16,
+ g_os_zyx_is_osv16_isv32,
+ g_os_zyx_is_osv32_isv4,
+ g_os_zyx_is_osv32_isv16,
+ g_os_zyx_is_osv32_isv32,
+
WeightsLayoutCount // NUMBER OF ELEMENTS IN ENUM
};
}
}
-inline bool GroupedLayout(WeightsLayout l) {
- switch (l) {
- case WeightsLayout::goiyx:
- case WeightsLayout::goizyx:
- case WeightsLayout::g_os_iyx_osv16:
- case WeightsLayout::g_os_iyx_osv32:
- case WeightsLayout::gs_oiyx_gsv16:
- case WeightsLayout::gs_oizyx_gsv16:
- case WeightsLayout::gs_oiyx_gsv32:
- case WeightsLayout::g_os_iyx_osv16_rotate_180:
- case WeightsLayout::gyxio:
- case WeightsLayout::gi_yxs_os_yxsv2_osv16:
- case WeightsLayout::g_is_os_zyx_osv16_isv16:
- case WeightsLayout::g_is_os_yx_osv16_isv16:
- case WeightsLayout::g_os_is_zyx_isv8_osv16_isv2:
- case WeightsLayout::g_os_is_yx_isv8_osv16_isv2:
- case WeightsLayout::g_os_is_zyx_isv16_osv16:
- case WeightsLayout::giy_xs_os_xsv2_osv16__ao32:
- case WeightsLayout::giy_xs_os_xsv2_osv8__ao32:
- case WeightsLayout::gs_oi_yxs_gsv4_yxsv4:
- case WeightsLayout::g_os_is_yx_osv16_isv4:
- return true;
- default:
- return false;
- }
-}
+inline bool GroupedLayout(WeightsLayout l);
inline bool GroupedLayout(DataLayout) {
return false;
static WeightsChannelArray weightsChannelArray;
static NDims GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l);
};
+
+inline bool GroupedLayout(WeightsLayout l) {
+ return WeightsTensor::DoesGroupDimExist(l);
+}
+
} // namespace Tensor
} // namespace kernel_selector
DispatchData runInfo;
std::vector<size_t> global;
std::vector<size_t> local;
- if (out.GetLayout() == DataLayout::bfzyx) {
- global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
- local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
- } else if (out.GetLayout() == DataLayout::yxfb) {
+ if (out.GetLayout() == DataLayout::yxfb) {
global = {out.Feature().v * out.Batch().v, out.X().v, out.Y().v};
local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
} else if (out.GetLayout() == DataLayout::b_fs_yx_fsv16) {
global = {Align(out.Feature().v, 16) * out.Batch().v, out.X().v, out.Y().v};
local = {16, 1, 1};
} else {
- global = {out.X().v, out.Y().v, out.Feature().v * out.Batch().v};
+ global = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
}
o.GetType() != KernelType::ACTIVATION) {
return false;
}
+ const activation_params& orgParams = static_cast<const activation_params&>(p);
+
+ for (auto& fused_op : orgParams.fused_ops) {
+ if (!IsFusedPrimitiveSupported(fused_op))
+ return false;
+ }
return true;
}
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableDifferentTypes();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
return false;
}
- if (params.gradient) {
- if (params.inputs[0].GetLayout() != params.inputs[1].GetLayout())
- return false;
- }
+ if (params.output.GetLayout() != params.inputs[0].GetLayout())
+ return false;
- // Opt kernel supports fused activations without extra inputs, since
- // it can't calculate correct offset for tensors with different layout.
- for (auto& op : params.fused_ops) {
- if (!op.tensors.empty()) {
- for (auto& t : op.tensors) {
- if (!(t == params.inputs[0]))
- return false;
- }
- }
- }
+ if (!params.fused_ops.empty() && params.output.GetLayout() != DataLayout::bfyx &&
+ params.output.GetLayout() != DataLayout::bfzyx)
+ return false;
return true;
}
JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& params, DispatchData kd) const {
auto jit = ActivationKernelBase::GetJitConstants(params, kd);
+ auto input_dt = params.inputs[0].GetDType();
jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI));
if (!params.fused_ops.empty()) {
- auto input_dt = GetUnitType(params);
- FusedOpsConfiguration conf = {"", {"x"}, "v", input_dt, 4, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED, IndexType::LINEAR_OFFSET };
+ std::vector<std::string> idx_order;
+ if (params.inputs[0].GetDims().size() <= 4) {
+ idx_order = {"fo_b", "fo_f", "fo_y", "fo_x"};
+ } else if (params.inputs[0].GetDims().size() == 5) {
+ idx_order = {"fo_b", "fo_f", "fo_z", "fo_y", "fo_x"};
+ }
+ FusedOpsConfiguration conf =
+ {"", idx_order, "v", input_dt, 4, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED, IndexType::TENSOR_COORD};
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
}
+ jit.Merge(MakeActivationJitConstants(params.activations, input_dt, "_KERNEL"));
return jit;
}
#include "activation_kernel_base.h"
+#include <vector>
+
namespace kernel_selector {
class ActivationKernelOpt : public ActivationKernelBase {
public:
DispatchData SetDefault(const activation_params& arg) const override;
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return { FusedOpType::QUANTIZE };
+ }
};
} // namespace kernel_selector
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableDifferentTypes();
k.EnableActivationAdditionalParamsAsInput();
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
JitConstants ActivationKernelRef::GetJitConstants(const activation_params& params, DispatchData kd) const {
auto jit = ActivationKernelBase::GetJitConstants(params, kd);
+ auto input_dt = params.inputs[0].GetDType();
if (!params.fused_ops.empty()) {
- auto input_dt = GetUnitType(params);
- FusedOpsConfiguration conf = {"", {"batch", "feature", "y", "x"}, "dst", input_dt, 1 };
+ std::vector<std::string> idx_order;
+ if (params.inputs[0].GetDims().size() <= 4) {
+ idx_order = {"batch", "feature", "y", "x"};
+ } else if (params.inputs[0].GetDims().size() == 5) {
+ idx_order = {"batch", "feature", "z", "y", "x"};
+ }
+ FusedOpsConfiguration conf = {"", idx_order, "dst", input_dt, 1};
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
}
+ jit.Merge(MakeActivationJitConstants(params.activations, input_dt, "_KERNEL"));
return jit;
}
#include "activation_kernel_base.h"
+#include <vector>
+
namespace kernel_selector {
class ActivationKernelRef : public ActivationKernelBase {
public:
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return { FusedOpType::QUANTIZE };
+ }
};
} // namespace kernel_selector
output.X().v != input.X().v || output.Y().v != input.Y().v || output.Feature().v % 16 != 0;
const bool bFilterSize = params.filterSize.x != 1 || params.filterSize.y != 1;
const bool bStride = params.stride.x != 1 || params.stride.y != 1;
+ const bool bPadding = input.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0;
- if (bOutputSizes || bFilterSize || bStride) {
+ if (bOutputSizes || bFilterSize || bStride || bPadding) {
return false;
}
if (cp.inputs[0].Feature().v != cp.groups || cp.output.Feature().v != cp.groups)
return false;
+ // Check that padding features doesn't miss-align the blocks
+ if (cp.inputs[0].Feature().pad.before % feature_block_size != 0 || cp.output.Feature().pad.before % feature_block_size != 0)
+ return false;
+
return true;
}
if (is_vector)
idx_order = {"(mb)", "(oc*OC_BLOCK + g*OC)", "od", "oh", "(ow + " + std::to_string(conf_id * 8) + ")"};
else
- idx_order = {"(mb)", "(oc*OC_BLOCK + g*OC + local_id)", "od", "oh", "(ow + i)"};
+ idx_order = {"(mb)", "(oc*OC_BLOCK + g*OC + local_id)", "od", "oh", "(ow + " + std::to_string(conf_id * 8) + " + i)"};
return { suffix,
idx_order,
GetPreferredWeightsLayout(newParams),
kd.weightsReorderParams,
GetSupportedKey(),
- newParams.groups);
+ newParams.groups,
+ newParams.transposed);
if (!succeed) {
return {};
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDilation();
- k.EnableTranspose();
k.EnableGroupedConvolution();
return k;
}
WeightsLayout ConvolutionKernel_bfyx_os_iyx_osv16::GetPreferredWeightsLayout(
const convolution_params ¶ms) const {
- if (!params.transposed) {
- return (params.groups > 1) ? WeightsLayout::g_os_iyx_osv16 : WeightsLayout::os_iyx_osv16;
- } else {
- return (params.groups > 1) ? WeightsLayout::g_os_iyx_osv16_rotate_180 : WeightsLayout::os_iyx_osv16_rotate_180;
- }
+ return (params.groups > 1) ? WeightsLayout::g_os_iyx_osv16 : WeightsLayout::os_iyx_osv16;
}
KernelsData ConvolutionKernel_bfyx_os_iyx_osv16::GetKernelsData(const Params& params,
k.EnableBatching();
k.EnableSplitSupport();
k.EnableDilation();
- k.EnableTranspose();
return k;
}
WeightsLayout ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetPreferredWeightsLayout(
const convolution_params ¶ms) const {
- if (!params.transposed) {
- return WeightsLayout::os_iyx_osv16;
- } else {
- return WeightsLayout::os_iyx_osv16_rotate_180;
- }
+ return params.groups == 1 ? WeightsLayout::os_iyx_osv16 : WeightsLayout::g_os_iyx_osv16;
}
KernelsData ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetKernelsData(const Params& params,
return res;
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
}
auto& newParams = static_cast<const convolution_params&>(params);
- if ((newParams.inputs[0].Feature().v / newParams.groups) % 4 != 0)
+ if (newParams.groups > 1 && newParams.weights.IFM().v % 4 != 0)
return false;
size_t min_block_size_x = (newParams.weights.X().v - 1) * newParams.dilation.x + 1;
k.EnableDepthwiseSeparableOpt();
}
- if (transposed) {
- k.EnableTranspose();
- }
-
if (local_convolution) {
k.EnableLocalConvolution();
}
const auto& out = params.output;
+ bool ver_bsv16_fsv16 = params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16
+ || params.output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16;
+
auto x = out.X().v;
auto y = out.Y().v;
auto z = out.Z().v;
auto f = Align(out.Feature().v, 16);
auto b = out.Batch().v;
- if (out.Batch().v % 16 == 0) {
+ if (ver_bsv16_fsv16) {
if (params.depthwise_separable_opt) {
kd.gws0 = x * y * z;
kd.gws1 = f;
if (!DeconvolutionKernelBase::Validate(p, o)) {
return false;
}
+ auto& deconv_params = static_cast<const deconvolution_params&>(p);
+
+ if (deconv_params.output.GetLayout() != deconv_params.inputs[0].GetLayout())
+ return false;
+
+ const auto& params = static_cast<const deconvolution_params&>(p);
+ const auto feature_block_size = 16;
+
+ // Check that padding features doesn't miss-align the blocks
+ if (params.inputs[0].Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0)
+ return false;
return true;
}
auto output = params.output;
auto jit = Parent::GetJitConstants(params);
- if (output.Batch().v % 16 == 0) {
+ bool ver_bsv16_fsv16 = params.output.GetLayout() == DataLayout::bs_fs_yx_bsv16_fsv16
+ || params.output.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16;
+
+ if (ver_bsv16_fsv16) {
jit.AddConstant(MakeJitConstant("VER_16MB16C", 1));
} else {
jit.AddConstant(MakeJitConstant("VER_8OW16C", 1));
icb /= 2;
}
- if (output.Batch().v % 16 == 0) {
+ if (ver_bsv16_fsv16) {
mb_block = 16;
jit.AddConstant(MakeJitConstant("MB_BLOCK", mb_block));
jit.AddConstant(MakeJitConstant("IC_BLOCK", ic_block));
jit.AddConstant(MakeJitConstant("LWS_1", runInfo.lws1));
jit.AddConstant(MakeJitConstant("LWS_2", runInfo.lws2));
+ if (!params.fused_ops.empty()) {
+ auto fused_dt = GetActivationType(params);
+ std::vector<std::string> idx_order_block_c00;
+ std::vector<std::string> idx_order_block_c01;
+ std::vector<std::string> idx_order_block_ci;
+
+ if (params.output.Dimentions() <= 4) {
+ idx_order_block_c00 = { "mb", "(g * IC + gic * IC_BLOCK)", "ih", "iw" };
+ idx_order_block_c01 = { "(mb + 8)", "(g * IC + gic * IC_BLOCK)", "ih", "iw" };
+ idx_order_block_ci = { "mb", "(g * IC + gic * IC_BLOCK)", "ih", "(iw + i)" };
+ } else {
+ idx_order_block_c00 = { "mb", "(g * IC + gic * IC_BLOCK)", "id", "ih", "iw" };
+ idx_order_block_c01 = { "(mb + 8)", "(g * IC + gic * IC_BLOCK)", "id", "ih", "iw" };
+ idx_order_block_ci = { "mb", "(g * IC + gic * IC_BLOCK)", "id", "ih", "(iw + i)" };
+ }
+
+ FusedOpsConfiguration conf_c00 = {
+ "_BLOCK_C00",
+ idx_order_block_c00,
+ "blockC00",
+ fused_dt,
+ 8,
+ LoadType::LT_ALIGNED_READ,
+ BoundaryCheck::ENABLED,
+ IndexType::TENSOR_COORD,
+ Tensor::DataChannelName::BATCH };
+ FusedOpsConfiguration conf_c01 = {
+ "_BLOCK_C01",
+ idx_order_block_c01,
+ "blockC01",
+ fused_dt,
+ 8,
+ LoadType::LT_ALIGNED_READ,
+ BoundaryCheck::ENABLED,
+ IndexType::TENSOR_COORD,
+ Tensor::DataChannelName::BATCH };
+ FusedOpsConfiguration conf_ci = { "_BLOCK_CI", idx_order_block_ci, "blockC00[i]", fused_dt, 1, LoadType::LT_ALIGNED_READ };
+
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf_c00, conf_c01, conf_ci }));
+ }
+
return jit;
}
protected:
WeightsLayout GetPreferredWeightsLayout(const deconvolution_params& p) const override {
- if (p.output.GetLayout() == DataLayout::b_fs_yx_fsv16)
+ if (p.output.Dimentions() == 4)
return WeightsLayout::is_os_yx_osv16_isv16;
else
return WeightsLayout::is_os_zyx_osv16_isv16;
bool Validate(const Params& p, const optional_params& o) const override;
CommonDispatchData SetDefault(const deconvolution_params& arg) const override;
JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
} // namespace kernel_selector
#include "kernel_selector_utils.h"
#include <algorithm>
+#include <vector>
+#include <string>
+#include <iostream>
namespace kernel_selector {
static const size_t sub_group_size = 16;
static const size_t feature_block_size = 16;
+static const float max_reg_pressure = 3.f / 4.f;
-size_t DeconvolutionKernel_b_fs_zyx_fsv16_dw::GetBlockSizeX(const deconvolution_params& params) const {
- std::vector<size_t> blockWidths = {8, 4, 2, 1};
- for (auto& blockSize : blockWidths)
- if (params.output.X().v % blockSize == 0) {
- return blockSize;
+float DeconvolutionKernel_b_fs_zyx_fsv16_dw::EstimateRegPressure(const deconvolution_params& params,
+ const dispatch_params& d_params) const {
+ size_t usage_bytes = 0;
+
+ usage_bytes += d_params.block_size_x * BytesPerElement(GetAccumulatorType(params));
+
+ if (d_params.preload_weights == weights_preload::all) {
+ usage_bytes += params.weights.X().v * params.weights.Y().v * params.weights.Z().v * BytesPerElement(params.weights.GetDType());
+ } else if (d_params.preload_weights == weights_preload::line) {
+ usage_bytes += params.weights.X().v * BytesPerElement(params.weights.GetDType());
+ } else {
+ usage_bytes += BytesPerElement(params.weights.GetDType());
+ }
+
+ if (d_params.preload_input == input_preload::line) {
+ size_t input_line_size = CeilDiv(d_params.block_size_x + params.weights.X().v - 1, params.stride.x);
+ usage_bytes += input_line_size * BytesPerElement(params.inputs[0].GetDType());
+ } else {
+ usage_bytes += BytesPerElement(params.inputs[0].GetDType());
+ }
+
+ constexpr size_t register_num = 128;
+ constexpr size_t register_bytes = 32;
+ constexpr size_t max_register_bytes = register_num * register_bytes;
+
+ return static_cast<float>(usage_bytes * sub_group_size) / static_cast<float>(max_register_bytes);
+}
+
+DeconvolutionKernel_b_fs_zyx_fsv16_dw::dispatch_params
+DeconvolutionKernel_b_fs_zyx_fsv16_dw::GetDispatchParams(const deconvolution_params& params) const {
+ std::vector<dispatch_params> ordered_params;
+ if (params.inputs[0].GetDType() == Datatype::F16 || params.inputs[0].GetDType() == Datatype::F32) {
+ ordered_params = {
+ // Preload weights
+ dispatch_params{8, input_preload::none, weights_preload::all},
+ dispatch_params{4, input_preload::none, weights_preload::all},
+ dispatch_params{2, input_preload::none, weights_preload::all},
+ dispatch_params{1, input_preload::none, weights_preload::all},
+ // No preloading
+ dispatch_params{8, input_preload::none, weights_preload::none},
+ dispatch_params{4, input_preload::none, weights_preload::none},
+ dispatch_params{2, input_preload::none, weights_preload::none},
+ dispatch_params{1, input_preload::none, weights_preload::none},
+ };
+ } else {
+ ordered_params = {
+ dispatch_params{16, input_preload::line, weights_preload::line},
+ dispatch_params{8, input_preload::line, weights_preload::line},
+ dispatch_params{4, input_preload::line, weights_preload::line},
+ dispatch_params{16, input_preload::line, weights_preload::none},
+ dispatch_params{8, input_preload::line, weights_preload::none},
+ dispatch_params{4, input_preload::line, weights_preload::none},
+ dispatch_params{2, input_preload::line, weights_preload::line},
+ dispatch_params{2, input_preload::line, weights_preload::none},
+ dispatch_params{1, input_preload::line, weights_preload::none},
+ dispatch_params{1, input_preload::none, weights_preload::none},
+ };
+ }
+
+ dispatch_params best_params = dispatch_params{ 1, input_preload::none, weights_preload::none };
+
+ for (auto& d_params : ordered_params) {
+ bool good_block_size_x = params.output.X().v % d_params.block_size_x == 0 || params.output.X().v > d_params.block_size_x * 3;
+ bool good_reg_pressure = EstimateRegPressure(params, d_params) <= max_reg_pressure;
+ // No support for no input preload and weights line preload in kernel
+ bool good_preloads = !(d_params.preload_input == input_preload::none && d_params.preload_weights == weights_preload::line);
+ // At least one input preload
+ bool full_input_preload = d_params.preload_input != input_preload::line ||
+ CeilDiv(d_params.block_size_x + params.filterSize.x - 1, params.stride.x) <= params.inputs[0].X().v;
+
+ if (good_block_size_x && good_reg_pressure && good_preloads && full_input_preload) {
+ best_params = d_params;
+ break;
}
- return 1;
+ }
+
+ return best_params;
}
ParamsKey DeconvolutionKernel_b_fs_zyx_fsv16_dw::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::F32);
- k.EnableOutputDataType(Datatype::F32);
- k.EnableInputWeightsType(WeightsType::F32);
k.EnableInputDataType(Datatype::F16);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
+ k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
- k.EnableInputWeightsType(WeightsType::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
+ k.EnableAllInputWeightsType();
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableSubGroup();
k.EnableSubGroupShort();
k.EnableGroupedConvolution();
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
return k;
}
auto f = out.Feature().v;
auto b = out.Batch().v;
- kd.gws0 = (x / GetBlockSizeX(params)) * y * z;
+ kd.gws0 = CeilDiv(x, GetDispatchParams(params).block_size_x) * y * z;
kd.gws1 = Align(f, feature_block_size);
kd.gws2 = b;
return false;
}
- const deconvolution_params& params = static_cast<const deconvolution_params&>(p);
+ const auto& params = static_cast<const deconvolution_params&>(p);
if (params.groups == 1)
return false;
if (params.weights.IFM().v != 1 || params.weights.OFM().v != 1)
return false;
+ // Check that padding features doesn't miss-align the blocks
+ if (params.inputs[0].Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0)
+ return false;
+
return true;
}
auto output = params.output;
auto jit = Parent::GetJitConstants(params);
- jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", GetBlockSizeX(params)));
+ auto dp = GetDispatchParams(params);
+ auto& block_size_x = dp.block_size_x;
+
+ jit.AddConstant(MakeJitConstant("X_BLOCK_SIZE", block_size_x));
jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size));
if (params.output.Feature().v % feature_block_size != 0) {
jit.AddConstant(MakeJitConstant("OUTPUT_LEFTOVERS", params.output.Feature().v % feature_block_size));
}
+ jit.AddConstant(MakeJitConstant("INPUT_BLOCK_SIZE_X", CeilDiv(block_size_x + params.filterSize.x - 1, params.stride.x)));
+ jit.AddConstant(MakeJitConstant("PRELOAD_INPUT_LINE", dp.preload_input == input_preload::line));
+ jit.AddConstant(MakeJitConstant("PRELOAD_WEIGHTS", dp.preload_weights == weights_preload::all));
+ jit.AddConstant(MakeJitConstant("PRELOAD_WEIGHTS_LINE", dp.preload_weights == weights_preload::line));
+
+ if (!params.fused_ops.empty()) {
+ auto fused_dt = GetActivationType(params);
+ std::vector<std::string> idx_order;
+ if (params.output.Dimentions() <= 4) {
+ idx_order = {"b", "fg", "y", "x"};
+ } else {
+ idx_order = { "b", "fg", "z", "y", "x" };
+ }
+ auto boundary_check = BoundaryCheck::ENABLED;
+ if (params.output.Feature().v % feature_block_size == 0 && params.output.X().v % block_size_x == 0) {
+ boundary_check = BoundaryCheck::DISABLED;
+ }
+ FusedOpsConfiguration conf = {
+ "",
+ idx_order,
+ "dequantized",
+ fused_dt,
+ block_size_x,
+ LoadType::LT_ALIGNED_READ,
+ boundary_check,
+ IndexType::TENSOR_COORD,
+ Tensor::DataChannelName::X };
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
return jit;
}
bool Validate(const Params& p, const optional_params& o) const override;
CommonDispatchData SetDefault(const deconvolution_params& arg) const override;
JitConstants GetJitConstants(const deconvolution_params& params) const override;
- size_t GetBlockSizeX(const deconvolution_params& params) const;
+
+ enum class weights_preload {
+ none,
+ line,
+ all
+ };
+ enum class input_preload {
+ none,
+ line
+ };
+
+ struct dispatch_params {
+ size_t block_size_x;
+ input_preload preload_input;
+ weights_preload preload_weights;
+ };
+ dispatch_params GetDispatchParams(const deconvolution_params& params) const;
+ float EstimateRegPressure(const deconvolution_params& params, const dispatch_params& disp_params) const;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
} // namespace kernel_selector
return false;
}
+ for (auto& fused_op : params.fused_ops) {
+ if (!IsFusedPrimitiveSupported(fused_op))
+ return false;
+ }
+
return true;
}
(dp.filterSize.y - 1 + padding.y) * input.Y().pitch;
input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
- jit.AddConstants({MakeJitConstant("STRIDE", dp.stride),
- MakeJitConstant("PADDING", dp.padding),
- MakeJitConstant("DILATION", dp.dilation),
- MakeJitConstant("FILTER_ARRAY_NUM", dp.split),
- MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
- MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", dp.depthwise_separable_opt),
- MakeJitConstant("FUSED_ELTWISE", dp.fused_eltwise),
- MakeJitConstant("GROUPED", (dp.groups > 1) ? 1 : 0)});
+ jit.AddConstants({ MakeJitConstant("STRIDE", dp.stride),
+ MakeJitConstant("PADDING", dp.padding),
+ MakeJitConstant("DILATION", dp.dilation),
+ MakeJitConstant("FILTER_ARRAY_NUM", dp.split),
+ MakeJitConstant("INPUT0_OFFSET_WITH_PADDING", input_offset_with_padding),
+ MakeJitConstant("DEPTHWISE_SEPARABLE_OPT", dp.depthwise_separable_opt),
+ MakeJitConstant("GROUPED", (dp.groups > 1) ? 1 : 0) });
+ jit.Merge(MakeTypeJitConstants(GetAccumulatorType(dp), "ACCUMULATOR"));
+ jit.Merge(MakeTypeJitConstants(GetActivationType(dp), "ACTIVATION"));
return jit;
}
KernelData kd = KernelData::Default<deconvolution_params>(params);
deconvolution_params& newParams = *static_cast<deconvolution_params*>(kd.params.get());
- bool succeed = UpdateWeightsParams(newParams, options, GetPreferredWeightsLayout(newParams), kd.weightsReorderParams);
+ bool succeed = UpdateWeightsParams(newParams,
+ options,
+ GetPreferredWeightsLayout(newParams),
+ kd.weightsReorderParams,
+ GetSupportedKey(),
+ newParams.groups);
if (!succeed) {
return {};
entry_point,
DEFAULT,
true,
- !newParams.bias.empty());
+ !newParams.bias.empty(),
+ 1,
+ GetFusedPrimitiveInputsCount(params));
kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0});
- if (orgParams.fused_eltwise)
- kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1});
kd.estimatedTime = runInfo.efficiency;
return {kd};
}
-} // namespace kernel_selector
\ No newline at end of file
+
+// Selects the datatype used to accumulate partial sums inside deconvolution kernels.
+Datatype DeconvolutionKernelBase::GetAccumulatorType(const deconvolution_params& params) const {
+    // Quantized (u)int8 inputs accumulate into int32 to avoid 8-bit overflow.
+    if (params.inputs[0].GetDType() == Datatype::INT8 || params.inputs[0].GetDType() == Datatype::UINT8)
+        return Datatype::INT32;
+
+    // input is either fp32 or fp16
+    // for fp32->fp16 accumulate to fp16, otherwise accumulate to input type
+    if (params.output.GetDType() == Datatype::F16)
+        return Datatype::F16;
+
+    return params.inputs[0].GetDType();
+}
+
+// Selects the datatype in which activations and fused operations are computed.
+Datatype DeconvolutionKernelBase::GetActivationType(const deconvolution_params& params) const {
+    auto accumulator_dt = GetAccumulatorType(params);
+    // Integer accumulation (quantized path) is converted to fp32 before activation;
+    // floating-point paths keep the accumulator type.
+    if (accumulator_dt == Datatype::INT32)
+        return Datatype::F32;
+    return accumulator_dt;
+}
+
+} // namespace kernel_selector
uint32_t split = 1;
uint32_t groups = 1;
bool depthwise_separable_opt = false;
- bool fused_eltwise = false;
std::string to_string() const override;
return (params.groups > 1) ? WeightsLayout::goizyx : WeightsLayout::oizyx;
}
bool Validate(const Params& p, const optional_params& o) const override;
+
+ virtual Datatype GetAccumulatorType(const deconvolution_params& params) const;
+ virtual Datatype GetActivationType(const deconvolution_params& params) const;
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
kd.efficiency = FORCE_PRIORITY_6;
return kd;
}
-} // namespace kernel_selector
\ No newline at end of file
+
+// Extends the base jit constants with a fused-ops configuration for the bfyx_opt kernel.
+JitConstants DeconvolutionKernel_bfyx_opt::GetJitConstants(const deconvolution_params& params) const {
+ auto jit = Parent::GetJitConstants(params);
+
+ if (!params.fused_ops.empty()) {
+ // Fused ops are computed in the activation datatype (fp32 for quantized paths).
+ auto fused_dt = GetActivationType(params);
+ // Index order must match the index variables used inside deconvolution_gpu_bfyx_opt.cl.
+ FusedOpsConfiguration conf = {
+ "",
+ {"batch_offset", "ofm_offset", "id_y", "id_x"},
+ "result",
+ fused_dt,
+ 1,
+ LoadType::LT_UNALIGNED,
+ BoundaryCheck::DISABLED };
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
+ return jit;
+}
+
+} // namespace kernel_selector
class DeconvolutionKernel_bfyx_opt : public DeconvolutionKernelBase {
public:
+ using Parent = DeconvolutionKernelBase;
DeconvolutionKernel_bfyx_opt() : DeconvolutionKernelBase("deconvolution_gpu_bfyx_opt") {}
virtual ~DeconvolutionKernel_bfyx_opt() {}
protected:
CommonDispatchData SetDefault(const deconvolution_params& params) const override;
+ JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "deconvolution_kernel_imad_along_f_tile_bfx.hpp"
+
+#include "kernel_selector_utils.h"
+
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace kernel_selector {
+
+namespace {
+ constexpr size_t simd = 16;
+}
+
+// Describes the data types, layouts and features this implementation supports.
+ParamsKey DeconvolutionKernel_imad_along_f_tile_bfx::GetSupportedKey() const {
+ ParamsKey k;
+ // imad operates on quantized 8-bit inputs and weights; output may be dequantized.
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputWeightsType(WeightsType::UINT8);
+
+ // Feature-blocked layouts, 2D and 3D, fsv16 variants:
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
+ k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
+
+ // fsv32 variants:
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
+ k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
+ k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv32);
+
+ // Batch + feature double-blocked variants:
+ k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+ k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+ k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+ k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+
+ k.EnableInputLayout(DataLayout::byxf_af32);
+ k.EnableOutputLayout(DataLayout::byxf_af32);
+
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
+ k.EnableBatching();
+ k.EnableTensorPitches();
+ k.EnableTensorOffset();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableGroupedConvolution();
+
+ return k;
+}
+
+// Rejects parameter combinations the tiled imad kernel cannot handle,
+// on top of the checks performed by the common base class.
+bool DeconvolutionKernel_imad_along_f_tile_bfx::Validate(const Params& p, const optional_params& o) const {
+    if (!Parent::Validate(p, o))
+        return false;
+
+    auto& params = static_cast<const deconvolution_params&>(p);
+    if (params.groups > 1) {
+        // Grouped case requires input features aligned to the imad width (4).
+        if (params.weights.IFM().v % 4 != 0)
+            return false;
+        // Output features must align to the sub-group size.
+        // Consider loosening at the cost of performance.
+        if (params.weights.OFM().v % simd != 0)
+            return false;
+    }
+
+    return true;
+}
+
+// Maps the chosen (ifm tile, ofm tile * simd) pair to the matching blocked weights layout.
+WeightsLayout DeconvolutionKernel_imad_along_f_tile_bfx::GetPreferredWeightsLayout(const deconvolution_params& params) const {
+ // isv, osv
+ using layout_map_key = std::tuple<size_t, size_t>;
+ using layout_map = std::map<layout_map_key, WeightsLayout>;
+
+ // Keyed by (inner slice of input features, inner slice of output features).
+ layout_map lt_map = {
+ {layout_map_key((size_t)4, (size_t)16), WeightsLayout::g_os_zyx_is_osv16_isv4 },
+ {layout_map_key((size_t)16, (size_t)16), WeightsLayout::g_os_zyx_is_osv16_isv16 },
+ {layout_map_key((size_t)32, (size_t)16), WeightsLayout::g_os_zyx_is_osv16_isv32 },
+ {layout_map_key((size_t)4, (size_t)32), WeightsLayout::g_os_zyx_is_osv32_isv4 },
+ {layout_map_key((size_t)16, (size_t)32), WeightsLayout::g_os_zyx_is_osv32_isv16 },
+ {layout_map_key((size_t)32, (size_t)32), WeightsLayout::g_os_zyx_is_osv32_isv32 }};
+
+ auto tile_ifm = GetTileIFM(params);
+ auto tile_ofm_simd = GetTileOFM(params) * simd;
+ auto key = layout_map_key(tile_ifm, tile_ofm_simd);
+ auto it = lt_map.find(key);
+ if (it == lt_map.end()) {
+ // Params are not valid for this implementation, return anything to allow Validate to reject
+ return WeightsLayout::goizyx;
+ }
+ auto layout = it->second;
+ return layout;
+}
+
+// Builds the dispatch: x is split into tiles and flattened with y/z, features are
+// split by the ofm tile and aligned to the sub-group size, batch is split by the batch tile.
+DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_along_f_tile_bfx::SetDefault(const deconvolution_params& params) const {
+    auto dispatch = Parent::SetDefault(params);
+
+    const auto& out = params.output;
+
+    dispatch.gws0 = CeilDiv(out.X().v, GetTileX(params)) * out.Y().v * out.Z().v;
+    dispatch.gws1 = Align(CeilDiv(out.Feature().v, GetTileOFM(params)), simd);
+    dispatch.gws2 = CeilDiv(out.Batch().v, GetTileB(params));
+
+    // Exactly one sub-group per work-group.
+    dispatch.lws0 = 1;
+    dispatch.lws1 = simd;
+    dispatch.lws2 = 1;
+
+    // Currently most optimized for fsv16 formats
+    auto in_layout = params.inputs[0].GetLayout();
+    if (in_layout == DataLayout::b_fs_yx_fsv16 || in_layout == DataLayout::b_fs_zyx_fsv16) {
+        dispatch.efficiency = FORCE_PRIORITY_7;
+    } else {
+        dispatch.efficiency = FORCE_PRIORITY_8;
+    }
+
+    return dispatch;
+}
+
+// Adds tiling factors, layout-dependent input pitches and the fused-ops configuration.
+JitConstants DeconvolutionKernel_imad_along_f_tile_bfx::GetJitConstants(const deconvolution_params& params) const {
+    auto jit = Parent::GetJitConstants(params);
+    auto tile_ifm = GetTileIFM(params);
+    auto tile_x = GetTileX(params);
+    auto tile_ofm = GetTileOFM(params);
+    auto tile_b = GetTileB(params);
+
+    jit.AddConstant(MakeJitConstant("TILE_IFM", tile_ifm));
+    jit.AddConstant(MakeJitConstant("TILE_X", tile_x));
+    jit.AddConstant(MakeJitConstant("TILE_OFM", tile_ofm));
+    jit.AddConstant(MakeJitConstant("TILE_B", tile_b));
+    jit.AddConstant(MakeJitConstant("SIMD", simd));
+
+    auto& in = params.inputs[0];
+    auto in_layout = in.GetLayout();
+
+    // Layout specific params:
+    //  - input_tile_ifm_pitch: pitch between consecutive ifm tiles (0 => tiles not contiguous)
+    //  - input_in_tile_batch_pitch: pitch between consecutive batches inside a batch tile
+    size_t input_tile_ifm_pitch = 0;
+    size_t input_in_tile_batch_pitch = 0;
+    size_t zyx_pitch_factor = in.Z().LogicalDimPadded() * in.Y().LogicalDimPadded() * in.X().LogicalDimPadded();
+
+    if (in_layout == DataLayout::b_fs_yx_fsv16 || in_layout == DataLayout::b_fs_zyx_fsv16) {
+        if (tile_ifm == 16) {
+            input_tile_ifm_pitch = zyx_pitch_factor * 16;
+        }
+        input_in_tile_batch_pitch = Align(in.Feature().LogicalDimPadded(), 16) * zyx_pitch_factor;
+    } else if (in_layout == DataLayout::b_fs_yx_fsv32 || in_layout == DataLayout::b_fs_zyx_fsv32) {
+        // Bugfix: the second condition previously re-checked b_fs_yx_fsv32 (copy-paste),
+        // so b_fs_zyx_fsv32 inputs fell through with zero pitches despite being supported.
+        if (tile_ifm == 32) {
+            input_tile_ifm_pitch = zyx_pitch_factor * 32;
+        }
+        input_in_tile_batch_pitch = Align(in.Feature().LogicalDimPadded(), 32) * zyx_pitch_factor;
+    } else if (in_layout == DataLayout::bs_fs_yx_bsv16_fsv16 || in_layout == DataLayout::bs_fs_zyx_bsv16_fsv16) {
+        if (tile_ifm == 16) {
+            input_tile_ifm_pitch = zyx_pitch_factor * 16 * 16;
+        }
+        input_in_tile_batch_pitch = 16;
+    } else if (in_layout == DataLayout::byxf_af32) {
+        input_tile_ifm_pitch = tile_ifm;
+        input_in_tile_batch_pitch = zyx_pitch_factor * Align(in.Feature().LogicalDimPadded(), 32);
+    }
+
+    jit.AddConstant(MakeJitConstant("INPUT_VALID_TILE_IFM_PITCH", input_tile_ifm_pitch != 0));
+    jit.AddConstant(MakeJitConstant("INPUT_TILE_IFM_PITCH", input_tile_ifm_pitch));
+    jit.AddConstant(MakeJitConstant("INPUT_IN_TILE_B_PITCH", input_in_tile_batch_pitch));
+
+    // fsv16 outputs can use blocked stores along x; other layouts store element-wise.
+    if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16 || params.output.GetLayout() == DataLayout::b_fs_zyx_fsv16) {
+        jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_X_STORE", true));
+    } else {
+        jit.AddConstant(MakeJitConstant("OUTPUT_NAIVE_STORE", true));
+    }
+
+    if (!params.fused_ops.empty()) {
+        auto fused_in_dt = GetActivationType(params);
+        // Index order must match the variable names used in the kernel code.
+        std::vector<std::string> idx_order;
+        if (params.output.Dimentions() <= 4) {
+            idx_order = { "(out_b + ob)", "(out_f + of * SIMD)", "out_y", "(out_x + tx)" };
+        } else {
+            idx_order = { "(out_b + ob)", "(out_f + of * SIMD)", "out_z", "out_y", "(out_x + tx)" };
+        }
+        // Boundary checks are needed only when a tile may cross the output extent.
+        auto boundary_check = BoundaryCheck::DISABLED;
+        if (params.output.X().v % tile_x != 0
+            || params.output.Feature().v % (tile_ofm * simd) != 0
+            || params.output.Batch().v % tile_b != 0) {
+            boundary_check = BoundaryCheck::ENABLED;
+        }
+        std::vector<Tensor::DataChannelName> loop_axes = { Tensor::DataChannelName::X };
+        if (tile_b != 1) {
+            loop_axes.push_back(Tensor::DataChannelName::BATCH);
+        } else {
+            idx_order[0] = "out_b";
+        }
+
+        auto conf = FusedOpsConfiguration{ "",
+                                           idx_order,
+                                           "dequantized[ob][of][tx]",
+                                           fused_in_dt,
+                                           1,
+                                           LoadType::LT_UNALIGNED,
+                                           boundary_check,
+                                           IndexType::TENSOR_COORD,
+                                           Tensor::DataChannelName::X,
+                                           loop_axes,
+                                           true };
+
+        jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+    }
+
+    return jit;
+}
+
+// Picks the input-feature tile: the largest of {4, 16, 32} that fits the layout's
+// feature-vector size and, for grouped convolution, evenly divides the ifm count.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileIFM(const deconvolution_params& params) const {
+    auto layout = params.inputs[0].GetLayout();
+
+    // Feature-vector size implied by the input layout.
+    size_t fsv = 4;
+    if (layout == DataLayout::b_fs_yx_fsv16
+        || layout == DataLayout::b_fs_zyx_fsv16
+        || layout == DataLayout::bs_fs_yx_bsv16_fsv16
+        || layout == DataLayout::bs_fs_zyx_bsv16_fsv16) {
+        fsv = 16;
+    } else if (layout == DataLayout::b_fs_yx_fsv32
+               || layout == DataLayout::b_fs_zyx_fsv32
+               || layout == DataLayout::byxf_af32) {
+        fsv = 32;
+    }
+
+    auto ifm = params.weights.IFM().v;
+    bool grouped = params.groups > 1;
+    auto pref_tile_ifm = std::min(fsv, ifm);
+
+    // Try candidates from largest to smallest; fall back to 1 if none fits.
+    for (size_t candidate : { (size_t)32, (size_t)16, (size_t)4 }) {
+        if (candidate <= pref_tile_ifm && (!grouped || ifm % candidate == 0))
+            return candidate;
+    }
+    return 1;
+}
+
+// Number of output-feature slices (of simd features each) processed per work-item.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileOFM(const deconvolution_params& params) const {
+    // TODO Loosen divisibility requirement for tile ofm 2
+    // NOTE(review): ofm tile of 2 is only enabled when batch is odd, i.e. exactly when
+    // GetTileB's batch tiling is unused -- presumably to bound per-work-item register
+    // usage; confirm this interaction is intentional.
+    if (params.weights.OFM().v % (simd * 2) == 0 && params.output.Batch().v % 2 != 0)
+        return 2;
+
+    return 1;
+}
+
+// Tile along x: one element per sub-group lane, capped by the output width.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileX(const deconvolution_params& params) const {
+    constexpr size_t max_tile_x = simd;
+    return std::min(params.output.X().v, max_tile_x);
+}
+
+// Tile along batch: process two batches per work-item when batch is even.
+size_t DeconvolutionKernel_imad_along_f_tile_bfx::GetTileB(const deconvolution_params& params) const {
+    return (params.output.Batch().v % 2 == 0) ? 2 : 1;
+}
+
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "deconvolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+// Deconvolution kernel using imad instructions, tiling the output along batch,
+// features and x. See the .cpp for tile-size selection heuristics.
+class DeconvolutionKernel_imad_along_f_tile_bfx : public DeconvolutionKernelBase {
+public:
+    using Parent = DeconvolutionKernelBase;
+    DeconvolutionKernel_imad_along_f_tile_bfx() : DeconvolutionKernelBase("deconvolution_gpu_imad_along_f_tile_bfx") {}
+    virtual ~DeconvolutionKernel_imad_along_f_tile_bfx() = default;
+
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    bool Validate(const Params& p, const optional_params& o) const override;
+    // Fixed garbled parameter declaration ("¶ms", a mangled "&params").
+    WeightsLayout GetPreferredWeightsLayout(const deconvolution_params& params) const override;
+    CommonDispatchData SetDefault(const deconvolution_params& params) const override;
+    JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+    // Fused primitives this kernel can inline into its epilogue.
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return {
+            FusedOpType::ACTIVATION,
+            FusedOpType::ELTWISE,
+            FusedOpType::SCALE,
+            FusedOpType::QUANTIZE
+        };
+    }
+
+    // Tile-size selection helpers.
+    size_t GetTileIFM(const deconvolution_params& params) const;
+    size_t GetTileOFM(const deconvolution_params& params) const;
+    size_t GetTileX(const deconvolution_params& params) const;
+    size_t GetTileB(const deconvolution_params& params) const;
+};
+
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "deconvolution_kernel_imad_ref.hpp"
+
+#include "kernel_selector_utils.h"
+
+#include <vector>
+#include <string>
+
+namespace kernel_selector {
+
+// Describes the data types, layouts and features supported by the reference imad kernel.
+ParamsKey DeconvolutionKernel_imad_ref::GetSupportedKey() const {
+ ParamsKey k;
+ // imad requires quantized 8-bit inputs and weights; output may be dequantized.
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+ k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::F16);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputWeightsType(WeightsType::UINT8);
+
+ // Feature-blocked input layouts; output layout is unrestricted for the reference path.
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
+ k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+ k.EnableInputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+ k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
+ k.EnableInputLayout(DataLayout::b_fs_zyx_fsv32);
+ k.EnableInputLayout(DataLayout::byxf_af32);
+ k.EnableAllOutputLayout();
+
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
+ k.EnableBatching();
+ k.EnableTensorPitches();
+ k.EnableTensorOffset();
+ k.EnableBiasPerFeature();
+ k.EnableNonBiasTerm();
+ k.EnableGroupedConvolution();
+
+ return k;
+}
+
+// Single supported weights layout, matching the imad tile of 4 input features (isv4).
+WeightsLayout DeconvolutionKernel_imad_ref::GetPreferredWeightsLayout(const deconvolution_params&) const {
+ return WeightsLayout::g_os_zyx_is_osv32_isv4;
+}
+
+// Naive dispatch for the reference kernel: one work-item per output element,
+// gws = (features, x*y*z, batch).
+DeconvolutionKernelBase::DispatchData DeconvolutionKernel_imad_ref::SetDefault(const deconvolution_params& params) const {
+    auto dispatch = Parent::SetDefault(params);
+
+    const auto& out = params.output;
+    std::vector<size_t> global = {
+        out.Feature().v,
+        out.X().v * out.Y().v * out.Z().v,
+        out.Batch().v
+    };
+    // Let the common helper choose a local size fitting the device limits.
+    auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+
+    dispatch.gws0 = global[0];
+    dispatch.gws1 = global[1];
+    dispatch.gws2 = global[2];
+    dispatch.lws0 = local[0];
+    dispatch.lws1 = local[1];
+    dispatch.lws2 = local[2];
+
+    // Reference implementation: lowest selection priority.
+    dispatch.efficiency = FORCE_PRIORITY_9;
+
+    return dispatch;
+}
+
+// Adds the ifm tile size and the fused-ops configuration for the reference kernel.
+JitConstants DeconvolutionKernel_imad_ref::GetJitConstants(const deconvolution_params& params) const {
+ auto jit = Parent::GetJitConstants(params);
+ auto tile_ifm = GetTileIFM(params);
+
+ jit.AddConstant(MakeJitConstant("TILE_IFM", tile_ifm));
+
+ if (!params.fused_ops.empty()) {
+ // Index order must match the index variables used in the kernel code.
+ std::vector<std::string> idx_order;
+ if (params.output.Dimentions() <= 4) {
+ idx_order = { "out_b", "out_f", "out_y", "out_x" };
+ } else {
+ idx_order = { "out_b", "out_f", "out_z", "out_y", "out_x" };
+ }
+ // One element per work-item, so unaligned loads with no boundary checks suffice.
+ auto conf = FusedOpsConfiguration{ "", idx_order, "dequantized", GetActivationType(params), 1, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED };
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
+
+ return jit;
+}
+
+// imad processes 4 input features per instruction; the reference kernel always tiles by 4.
+size_t DeconvolutionKernel_imad_ref::GetTileIFM(const deconvolution_params&) const {
+ return 4;
+}
+
+
+} // namespace kernel_selector
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "deconvolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+// Reference (unoptimized) deconvolution kernel using imad instructions.
+// One work-item per output element; used as a fallback for quantized inputs.
+class DeconvolutionKernel_imad_ref : public DeconvolutionKernelBase {
+public:
+    using Parent = DeconvolutionKernelBase;
+    DeconvolutionKernel_imad_ref() : DeconvolutionKernelBase("deconvolution_gpu_imad_ref") {}
+    virtual ~DeconvolutionKernel_imad_ref() = default;
+
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    // Fixed garbled parameter declaration ("¶ms", a mangled "&params").
+    WeightsLayout GetPreferredWeightsLayout(const deconvolution_params& params) const override;
+    CommonDispatchData SetDefault(const deconvolution_params& params) const override;
+    JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+    // Fused primitives this kernel can inline into its epilogue.
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return {
+            FusedOpType::ACTIVATION,
+            FusedOpType::ELTWISE,
+            FusedOpType::SCALE,
+            FusedOpType::QUANTIZE
+        };
+    }
+
+    size_t GetTileIFM(const deconvolution_params& params) const;
+};
+
+} // namespace kernel_selector
ParamsKey k;
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
+ k.EnableInputDataType(Datatype::INT8);
+ k.EnableInputDataType(Datatype::UINT8);
+
k.EnableInputWeightsType(WeightsType::F16);
k.EnableInputWeightsType(WeightsType::F32);
+ k.EnableInputWeightsType(WeightsType::INT8);
+ k.EnableInputWeightsType(WeightsType::UINT8);
+
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
+ k.EnableOutputDataType(Datatype::INT8);
+ k.EnableOutputDataType(Datatype::UINT8);
+
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableInputLayout(DataLayout::byxf);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::bs_fs_zyx_bsv16_fsv16);
+
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBiasPerFeature();
k.EnableDepthwiseSeparableOpt();
k.EnableGradient();
k.EnableGroupedConvolution();
+ k.EnableDifferentTypes();
+ k.EnableDifferentInputWeightsTypes();
return k;
}
if (params.output.Feature().v * params.output.Batch().v <= 16)
jit.AddConstant(MakeJitConstant("DIM_ORDER_XYBF", 1));
+ if (!params.fused_ops.empty()) {
+ auto fused_dt = GetActivationType(params);
+ std::vector<std::string> idx_order;
+ if (params.output.Dimentions() <= 4) {
+ idx_order = { "batch_offset", "ofm_offset", "out_y", "out_x" };
+ } else {
+ idx_order = { "batch_offset", "ofm_offset", "out_z", "out_y", "out_x" };
+ }
+ FusedOpsConfiguration conf = { "", idx_order, "post_activation", fused_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED };
+
+ jit.Merge(MakeFusedOpsJitConstants(params, { conf }));
+ }
+
return jit;
}
} // namespace kernel_selector
protected:
CommonDispatchData SetDefault(const deconvolution_params& params) const override;
JitConstants GetJitConstants(const deconvolution_params& params) const override;
+
+ std::vector<FusedOpType> GetSupportedFusedOps() const override {
+ return {
+ FusedOpType::ACTIVATION,
+ FusedOpType::ELTWISE,
+ FusedOpType::SCALE,
+ FusedOpType::QUANTIZE
+ };
+ }
};
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
#include "deconvolution_kernel_bfyx_opt.h"
#include "deconvolution_kernel_b_fs_zyx_fsv16.h"
#include "deconvolution_kernel_b_fs_zyx_fsv16_dw.h"
+#include "deconvolution_kernel_imad_ref.hpp"
+#include "deconvolution_kernel_imad_along_f_tile_bfx.hpp"
namespace kernel_selector {
deconvolution_kernel_selector::deconvolution_kernel_selector() {
Attach<DeconvolutionKernel_bfyx_opt>();
Attach<DeconvolutionKernel_b_fs_zyx_fsv16>();
Attach<DeconvolutionKernel_b_fs_zyx_fsv16_dw>();
+ Attach<DeconvolutionKernel_imad_ref>();
+ Attach<DeconvolutionKernel_imad_along_f_tile_bfx>();
}
KernelsData deconvolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
auto input0 = ewParams.inputs[0];
+ // Check that padding before features doesn't miss-align the blocks
+ auto feature_block_size = 16;
+ if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
+ return false;
+ }
+
for (size_t i = 1; i < ewParams.inputs.size(); i++) {
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(ewParams.inputs[i] == input0))
return false;
+ if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
+ return false;
+ }
}
return true;
return false;
}
+ const auto& params = static_cast<const pooling_params&>(p);
+ const auto feature_block_size = 16;
+
+ // Check that padding features doesn't miss-align the blocks
+ if (params.inputs[0].Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0)
+ return false;
+
return true;
}
bool has_post_scale,
bool has_post_shift,
bool has_pre_shift,
+ bool has_clamp,
bool per_tensor_input_range,
bool per_tensor_input_scale,
bool per_tensor_input_shift,
, has_post_scale(has_post_scale)
, has_post_shift(has_post_shift)
, has_pre_shift(has_pre_shift)
+ , has_clamp(has_clamp)
, per_tensor_input_range(per_tensor_input_range)
, per_tensor_input_scale(per_tensor_input_scale)
, per_tensor_input_shift(per_tensor_input_shift)
bool has_post_scale;
bool has_post_shift;
bool has_pre_shift;
+ bool has_clamp;
bool per_tensor_input_range;
bool per_tensor_input_scale;
MakeJitConstant("OUTPUT", output),
};
+ if (params.rotate_180) {
+ jit.AddConstant(MakeJitConstant("REORDER_ROTATE", params.rotate_180));
+ }
+
if (fp16Supported) {
jit.Merge(MakeUnitTypeJitConstants(Datatype::F16));
} else {
WeightsTensor input;
WeightsTensor output;
bool winograd = false;
+ bool rotate_180 = false;
virtual ParamsKey GetParamsKey() const {
ParamsKey k;
if (winograd) {
k.EnableWinogradReorder();
}
+
+ if (rotate_180) {
+ k.EnableRotateReorder();
+ }
return k;
}
};
k.EnableDifferentTypes();
k.EnableTensorOffset();
k.EnableTensorPitches();
+ k.EnableRotateReorder();
return k;
}
const reorder_weights_params& orgParams = static_cast<const reorder_weights_params&>(params);
return GetCommonKernelsData(orgParams, options, DONT_USE_IF_HAVE_SOMETHING_ELSE);
}
-} // namespace kernel_selector
\ No newline at end of file
+} // namespace kernel_selector
#include "include/data_types.cl"
KERNEL(activation)(
-#if GRADIENT
- __global UNIT_TYPE* input_grad,
- __global UNIT_TYPE* output,
- __global UNIT_TYPE* input
-#else
- __global UNIT_TYPE* input,
- __global UNIT_TYPE* output
+ __global INPUT0_TYPE* input,
+ __global OUTPUT_TYPE* output
+#if HAS_FUSED_OPS_DECLS
+ , FUSED_OPS_DECLS
#endif
)
{
const unsigned int x = (uint)get_global_id(0) * NUM_COLS_WI;
+#if OUTPUT_DIMS == 5
+ const unsigned int fo_x = x % OUTPUT_SIZE_X;
+ const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
+ const unsigned int fo_z = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
+ const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM;
+ const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM);
+#elif OUTPUT_DIMS == 4
+ const unsigned int fo_x = x % OUTPUT_SIZE_X;
+ const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
+ const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
+ const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM);
+#endif
unsigned int input_offset = x + INPUT0_OFFSET;
unsigned int output_offset = x + OUTPUT_OFFSET;
- typedef CAT(UNIT_TYPE, 4) type_t;
-#if GRADIENT
- type_t g = ((__global type_t*) (input_grad + input_offset))[0];
-#endif
- type_t v = ((__global type_t*) (input + input_offset))[0];
+ typedef CAT(INPUT0_TYPE, 4) input_t;
+ typedef CAT(OUTPUT_TYPE, 4) output_t;
-#if GRADIENT
- v = ACTIVATION(g, v, ACTIVATION_PARAMS);
+ input_t v = ((__global input_t*) (input + input_offset))[0];
+
+ v = ACTIVATION_KERNEL(v, ACTIVATION_PARAMS_KERNEL);
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ *((__global output_t*)(output + output_offset)) = FUSED_OPS_RESULT;
#else
- v = ACTIVATION(v, ACTIVATION_PARAMS);
+ *((__global output_t*)(output + output_offset)) = v;
#endif
-
- *((__global type_t*)(output + output_offset)) = v;
}
// TODO: move it from layout based to memory based
KERNEL(activation)(
-#if GRADIENT
- __global UNIT_TYPE* input_grad,
- __global UNIT_TYPE* output_grad,
- __global UNIT_TYPE* input
-#else
- __global UNIT_TYPE* input,
- __global UNIT_TYPE* output
-#endif
+ __global INPUT0_TYPE* input,
+ __global OUTPUT_TYPE* output
#ifdef PARAMETERIZED
, __global ADDITIONAL_PARAMS_TYPE* params
#endif
+#if HAS_FUSED_OPS_DECLS
+ , FUSED_OPS_DECLS
+#endif
)
{
#if OUTPUT_DIMS == 5
#define ORDER batch,feature,y,x
#endif
-#if defined OUTPUT_LAYOUT_BFZYX
+#if OUTPUT_DIMS == 5
const unsigned x = get_global_id(0);
const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
#endif
#endif
-#if GRADIENT
- const unsigned src_grad_index = GET_INDEX(INPUT,0,ORDER);
- const unsigned src_index = GET_INDEX(INPUT,1,ORDER);
-#else
const unsigned src_index = GET_INDEX(INPUT,0,ORDER);
-#endif
const unsigned dst_index = GET_INDEX(OUTPUT,,ORDER);
#if defined PARAMETERIZED
#endif
#define PARAMETERIZED_ACTIVATION_PARAMS NL_M_PARAMETERIZED, NL_N_PARAMETERIZED
- #if GRADIENT
- output_grad[dst_index] = ACTIVATION(input_grad[src_grad_index], input[src_index], PARAMETERIZED_ACTIVATION_PARAMS);
+ INPUT0_TYPE dst = ACTIVATION_KERNEL(input[src_index], PARAMETERIZED_ACTIVATION_PARAMS);
+ #if HAS_FUSED_OPS
+ FUSED_OPS;
+ output[dst_index] = FUSED_OPS_RESULT;
#else
- output[dst_index] = ACTIVATION(input[src_index], PARAMETERIZED_ACTIVATION_PARAMS);
+ output[dst_index] = dst;
#endif
#else
- #if GRADIENT
- output_grad[dst_index] = ACTIVATION(input_grad[src_grad_index], input[src_index], ACTIVATION_PARAMS);
+ INPUT0_TYPE dst = ACTIVATION_KERNEL(input[src_index], ACTIVATION_PARAMS);
+ #if HAS_FUSED_OPS
+ FUSED_OPS;
+ output[dst_index] = FUSED_OPS_RESULT;
#else
- output[dst_index] = ACTIVATION(input[src_index], ACTIVATION_PARAMS);
+ output[dst_index] = dst;
#endif
#endif
}
__attribute__((opencl_unroll_hint(INPUT_BLOCK_SIZE)))
for (int i = 0; i < INPUT_BLOCK_SIZE; i++)
{
- const uint in_elem = i * SUB_GROUP_SIZE + lid;
- const uint xb = in_elem % INPUT_LINE_SIZE;
- const uint yb = in_elem / INPUT_LINE_SIZE;
+ const int in_elem = i * SUB_GROUP_SIZE + lid;
+ const int xb = in_elem % INPUT_LINE_SIZE;
+ const int yb = in_elem / INPUT_LINE_SIZE;
if (input_y + yb >= 0 && input_y + yb < INPUT0_SIZE_Y &&
input_x + xb >= 0 && input_x + xb < INPUT0_SIZE_X)
line_cache[ic * INPUT_BLOCK_SIZE + i] = input[input_offset +
#else // OUTPUT_FEATURE_NUM > 16
const uint dst_index = OUTPUT_GET_INDEX(b, fg*OSV + lid, y, x+i);
if (x + i < OUTPUT_SIZE_X && fg*OSV + lid < OUTPUT_FEATURE_NUM) {
- output[dst_index] = dst[ofm][i];
+ output[dst_index] = dst[0][i];
}
#endif // OUTPUT_FEATURE_NUM > 16
}
*******************************************************************************/
#include "include/include_all.cl"
-#include "include/unit_type.cl"
+#include "include/data_types.cl"
+
+#include "deconvolution_gpu_imad_common.cl"
#define unroll_for __attribute__((opencl_unroll_hint)) for
#define FEATURE_SLICE_SIZE 16
+
#if X_BLOCK_SIZE == 1
- #define BLOCK_TYPE UNIT_TYPE
- #define DST_VAR dst
+ #define GET_VEC_ELEM(var, idx) var
+#else
+ #define GET_VEC_ELEM(var, idx) var[idx]
+#endif
+
+#define ACCUMULATOR_BLOCK_TYPE MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, X_BLOCK_SIZE)
+#define ACTIVATION_BLOCK_TYPE MAKE_VECTOR_TYPE(ACTIVATION_TYPE, X_BLOCK_SIZE)
+#define OUTPUT_BLOCK_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, X_BLOCK_SIZE)
+
+#define TO_ACTIVATION_BLOCK_TYPE(x) CAT(convert_, ACTIVATION_BLOCK_TYPE)(x)
+#define TO_OUTPUT_BLOCK_TYPE(x) CAT(convert_, OUTPUT_BLOCK_TYPE)(x)
+
+// Declare the weights-loading helper, sized by the filter element width so
+// each sub-group block read moves roughly the same number of bytes:
+// 1-byte types read 16 elements per call, 2-byte types 8, wider types 4.
+#if FILTER_TYPE_SIZE == 1
+DECLARE_READ_BLOCK_16(load_weights, FILTER_TYPE)
+#elif FILTER_TYPE_SIZE == 2
+DECLARE_READ_BLOCK_8(load_weights, FILTER_TYPE)
+#else
+DECLARE_READ_BLOCK_4(load_weights, FILTER_TYPE)
+#endif
+
+#if OUTPUT_TYPE_SIZE == 1
+DECLARE_STORE_BLOCK_16(store_output, OUTPUT_TYPE)
#else
- #define BLOCK_TYPE CAT(UNIT_TYPE, X_BLOCK_SIZE)
- #define DST_VAR dst[x_block]
+DECLARE_STORE_BLOCK_8(store_output, OUTPUT_TYPE)
+#endif
+
+// Declare the input-line preloading helper, sized by the input element width
+// (1-byte types use 16-element blocks, wider types 8-element blocks),
+// mirroring the PRELOAD_WEIGHTS_LINE selection below.
+#if PRELOAD_INPUT_LINE
+// Fix: compare against 1 explicitly.  A bare `#if INPUT0_TYPE_SIZE` is true
+// for every non-zero size, which made the 8-element branch unreachable and
+// forced 16-element block reads even for 2- and 4-byte input types.
+# if INPUT0_TYPE_SIZE == 1
+DECLARE_READ_BLOCK_16(preload_input, INPUT0_TYPE)
+# else
+DECLARE_READ_BLOCK_8(preload_input, INPUT0_TYPE)
+# endif
+#endif
+
+// Optional per-filter-row weights preloading helper; like load_weights above,
+// 1-byte filter types use 16-element block reads, wider types 8-element ones.
+#if PRELOAD_WEIGHTS_LINE
+# if FILTER_TYPE_SIZE == 1
+DECLARE_READ_BLOCK_16(preload_weights, FILTER_TYPE)
+# else
+DECLARE_READ_BLOCK_8(preload_weights, FILTER_TYPE)
+# endif
#endif
__attribute__((intel_reqd_sub_group_size(FEATURE_SLICE_SIZE))) // attr:no-format
+__attribute__((reqd_work_group_size(1, FEATURE_SLICE_SIZE, 1)))
KERNEL(deconvolution_gpu_b_fs_zyx_fsv16_dw)(
const __global INPUT0_TYPE *input,
__global OUTPUT_TYPE *output,
#if BIAS_TERM
const __global BIAS_TYPE *bias,
#endif
- uint split_idx)
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
+#endif
+ uint split_idx
+ )
{
const uint zyx = (uint)get_global_id(0);
- const uint x = (zyx % (OUTPUT_SIZE_X / X_BLOCK_SIZE)) * X_BLOCK_SIZE;
-#if INPUT0_LAYOUT_B_FS_YX_FSV16
- const uint y = zyx / (OUTPUT_SIZE_X / X_BLOCK_SIZE);
+ const uint x = (zyx % (CEIL_DIV(OUTPUT_SIZE_X, X_BLOCK_SIZE))) * X_BLOCK_SIZE;
+#if OUTPUT_DIMS <= 4
+ const uint y = zyx / (CEIL_DIV(OUTPUT_SIZE_X, X_BLOCK_SIZE));
const uint z = 0;
#else
- const uint zy = zyx / (OUTPUT_SIZE_X / X_BLOCK_SIZE);
+ const uint zy = zyx / (CEIL_DIV(OUTPUT_SIZE_X, X_BLOCK_SIZE));
const uint y = zy % OUTPUT_SIZE_Y;
const uint z = zy / OUTPUT_SIZE_Y;
#endif
const uint f_block = get_group_id(1);
const uint sglid = get_sub_group_local_id();
- const uint f = f_block * FEATURE_SLICE_SIZE + sglid;
+ const uint fg = f_block * FEATURE_SLICE_SIZE;
+ const uint f = fg + sglid;
const uint b = (uint)get_global_id(2);
const int input_x = x + PADDING_SIZE_X - (FILTER_SIZE_X - 1);
const uint filter_offset = f_block * FEATURE_SLICE_SIZE * FILTER_SIZE_X * FILTER_SIZE_Y * FILTER_SIZE_Z;
-#if BIAS_TERM
- BLOCK_TYPE dst = (BLOCK_TYPE)(UNIT_BLOCK_READ(bias, f_block * FEATURE_SLICE_SIZE));
+#if BIAS_TERM && ACCUMULATOR_IS_FP
+ ACCUMULATOR_BLOCK_TYPE dst = (ACCUMULATOR_BLOCK_TYPE)(DT_BIAS_BLOCK_READ(bias, f_block * FEATURE_SLICE_SIZE));
#else
- BLOCK_TYPE dst = (BLOCK_TYPE)(UNIT_VAL_ZERO);
+ ACCUMULATOR_BLOCK_TYPE dst = (ACCUMULATOR_BLOCK_TYPE)(ACCUMULATOR_VAL_ZERO);
#endif
- UNIT_TYPE wei[FILTER_SIZE_Z * FILTER_SIZE_Y * FILTER_SIZE_X];
+#if PRELOAD_WEIGHTS
+ FILTER_TYPE wei[FILTER_SIZE_Z * FILTER_SIZE_Y * FILTER_SIZE_X];
+
+ FUNC_CALL(load_weights)(weights, filter_offset, FILTER_SIZE_X * FILTER_SIZE_Y * FILTER_SIZE_Z, wei);
+#endif
+
+ INPUT0_TYPE src_val = INPUT0_VAL_ZERO;
+
+#if PRELOAD_INPUT_LINE
+ int first_input_x = input_x;
+ if (first_input_x % STRIDE_SIZE_X != 0) {
+ if (first_input_x >= 0)
+ first_input_x = ALIGN(first_input_x, STRIDE_SIZE_X);
+ else
+ first_input_x = first_input_x / STRIDE_SIZE_X * STRIDE_SIZE_X;
+ }
+ first_input_x = first_input_x / STRIDE_SIZE_X;
unroll_for (uint k_z = 0; k_z < FILTER_SIZE_Z; k_z++) {
+ const int input_offset_z = input_z + k_z;
+ const bool zero_z = (input_offset_z >= INPUT0_SIZE_Z * STRIDE_SIZE_Z) || (input_offset_z < 0) || ((input_offset_z % STRIDE_SIZE_Z) != 0);
unroll_for (uint k_y = 0; k_y < FILTER_SIZE_Y; k_y++) {
- unroll_for (uint k_x = 0; k_x < FILTER_SIZE_X; k_x++) {
- const uint wei_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
- wei[wei_idx] = UNIT_BLOCK_READ(weights, filter_offset + k_z * FILTER_Z_PITCH * FEATURE_SLICE_SIZE
- + k_y * FILTER_Y_PITCH * FEATURE_SLICE_SIZE
- + k_x * FEATURE_SLICE_SIZE);
- }
- }
- }
+ const int input_offset_y = input_y + k_y;
+ const bool zero_y = (input_offset_y >= INPUT0_SIZE_Y * STRIDE_SIZE_Y) || (input_offset_y < 0) || ((input_offset_y % STRIDE_SIZE_Y) != 0);
+ if (!zero_y && !zero_z) {
+ INPUT0_TYPE input_line[INPUT_BLOCK_SIZE_X] = { };
+ uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
+ uint fixed_input_offset_z = (uint)input_offset_z / STRIDE_SIZE_Z;
+ uint preload_input_offset = input_offset + fixed_input_offset_z * input_z_pitch +
+ fixed_input_offset_y * input_y_pitch;
- UNIT_TYPE src_val = UNIT_VAL_ZERO;
+ if (first_input_x >= 0) {
+ FUNC_CALL(preload_input)(input, preload_input_offset + first_input_x * input_x_pitch, INPUT_BLOCK_SIZE_X, input_line);
+ } else {
+ unroll_for (uint xi = 0; xi < INPUT_BLOCK_SIZE_X; ++xi) {
+ if (first_input_x + xi >= 0) {
+ input_line[xi] = DT_INPUT_BLOCK_READ(input, preload_input_offset + first_input_x * input_x_pitch + xi * input_x_pitch);
+ } else {
+ input_line[xi] = 0;
+ }
+ }
+ }
+
+#if PRELOAD_WEIGHTS_LINE
+ FILTER_TYPE wei[FILTER_SIZE_X] = { };
+ FUNC_CALL(preload_weights)(weights,
+ filter_offset + (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH * FEATURE_SLICE_SIZE
+ + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH * FEATURE_SLICE_SIZE,
+ FILTER_SIZE_X,
+ wei);
+#endif
+ unroll_for (uint k_x = 0; k_x < FILTER_SIZE_X; k_x++) {
+# if PRELOAD_WEIGHTS
+ const uint in_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
+ FILTER_TYPE wei_val = wei[in_idx];
+# elif PRELOAD_WEIGHTS_LINE
+ FILTER_TYPE wei_val = wei[(FILTER_SIZE_X - k_x - 1)];
+# else
+ const uint in_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
+ FILTER_TYPE wei_val = DT_FILTER_BLOCK_READ(weights, filter_offset + in_idx * FEATURE_SLICE_SIZE);
+# endif
+ unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
+ const int input_offset_x = input_x + k_x + x_block;
+ const bool zero_x = (input_offset_x >= INPUT0_SIZE_X * STRIDE_SIZE_X) || (input_offset_x < 0) || ((input_offset_x % STRIDE_SIZE_X) != 0);
+ if (!zero_x) {
+ src_val = input_line[(x_block + k_x) / STRIDE_SIZE_X];
+ GET_VEC_ELEM(dst, x_block) += src_val * wei_val;
+ } // if !zero_x
+ } // for X_BLOCK_SIZE
+ } // for FILTER_SIZE_X
+ } // if !zero_y && !zero_z
+ } // for FILTER_SIZE_Y
+ } // for FILTER_SIZE_Z
+#else
unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
unroll_for (uint k_z = 0; k_z < FILTER_SIZE_Z; k_z++) {
const int input_offset_z = input_z + k_z;
unroll_for (uint k_x = 0; k_x < FILTER_SIZE_X; k_x++) {
const int input_offset_x = input_x + k_x + x_block;
const bool zero_x = (input_offset_x >= INPUT0_SIZE_X * STRIDE_SIZE_X) || (input_offset_x < 0) || ((input_offset_x % STRIDE_SIZE_X) != 0);
- const uint in_idx = k_z * FILTER_Z_PITCH + k_y * FILTER_Y_PITCH + k_x;
+ const uint in_idx = (FILTER_SIZE_Z - k_z - 1) * FILTER_Z_PITCH + (FILTER_SIZE_Y - k_y - 1) * FILTER_Y_PITCH + (FILTER_SIZE_X - k_x - 1);
if (!zero_z && !zero_y && !zero_x) {
uint fixed_input_offset_x = (uint)input_offset_x / STRIDE_SIZE_X;
uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
uint fixed_input_offset_z = (uint)input_offset_z / STRIDE_SIZE_Z;
- src_val = UNIT_BLOCK_READ(input, input_offset +
- fixed_input_offset_z * input_z_pitch +
- fixed_input_offset_y * input_y_pitch +
- fixed_input_offset_x * input_x_pitch);
- DST_VAR = mad(src_val, wei[in_idx], DST_VAR);
- }
- }
- }
- }
- }
+ src_val = DT_INPUT_BLOCK_READ(input, input_offset +
+ fixed_input_offset_z * input_z_pitch +
+ fixed_input_offset_y * input_y_pitch +
+ fixed_input_offset_x * input_x_pitch);
+# if PRELOAD_WEIGHTS
+ FILTER_TYPE wei_val = wei[in_idx];
+# else
+ FILTER_TYPE wei_val = DT_FILTER_BLOCK_READ(weights, filter_offset + in_idx * FEATURE_SLICE_SIZE);
+# endif
+ GET_VEC_ELEM(dst, x_block) += src_val * wei_val;
+ } // if !zero_z && !zero_y && !zero_x
+ } // for FILTER_SIZE_X
+ } // for FILTER_SIZE_Y
+ } // for FILTER_SIZE_Z
+ } // for X_BLOCK_SIZE
+#endif
- dst = ACTIVATION(dst, ACTIVATION_PARAMS);
+ ACTIVATION_BLOCK_TYPE dequantized = TO_ACTIVATION_BLOCK_TYPE(dst);
+#if BIAS_TERM && !ACCUMULATOR_IS_FP
+ dequantized += TO_ACTIVATION_TYPE(DT_BIAS_BLOCK_READ(bias, f_block * FEATURE_SLICE_SIZE));
+#endif
+
+ OUTPUT_BLOCK_TYPE result;
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ result = FUSED_OPS_RESULT;
+#else
+ result = TO_OUTPUT_BLOCK_TYPE(ACTIVATION(dequantized, ACTIVATION_PARAMS));
+#endif
const uint output_x_pitch = FEATURE_SLICE_SIZE;
const uint output_y_pitch = output_x_pitch * (OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X);
if ((f_block + 1) * FEATURE_SLICE_SIZE >= OUTPUT_FEATURE_NUM)
{
unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
+ if (OUTPUT_SIZE_X % X_BLOCK_SIZE != 0 && x + X_BLOCK_SIZE >= OUTPUT_SIZE_X && x_block >= OUTPUT_SIZE_X % X_BLOCK_SIZE)
+ break;
if (f_block * FEATURE_SLICE_SIZE + sglid < OUTPUT_FEATURE_NUM)
- output[output_offset + (x + x_block) * output_x_pitch + sglid] = DST_VAR;
+ output[output_offset + (x + x_block) * output_x_pitch + sglid] = GET_VEC_ELEM(result, x_block);
}
}
else
#endif // OUTPUT_LEFTOVERS
+#if OUTPUT_SIZE_X % X_BLOCK_SIZE != 0
+ if (x + X_BLOCK_SIZE >= OUTPUT_SIZE_X) {
+ FUNC_CALL(store_output)(output, output_offset + x * output_x_pitch, OUTPUT_SIZE_X % X_BLOCK_SIZE, (OUTPUT_TYPE *)&result);
+ } else
+#endif
{
- unroll_for (uint x_block = 0; x_block < X_BLOCK_SIZE; x_block++) {
- UNIT_BLOCK_WRITE(output, output_offset + (x + x_block) * output_x_pitch, DST_VAR);
- }
+ FUNC_CALL(store_output)(output, output_offset + x * output_x_pitch, X_BLOCK_SIZE, (OUTPUT_TYPE *)&result);
}
}
+
+#undef unroll_for
+#undef FEATURE_SLICE_SIZE
+
+#undef GET_VEC_ELEM
+
+#undef ACCUMULATOR_BLOCK_TYPE
+#undef ACTIVATION_BLOCK_TYPE
+#undef OUTPUT_BLOCK_TYPE
+
+#undef TO_ACTIVATION_BLOCK_TYPE
+#undef TO_OUTPUT_BLOCK_TYPE
#if BIAS_TERM
const __global BIAS_TYPE* bias,
#endif
- uint split_idx
-#if FUSED_ELTWISE
- , const __global UNIT_TYPE* fuse_input
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
#endif
- )
+ uint split_idx
+ )
{
- UNIT_TYPE result = UNIT_VAL_ZERO;
+ ACCUMULATOR_TYPE acc = ACCUMULATOR_VAL_ZERO;
const uint b_f = get_global_id(2);
const uint batch_offset = b_f / OUTPUT_FEATURE_NUM;
uint filter_idx = filter_offset + of*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
for (uint h = 0; h < FILTER_OFM_NUM; h++)
{
- result = fma(input[input_idx], filter[filter_idx], result);
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_OFM_PITCH;
input_idx += INPUT0_FEATURE_PITCH;
}
uint filter_idx = filter_offset + of*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
for (uint h = 0; h < FILTER_IFM_NUM; h++)
{
- result = fma(input[input_idx], filter[filter_idx], result);
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_IFM_PITCH;
input_idx += INPUT0_FEATURE_PITCH;
}
}
}
}
+
+ ACTIVATION_TYPE result = TO_ACTIVATION_TYPE(acc);
#if BIAS_TERM
result += bias[ofm_offset];
#endif
+ result = ACTIVATION(result, ACTIVATION_PARAMS);
+
const uint out_split_offset = g * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + of*OUTPUT_FEATURE_PITCH + id_y*OUTPUT_Y_PITCH + id_x*OUTPUT_X_PITCH;
-#if FUSED_ELTWISE
- const uint fused_index = INPUT1_OFFSET + g * INPUT1_FEATURE_PITCH * FILTER_OFM_NUM + batch_offset*INPUT1_BATCH_PITCH + of*INPUT1_FEATURE_PITCH + id_y*INPUT1_Y_PITCH + id_x*INPUT1_X_PITCH;
-#if !GRADIENT
- output[dst_index] = ACTIVATION(result + fuse_input[fused_index], ACTIVATION_PARAMS);
-#else
- output[dst_index] = result + fuse_input[fused_index];
-#endif
+
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ output[dst_index] = FUSED_OPS_RESULT;
#else
- output[dst_index] = ACTIVATION(result, ACTIVATION_PARAMS);
+ output[dst_index] = TO_OUTPUT_TYPE(result);
#endif
}
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/data_types.cl"
+
+#include "deconvolution_gpu_imad_common.cl"
+
+// Contiguous dword loaders: input and weights are read as packed uints so
+// four 8-bit values feed a single IMAD operand.  ("CONTINOUS" spelling is
+// fixed by the shared helper header.)
+DECLARE_LOAD_CONTINOUS_4(load_input_ui, uint)
+DECLARE_LOAD_CONTINOUS_4(load_weights_ui, uint)
+
+// Output store helper, sized by element width so each sub-group block write
+// moves roughly the same number of bytes.
+#if OUTPUT_TYPE_SIZE == 1
+DECLARE_STORE_BLOCK_16(store_output, OUTPUT_TYPE)
+#elif OUTPUT_TYPE_SIZE == 2
+DECLARE_STORE_BLOCK_8(store_output, OUTPUT_TYPE)
+#else
+DECLARE_STORE_BLOCK_4(store_output, OUTPUT_TYPE)
+#endif
+
+// 4-wide vector views used to reinterpret one packed dword as 4 elements.
+#define FILTER_TYPE4 MAKE_VECTOR_TYPE(FILTER_TYPE, 4)
+#define INPUT_TYPE4 MAKE_VECTOR_TYPE(INPUT0_TYPE, 4)
+
+#define AS_FILTER_TYPE4 CAT(as_, FILTER_TYPE4)
+#define AS_INPUT_TYPE4 CAT(as_, INPUT_TYPE4)
+
+// Weights addressing in g_os_zyx_is_osv_isv layout: OSV = SIMD * TILE_OFM
+// outputs and ISV = TILE_IFM inputs are interleaved per block, so the
+// pitches below advance by whole (osv, isv) sub-blocks.
+#define WEIGHTS_GET_INDEX(g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(FILTER, g, o, i, z, y, x, (SIMD * TILE_OFM), TILE_IFM)
+#define WEIGHTS_TILE_IFM_PITCH (TILE_IFM * SIMD * TILE_OFM)
+#define WEIGHTS_IN_TILE_OFM_PITCH (TILE_IFM * SIMD)
+
+// IMAD-based reference deconvolution (transposed convolution) kernel for
+// dword-packed low-precision data.
+// Tiling, as implied by the index math below: global dim 0 walks x-tiles,
+// then y (and z for 5D); dim 1 selects a slice of SIMD * TILE_OFM output
+// features; dim 2 selects a tile of TILE_B batches.  One sub-group of SIMD
+// work items cooperates, broadcasting input dwords via sub-group shuffles.
+__attribute__((reqd_work_group_size(1, SIMD, 1)))
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+KERNEL(deconvolution_gpu_imad_ref)(
+    const __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* restrict output,
+    const __global FILTER_TYPE* weights,
+#if BIAS_TERM
+    const __global BIAS_TYPE* bias,
+#endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
+    uint split_idx
+) {
+    // View input/weights as uint so four 1-byte values travel as one dword
+    // into IMAD.  Assumes the offsets used below are 4-byte aligned — the
+    // chosen layouts should guarantee this; confirm against the selector.
+    const __global uint* input_ui = (const __global uint*)input;
+    const __global uint* weights_ui = (const __global uint*)weights;
+
+    uint out_b = get_global_id(2) * TILE_B;
+    uint out_fg = get_group_id(1) * SIMD * TILE_OFM;
+    uint out_f = out_fg + get_sub_group_local_id();
+    // x offset inside the row; the modulo keeps it within the x-tile grid
+    // padded up to a multiple of TILE_X.
+    uint out_x = (uint)get_global_id(0) * TILE_X % ALIGN(OUTPUT_SIZE_X, TILE_X);
+#if OUTPUT_DIMS <= 4
+    uint out_y = (uint)get_global_id(0) / CEIL_DIV(OUTPUT_SIZE_X, TILE_X);
+    uint out_z = 0;
+#elif OUTPUT_DIMS == 5
+    uint out_y = (uint)get_global_id(0) / CEIL_DIV(OUTPUT_SIZE_X, TILE_X) % OUTPUT_SIZE_Y;
+    uint out_z = (uint)get_global_id(0) / (CEIL_DIV(OUTPUT_SIZE_X, TILE_X) * OUTPUT_SIZE_Y);
+#endif
+    const uint sglid = get_sub_group_local_id();
+
+#if GROUPED
+    // Grouped deconvolution: split the flat feature slice into (group, ofm).
+    uint group = out_fg / FILTER_OFM_NUM;
+    uint ofm = out_fg % FILTER_OFM_NUM + sglid;
+#else
+    uint group = 0;
+    uint ofm = out_f;
+#endif
+    uint if_start = group * FILTER_IFM_NUM;
+
+    // Top-left input coordinate contributing to this output position; the
+    // filter is applied mirrored over the (conceptually stride-upsampled)
+    // input, hence the "- FILTER_SIZE + 1" term.
+    int in_x_start = (int)out_x + (PADDING_SIZE_X - FILTER_SIZE_X + 1);
+    int in_y_start = (int)out_y + (PADDING_SIZE_Y - FILTER_SIZE_Y + 1);
+    int in_z_start = (int)out_z + (PADDING_SIZE_Z - FILTER_SIZE_Z + 1);
+
+    // Restrict y filter taps to positions that land on real input rows:
+    // start inside the buffer on a stride-aligned row, then step by the
+    // stride so every visited row stays aligned.
+    uint fy_start = 0;
+    uint fy_end = FILTER_SIZE_Y;
+    uint fy_inc = STRIDE_SIZE_Y;
+    if (in_y_start < 0)
+        fy_start = -in_y_start;
+    else if (in_y_start % STRIDE_SIZE_Y != 0)
+        fy_start = STRIDE_SIZE_Y - in_y_start % STRIDE_SIZE_Y;
+    if (in_y_start + FILTER_SIZE_Y - 1 >= INPUT0_SIZE_Y * STRIDE_SIZE_Y)
+        fy_end = INPUT0_SIZE_Y * STRIDE_SIZE_Y - in_y_start;
+
+    // Same clipping for the z axis.
+    uint fz_start = 0;
+    uint fz_end = FILTER_SIZE_Z;
+    uint fz_inc = STRIDE_SIZE_Z;
+    if (in_z_start < 0)
+        fz_start = -in_z_start;
+    else if (in_z_start % STRIDE_SIZE_Z != 0)
+        fz_start = STRIDE_SIZE_Z - in_z_start % STRIDE_SIZE_Z;
+    if (in_z_start + FILTER_SIZE_Z - 1 >= INPUT0_SIZE_Z * STRIDE_SIZE_Z)
+        fz_end = INPUT0_SIZE_Z * STRIDE_SIZE_Z - in_z_start;
+
+    // Integer accumulators plus dword staging buffers for input and weights.
+    ACCUMULATOR_TYPE acc[TILE_B][TILE_OFM][TILE_X] = { };
+    uint in[TILE_B][TILE_IFM / 4];
+    uint wei[TILE_OFM][TILE_IFM / 4];
+
+    for (uint fz = fz_start; fz < fz_end; fz += fz_inc) {
+        int in_z = in_z_start + fz;
+        uint fixed_in_z = in_z / STRIDE_SIZE_Z;
+
+        for (uint fy = fy_start; fy < fy_end; fy += fy_inc) {
+            int in_y = in_y_start + fy;
+            uint fixed_in_y = in_y / STRIDE_SIZE_Y;
+
+            for (uint fx = 0; fx < FILTER_SIZE_X; fx += 1) {
+                // Each lane handles one x position of the tile (lanes beyond
+                // TILE_X fall back to position 0 and are never stored).
+                int in_x = in_x_start + fx + ((TILE_X == SIMD || sglid < TILE_X) ? sglid : 0);
+                // x taps are not pre-clipped like y/z; out-of-range or
+                // stride-misaligned lanes are flagged and zeroed after load.
+                bool zero_x = false;
+                zero_x |= in_x < 0;
+                zero_x |= in_x >= INPUT0_SIZE_X * STRIDE_SIZE_X;
+                zero_x |= in_x % STRIDE_SIZE_X != 0;
+                in_x = max(in_x, 0);
+                // NOTE(review): clamps to the exclusive bound, so flagged
+                // lanes may compute fixed_in_x == INPUT0_SIZE_X; the loaded
+                // garbage is zeroed below, but the address can touch one
+                // element past the row — confirm the layout's padding covers it.
+                in_x = min(in_x, INPUT0_SIZE_X * STRIDE_SIZE_X);
+                uint fixed_in_x = in_x / STRIDE_SIZE_X;
+
+                // Spatially mirrored filter tap; /4 converts to dword units.
+                uint weights_offset = WEIGHTS_GET_INDEX(group, ofm, 0, FILTER_SIZE_Z - fz - 1, FILTER_SIZE_Y - fy - 1, FILTER_SIZE_X - fx - 1) / 4;
+
+#if INPUT_VALID_TILE_IFM_PITCH
+                // Feature pitch is uniform, so the input offset can be
+                // computed once and advanced incrementally inside the fi loop.
+# if OUTPUT_DIMS <= 4
+                uint input_offset = INPUT0_GET_INDEX(out_b, if_start, fixed_in_y, fixed_in_x) / 4;
+# elif OUTPUT_DIMS == 5
+                uint input_offset = INPUT0_GET_INDEX(out_b, if_start, fixed_in_z, fixed_in_y, fixed_in_x) / 4;
+# endif
+#endif
+
+                for (uint fi = 0; fi < FILTER_IFM_NUM; fi += TILE_IFM) {
+                    // Load weights [TILE_OFM, TILE_IFM, 1, 1]
+                    __attribute__((opencl_unroll_hint))
+                    for (uint of = 0; of < TILE_OFM; ++of) {
+                        uint weights_idx = weights_offset + of * WEIGHTS_IN_TILE_OFM_PITCH / 4;
+                        FUNC_CALL(load_weights_ui)(weights_ui, weights_idx, TILE_IFM / 4, wei[of]);
+                    }
+                    weights_offset += WEIGHTS_TILE_IFM_PITCH / 4;
+
+                    // Load input [TILE_B, TILE_IFM, 1, 1]
+#if !INPUT_VALID_TILE_IFM_PITCH
+                    // Non-uniform feature pitch: recompute the offset per tile.
+# if OUTPUT_DIMS <= 4
+                    uint input_offset = INPUT0_GET_INDEX(out_b, if_start + fi, fixed_in_y, fixed_in_x) / 4;
+# elif OUTPUT_DIMS == 5
+                    uint input_offset = INPUT0_GET_INDEX(out_b, if_start + fi, fixed_in_z, fixed_in_y, fixed_in_x) / 4;
+# endif
+#endif
+                    __attribute__((opencl_unroll_hint))
+                    for (uint ob = 0; ob < TILE_B; ++ob) {
+                        uint input_idx = input_offset + ob * INPUT_IN_TILE_B_PITCH / 4;
+                        FUNC_CALL(load_input_ui)(input_ui, input_idx, TILE_IFM / 4, in[ob]);
+                    }
+#if INPUT_VALID_TILE_IFM_PITCH
+                    input_offset += INPUT_TILE_IFM_PITCH / 4;
+#endif
+                    // Discard values loaded by lanes flagged invalid above.
+                    if (zero_x) {
+                        __attribute__((opencl_unroll_hint))
+                        for (uint ob = 0; ob < TILE_B; ++ob) {
+                            __attribute__((opencl_unroll_hint))
+                            for (uint ifp = 0; ifp < TILE_IFM / 4; ++ifp) {
+                                in[ob][ifp] = 0;
+                            }
+                        }
+                    }
+
+                    // Accumulate: lane tx's input dword is broadcast to the
+                    // whole sub-group via shuffle, so every lane (one output
+                    // feature each) multiplies against the same x position.
+                    __attribute__((opencl_unroll_hint))
+                    for (uint ob = 0; ob < TILE_B; ++ob) {
+                        __attribute__((opencl_unroll_hint))
+                        for (uint of = 0; of < TILE_OFM; ++of) {
+                            __attribute__((opencl_unroll_hint))
+                            for (uint tx = 0; tx < TILE_X; ++tx) {
+                                __attribute__((opencl_unroll_hint))
+                                for (uint imad_it = 0; imad_it < TILE_IFM / 4; ++imad_it) {
+                                    uint in_val = intel_sub_group_shuffle(in[ob][imad_it], tx);
+                                    acc[ob][of][tx] = IMAD(acc[ob][of][tx], AS_INPUT_TYPE4(in_val), AS_FILTER_TYPE4(wei[of][imad_it]));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Widen integer accumulators to the activation type before bias/fusing.
+    ACTIVATION_TYPE dequantized[TILE_B][TILE_OFM][TILE_X];
+    __attribute__((opencl_unroll_hint))
+    for (uint ob = 0; ob < TILE_B; ++ob) {
+        __attribute__((opencl_unroll_hint))
+        for (uint of = 0; of < TILE_OFM; ++of) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                dequantized[ob][of][tx] = TO_ACTIVATION_TYPE(acc[ob][of][tx]);
+            }
+        }
+    }
+
+#if BIAS_TERM
+    // One bias value per output feature, shared across batch and x.
+    __attribute__((opencl_unroll_hint))
+    for (uint of = 0; of < TILE_OFM; ++of) {
+        BIAS_TYPE bias_val = bias[out_f + of * SIMD];
+        __attribute__((opencl_unroll_hint))
+        for (uint ob = 0; ob < TILE_B; ++ob) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                dequantized[ob][of][tx] += TO_ACTIVATION_TYPE(bias_val);
+            }
+        }
+    }
+#endif
+
+    // Apply fused ops (or a plain type conversion) to produce final outputs.
+    OUTPUT_TYPE result[TILE_B][TILE_OFM][TILE_X];
+    __attribute__((opencl_unroll_hint))
+    for (uint of = 0; of < TILE_OFM; ++of) {
+#if FUSED_OPS_CAN_USE_PRELOAD
+        FUSED_OPS_PRELOAD;
+#endif
+        __attribute__((opencl_unroll_hint))
+        for (uint ob = 0; ob < TILE_B; ++ob) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+#if HAS_FUSED_OPS
+# if FUSED_OPS_CAN_USE_PRELOAD
+                FUSED_OPS_CALC;
+# else
+                FUSED_OPS;
+# endif
+                result[ob][of][tx] = FUSED_OPS_RESULT;
+#else
+                result[ob][of][tx] = TO_OUTPUT_TYPE(dequantized[ob][of][tx]);
+#endif
+            }
+        }
+    }
+
+    // Tail handling: does this tile cross the x or feature boundary?
+    bool leftovers_x = OUTPUT_SIZE_X % TILE_X != 0 && out_x + TILE_X >= OUTPUT_SIZE_X;
+    bool leftovers_f = OUTPUT_FEATURE_NUM % SIMD != 0 && out_f + SIMD >= OUTPUT_FEATURE_NUM;
+
+#if OUTPUT_NAIVE_STORE
+    // Scalar stores through the generic index macro; works for any layout.
+    __attribute__((opencl_unroll_hint))
+    for (uint ob = 0; ob < TILE_B; ++ob) {
+        __attribute__((opencl_unroll_hint))
+        for (uint of = 0; of < TILE_OFM; ++of) {
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                if ((leftovers_x && tx >= OUTPUT_SIZE_X % TILE_X) ||
+                    (leftovers_f && out_f + of * SIMD >= OUTPUT_FEATURE_NUM))
+                    break;
+#if OUTPUT_DIMS <= 4
+                uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_f + of * SIMD, out_y, out_x + tx);
+#elif OUTPUT_DIMS == 5
+                uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_f + of * SIMD, out_z, out_y, out_x + tx);
+#endif
+                output[output_idx] = result[ob][of][tx];
+            }
+        }
+    }
+#elif OUTPUT_BLOCK_X_STORE
+    // Sub-group block stores along x; falls back to guarded scalar stores
+    // only when the feature tail makes block writes unsafe.
+    __attribute__((opencl_unroll_hint))
+    for (uint ob = 0; ob < TILE_B; ++ob) {
+        __attribute__((opencl_unroll_hint))
+        for (uint of = 0; of < TILE_OFM; ++of) {
+#if OUTPUT_DIMS <= 4
+            uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_fg + of * SIMD, out_y, out_x);
+#elif OUTPUT_DIMS == 5
+            uint output_idx = OUTPUT_GET_INDEX(out_b + ob, out_fg + of * SIMD, out_z, out_y, out_x);
+#endif
+            if (!leftovers_x && !leftovers_f) {
+                FUNC_CALL(store_output)(output, output_idx, TILE_X, result[ob][of]);
+            } else if (!leftovers_f) {
+                FUNC_CALL(store_output)(output, output_idx, OUTPUT_SIZE_X % TILE_X, result[ob][of]);
+            } else {
+                __attribute__((opencl_unroll_hint))
+                for (uint tx = 0; tx < TILE_X; ++tx) {
+                    if (out_f + of * SIMD < OUTPUT_FEATURE_NUM && out_x + tx < OUTPUT_SIZE_X) {
+                        output[output_idx + sglid + tx * SIMD] = result[ob][of][tx];
+                    }
+                }
+            }
+        }
+    }
+#endif
+}
+
+#undef FILTER_TYPE4
+#undef INPUT_TYPE4
+#undef AS_FILTER_TYPE4
+#undef AS_INPUT_TYPE4
+
+#undef WEIGHTS_GET_INDEX
+#undef WEIGHTS_TILE_IFM_PITCH
+#undef WEIGHTS_IN_TILE_OFM_PITCH
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/data_types.cl"
+
+// Integer helpers: ceiling division and round-up to a multiple of b.
+#define CEIL_DIV(a, b) (((a) + ((b) - 1)) / (b))
+#define ALIGN(a, b) (CEIL_DIV(a, b) * (b))
+
+// VEC_TO_ARR_N: scatter an OpenCL N-component vector into arr starting at
+// arr[idx], recursively splitting on the built-in .lo/.hi vector halves.
+// (No comments inside the macros: '//' would swallow the continuation '\'.)
+#define VEC_TO_ARR_1(var, arr, idx) \
+    arr[idx] = var
+#define VEC_TO_ARR_2(vec, arr, idx) \
+    VEC_TO_ARR_1((vec).lo, arr, idx); \
+    VEC_TO_ARR_1((vec).hi, arr, (idx) + 1)
+#define VEC_TO_ARR_4(vec, arr, idx) \
+    VEC_TO_ARR_2((vec).lo, arr, idx); \
+    VEC_TO_ARR_2((vec).hi, arr, (idx) + 2)
+#define VEC_TO_ARR_8(vec, arr, idx) \
+    VEC_TO_ARR_4((vec).lo, arr, idx); \
+    VEC_TO_ARR_4((vec).hi, arr, (idx) + 4)
+#define VEC_TO_ARR_16(vec, arr, idx) \
+    VEC_TO_ARR_8((vec).lo, arr, idx); \
+    VEC_TO_ARR_8((vec).hi, arr, (idx) + 8)
+
+// ARR_TO_VEC_N: the inverse — gather N array elements starting at arr[idx]
+// into the components of an N-wide vector.
+#define ARR_TO_VEC_1(arr, var, idx) \
+    var = arr[idx]
+#define ARR_TO_VEC_2(arr, vec, idx) \
+    ARR_TO_VEC_1(arr, (vec).lo, idx); \
+    ARR_TO_VEC_1(arr, (vec).hi, (idx) + 1)
+#define ARR_TO_VEC_4(arr, vec, idx) \
+    ARR_TO_VEC_2(arr, (vec).lo, idx); \
+    ARR_TO_VEC_2(arr, (vec).hi, (idx) + 2)
+#define ARR_TO_VEC_8(arr, vec, idx) \
+    ARR_TO_VEC_4(arr, (vec).lo, idx); \
+    ARR_TO_VEC_4(arr, (vec).hi, (idx) + 4)
+#define ARR_TO_VEC_16(arr, vec, idx) \
+    ARR_TO_VEC_8(arr, (vec).lo, idx); \
+    ARR_TO_VEC_8(arr, (vec).hi, (idx) + 8)
+
+// DECLARE_LOAD_CONTINOUS_16 (sic — "CONTINOUS" is an established misspelling
+// kept for compatibility with existing call sites): declares a per-work-item
+// function copying `size` consecutive elements from src[offset..] into the
+// private array dst, using the widest plain vector loads available (16, then
+// 8/4/2/1 for the tail).  No sub-group cooperation — each work item loads
+// from its own address.  Requires src + offset to be suitably aligned for
+// the vector loads; confirm against the caller's layout.
+#define DECLARE_LOAD_CONTINOUS_16(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+    uint i = 0; \
+    for (; i + 16 <= size; i += 16) { \
+        MAKE_VECTOR_TYPE(type, 16) tmp = ((const __global MAKE_VECTOR_TYPE(type, 16)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_16(tmp, dst, i); \
+    } \
+    if (size % 16 >= 8) { \
+        MAKE_VECTOR_TYPE(type, 8) tmp = ((const __global MAKE_VECTOR_TYPE(type, 8)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_8(tmp, dst, i); \
+        i += 8; \
+    } \
+    if (size % 8 >= 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp = ((const __global MAKE_VECTOR_TYPE(type, 4)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_4(tmp, dst, i); \
+        i += 4; \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp = ((const __global MAKE_VECTOR_TYPE(type, 2)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_2(tmp, dst, i); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        dst[i] = src[offset + i]; \
+    } \
+}
+
+// Same as above but with 4-wide loads as the widest step (for types/usages
+// where 16-wide vectors are unnecessary or unavailable).
+#define DECLARE_LOAD_CONTINOUS_4(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+    uint i = 0; \
+    for (; i + 4 <= size; i += 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp = ((const __global MAKE_VECTOR_TYPE(type, 4)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_4(tmp, dst, i); \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp = ((const __global MAKE_VECTOR_TYPE(type, 2)*)(src + offset + i))[0]; \
+        VEC_TO_ARR_2(tmp, dst, i); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        dst[i] = src[offset + i]; \
+    } \
+}
+
+// DECLARE_STORE_BLOCK_N: declares a sub-group cooperative store writing
+// `size` values per work item from the private array src.  Each BLOCK_WRITEN
+// call stores one element per work item of the sub-group, so the global
+// offset advances by i * sub-group size, not by i.  Must be called by all
+// work items of the sub-group with the same offset/size (block-write
+// semantics).  Variants differ only in the widest vector step (16/8/4).
+#define DECLARE_STORE_BLOCK_16(name, type) \
+inline void FUNC(name)(__global type* dst, uint offset, uint size, type* src) { \
+    uint i = 0; \
+    const uint sg_size = get_max_sub_group_size(); \
+    for (; i + 16 <= size; i += 16) { \
+        MAKE_VECTOR_TYPE(type, 16) tmp; \
+        ARR_TO_VEC_16(src, tmp, i); \
+        BLOCK_WRITEN(type, 16, dst, offset + i * sg_size, tmp); \
+    } \
+    if (size % 16 >= 8) { \
+        MAKE_VECTOR_TYPE(type, 8) tmp; \
+        ARR_TO_VEC_8(src, tmp, i); \
+        BLOCK_WRITEN(type, 8, dst, offset + i * sg_size, tmp); \
+        i += 8; \
+    } \
+    if (size % 8 >= 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp; \
+        ARR_TO_VEC_4(src, tmp, i); \
+        BLOCK_WRITEN(type, 4, dst, offset + i * sg_size, tmp); \
+        i += 4; \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp; \
+        ARR_TO_VEC_2(src, tmp, i); \
+        BLOCK_WRITEN(type, 2, dst, offset + i * sg_size, tmp); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        type tmp = src[i]; \
+        BLOCK_WRITEN(type, 1, dst, offset + i * sg_size, tmp); \
+    } \
+}
+
+// 8-wide variant: same contract, widest step is 8.
+#define DECLARE_STORE_BLOCK_8(name, type) \
+inline void FUNC(name)(__global type* dst, uint offset, uint size, type* src) { \
+    uint i = 0; \
+    const uint sg_size = get_max_sub_group_size(); \
+    for (; i + 8 <= size; i += 8) { \
+        MAKE_VECTOR_TYPE(type, 8) tmp; \
+        ARR_TO_VEC_8(src, tmp, i); \
+        BLOCK_WRITEN(type, 8, dst, offset + i * sg_size, tmp); \
+    } \
+    if (size % 8 >= 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp; \
+        ARR_TO_VEC_4(src, tmp, i); \
+        BLOCK_WRITEN(type, 4, dst, offset + i * sg_size, tmp); \
+        i += 4; \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp; \
+        ARR_TO_VEC_2(src, tmp, i); \
+        BLOCK_WRITEN(type, 2, dst, offset + i * sg_size, tmp); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        type tmp = src[i]; \
+        BLOCK_WRITEN(type, 1, dst, offset + i * sg_size, tmp); \
+    } \
+}
+
+// 4-wide variant: same contract, widest step is 4.
+#define DECLARE_STORE_BLOCK_4(name, type) \
+inline void FUNC(name)(__global type* dst, uint offset, uint size, type* src) { \
+    uint i = 0; \
+    const uint sg_size = get_max_sub_group_size(); \
+    for (; i + 4 <= size; i += 4) { \
+        MAKE_VECTOR_TYPE(type, 4) tmp; \
+        ARR_TO_VEC_4(src, tmp, i); \
+        BLOCK_WRITEN(type, 4, dst, offset + i * sg_size, tmp); \
+    } \
+    if (size % 4 >= 2) { \
+        MAKE_VECTOR_TYPE(type, 2) tmp; \
+        ARR_TO_VEC_2(src, tmp, i); \
+        BLOCK_WRITEN(type, 2, dst, offset + i * sg_size, tmp); \
+        i += 2; \
+    } \
+    if (size % 2 == 1) { \
+        type tmp = src[i]; \
+        BLOCK_WRITEN(type, 1, dst, offset + i * sg_size, tmp); \
+    } \
+}
+
+#define DECLARE_READ_BLOCK_16(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+ uint i = 0; \
+ const uint sg_size = get_max_sub_group_size(); \
+ for (; i + 16 <= size; i += 16) { \
+ MAKE_VECTOR_TYPE(type, 16) tmp = BLOCK_READN(type, 16, src, offset + i * sg_size); \
+ VEC_TO_ARR_16(tmp, dst, i); \
+ } \
+ if (size % 16 >= 8) { \
+ MAKE_VECTOR_TYPE(type, 8) tmp = BLOCK_READN(type, 8, src, offset + i * sg_size); \
+ VEC_TO_ARR_8(tmp, dst, i); \
+ i += 8; \
+ } \
+ if (size % 8 >= 4) { \
+ MAKE_VECTOR_TYPE(type, 4) tmp = BLOCK_READN(type, 4, src, offset + i * sg_size); \
+ VEC_TO_ARR_4(tmp, dst, i); \
+ i += 4; \
+ } \
+ if (size % 4 >= 2) { \
+ MAKE_VECTOR_TYPE(type, 2) tmp = BLOCK_READN(type, 2, src, offset + i * sg_size); \
+ VEC_TO_ARR_2(tmp, dst, i); \
+ i += 2; \
+ } \
+ if (size % 2 == 1) { \
+ type tmp = BLOCK_READN(type, 1, src, offset + i * sg_size); \
+ dst[i] = tmp; \
+ } \
+}
+
+#define DECLARE_READ_BLOCK_8(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+ uint i = 0; \
+ const uint sg_size = get_max_sub_group_size(); \
+ for (; i + 8 <= size; i += 8) { \
+ MAKE_VECTOR_TYPE(type, 8) tmp = BLOCK_READN(type, 8, src, offset + i * sg_size); \
+ VEC_TO_ARR_8(tmp, dst, i); \
+ } \
+ if (size % 8 >= 4) { \
+ MAKE_VECTOR_TYPE(type, 4) tmp = BLOCK_READN(type, 4, src, offset + i * sg_size); \
+ VEC_TO_ARR_4(tmp, dst, i); \
+ i += 4; \
+ } \
+ if (size % 4 >= 2) { \
+ MAKE_VECTOR_TYPE(type, 2) tmp = BLOCK_READN(type, 2, src, offset + i * sg_size); \
+ VEC_TO_ARR_2(tmp, dst, i); \
+ i += 2; \
+ } \
+ if (size % 2 == 1) { \
+ type tmp = BLOCK_READN(type, 1, src, offset + i * sg_size); \
+ dst[i] = tmp; \
+ } \
+}
+
+#define DECLARE_READ_BLOCK_4(name, type) \
+inline void FUNC(name)(const __global type* src, uint offset, uint size, type* dst) { \
+ uint i = 0; \
+ const uint sg_size = get_max_sub_group_size(); \
+ for (; i + 4 <= size; i += 4) { \
+ MAKE_VECTOR_TYPE(type, 4) tmp = BLOCK_READN(type, 4, src, offset + i * sg_size); \
+ VEC_TO_ARR_4(tmp, dst, i); \
+ } \
+ if (size % 4 >= 2) { \
+ MAKE_VECTOR_TYPE(type, 2) tmp = BLOCK_READN(type, 2, src, offset + i * sg_size); \
+ VEC_TO_ARR_2(tmp, dst, i); \
+ i += 2; \
+ } \
+ if (size % 2 == 1) { \
+ type tmp = BLOCK_READN(type, 1, src, offset + i * sg_size); \
+ dst[i] = tmp; \
+ } \
+}
--- /dev/null
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/data_types.cl"
+
+#include "deconvolution_gpu_imad_common.cl"
+
+DECLARE_LOAD_CONTINOUS_4(load_input_ui, uint)
+DECLARE_LOAD_CONTINOUS_4(load_weights_ui, uint)
+
+#define FILTER_TYPE4 MAKE_VECTOR_TYPE(FILTER_TYPE, 4)
+#define INPUT_TYPE4 MAKE_VECTOR_TYPE(INPUT0_TYPE, 4)
+
+#define AS_FILTER_TYPE4 CAT(as_, FILTER_TYPE4)
+#define AS_INPUT_TYPE4 CAT(as_, INPUT_TYPE4)
+
+#define WEIGHTS_GET_INDEX(g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(FILTER, g, o, i, z, y, x)
+
+KERNEL(deconvolution_gpu_imad_ref)(
+ const __global INPUT0_TYPE* input,
+ __global OUTPUT_TYPE* restrict output,
+ const __global FILTER_TYPE* weights,
+#if BIAS_TERM
+ const __global BIAS_TYPE* bias,
+#endif
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
+#endif
+ uint split_idx
+) {
+ const __global uint* input_ui = (const __global uint*)input;
+ const __global uint* weights_ui = (const __global uint*)weights;
+
+ uint out_b = get_global_id(2);
+ uint out_f = get_global_id(0);
+ uint out_x = (uint)get_global_id(1) % OUTPUT_SIZE_X;
+#if OUTPUT_DIMS <= 4
+ uint out_y = (uint)get_global_id(1) / OUTPUT_SIZE_X;
+ uint out_z = 0;
+#elif OUTPUT_DIMS == 5
+ uint out_y = (uint)get_global_id(1) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
+ uint out_z = (uint)get_global_id(1) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+#endif
+
+#if GROUPED
+ uint group = out_f / FILTER_OFM_NUM;
+ uint ofm = out_f % FILTER_OFM_NUM;
+#else
+ uint group = 0;
+ uint ofm = out_f;
+#endif
+ uint if_start = group * FILTER_IFM_NUM;
+
+ int in_x_start = (int)out_x + (PADDING_SIZE_X - FILTER_SIZE_X + 1);
+ int in_y_start = (int)out_y + (PADDING_SIZE_Y - FILTER_SIZE_Y + 1);
+ int in_z_start = (int)out_z + (PADDING_SIZE_Z - FILTER_SIZE_Z + 1);
+
+ uint fx_start = 0;
+ uint fx_end = FILTER_SIZE_X;
+ uint fx_inc = STRIDE_SIZE_X;
+ if (in_x_start < 0)
+ fx_start = -in_x_start;
+ else if (in_x_start % STRIDE_SIZE_X != 0)
+ fx_start = STRIDE_SIZE_X - in_x_start % STRIDE_SIZE_X;
+ if (in_x_start + FILTER_SIZE_X - 1 >= INPUT0_SIZE_X * STRIDE_SIZE_X)
+ fx_end = INPUT0_SIZE_X * STRIDE_SIZE_X - in_x_start;
+
+ uint fy_start = 0;
+ uint fy_end = FILTER_SIZE_Y;
+ uint fy_inc = STRIDE_SIZE_Y;
+ if (in_y_start < 0)
+ fy_start = -in_y_start;
+ else if (in_y_start % STRIDE_SIZE_Y != 0)
+ fy_start = STRIDE_SIZE_Y - in_y_start % STRIDE_SIZE_Y;
+ if (in_y_start + FILTER_SIZE_Y - 1 >= INPUT0_SIZE_Y * STRIDE_SIZE_Y)
+ fy_end = INPUT0_SIZE_Y * STRIDE_SIZE_Y - in_y_start;
+
+ uint fz_start = 0;
+ uint fz_end = FILTER_SIZE_Z;
+ uint fz_inc = STRIDE_SIZE_Z;
+ if (in_z_start < 0)
+ fz_start = -in_z_start;
+ else if (in_z_start % STRIDE_SIZE_Z != 0)
+ fz_start = STRIDE_SIZE_Z - in_z_start % STRIDE_SIZE_Z;
+ if (in_z_start + FILTER_SIZE_Z - 1 >= INPUT0_SIZE_Z * STRIDE_SIZE_Z)
+ fz_end = INPUT0_SIZE_Z * STRIDE_SIZE_Z - in_z_start;
+
+ ACCUMULATOR_TYPE acc = 0;
+ uint in[TILE_IFM / 4];
+ uint wei[TILE_IFM / 4];
+
+ for (uint fz = fz_start; fz < fz_end; fz += fz_inc) {
+ int in_z = in_z_start + fz;
+ uint fixed_in_z = in_z / STRIDE_SIZE_Z;
+
+ for (uint fy = fy_start; fy < fy_end; fy += fy_inc) {
+ int in_y = in_y_start + fy;
+ uint fixed_in_y = in_y / STRIDE_SIZE_Y;
+
+ for (uint fx = fx_start; fx < fx_end; fx += fx_inc) {
+ int in_x = in_x_start + fx;
+ uint fixed_in_x = in_x / STRIDE_SIZE_X;
+
+ for (uint fi = 0; fi < FILTER_IFM_NUM; fi += TILE_IFM) {
+ // Load weights [1, TILE_IFM, 1, 1]
+ uint weights_idx = WEIGHTS_GET_INDEX(group, ofm, fi, FILTER_SIZE_Z - fz - 1, FILTER_SIZE_Y - fy - 1, FILTER_SIZE_X - fx - 1);
+ FUNC_CALL(load_weights_ui)(weights_ui, weights_idx / 4, TILE_IFM / 4, wei);
+
+ // Load input [1, TILE_IFM, 1, 1]
+#if FILTER_GROUPS_NUM == 1 || FILTER_IFM_NUM % TILE_IFM == 0
+# if OUTPUT_DIMS <= 4
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start, fixed_in_y, fixed_in_x);
+# elif OUTPUT_DIMS == 5
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start, fixed_in_z, fixed_in_y, fixed_in_x);
+# endif
+ FUNC_CALL(load_input_ui)(input_ui, input_idx / 4, TILE_IFM / 4, in);
+#else
+ for (uint tifm = 0; tifm < TILE_IFM; ++tifm) {
+# if OUTPUT_DIMS <= 4
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start + tifm, fixed_in_y, fixed_in_x);
+# elif OUTPUT_DIMS == 5
+ uint input_idx = INPUT0_GET_INDEX(out_b, fi + if_start + tifm, fixed_in_z, fixed_in_y, fixed_in_x);
+# endif
+ ((INPUT0_TYPE*)(in))[tifm] = input[input_idx];
+ }
+#endif
+
+ __attribute__((opencl_unroll_hint))
+ for (uint imad_it = 0; imad_it < TILE_IFM / 4; ++imad_it) {
+ acc = IMAD(acc, AS_INPUT_TYPE4(in[imad_it]), AS_FILTER_TYPE4(wei[imad_it]));
+ }
+ }
+ }
+ }
+ }
+
+ ACTIVATION_TYPE dequantized;
+ dequantized = TO_ACTIVATION_TYPE(acc);
+
+#if BIAS_TERM
+ BIAS_TYPE bias_val = bias[out_f];
+ dequantized += TO_ACTIVATION_TYPE(bias_val);
+#endif
+
+ OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ result = FUSED_OPS_RESULT;
+#else
+ result = TO_OUTPUT_TYPE(dequantized);
+#endif
+
+#if OUTPUT_DIMS <= 4
+ uint output_idx = OUTPUT_GET_INDEX(out_b, out_f, out_y, out_x);
+#elif OUTPUT_DIMS == 5
+ uint output_idx = OUTPUT_GET_INDEX(out_b, out_f, out_z, out_y, out_x);
+#endif
+ output[output_idx] = result;
+}
+
+#undef FILTER_TYPE4
+#undef INPUT_TYPE4
+#undef AS_FILTER_TYPE4
+#undef AS_INPUT_TYPE4
+
+#undef WEIGHTS_GET_INDEX
#if BIAS_TERM
const __global BIAS_TYPE* bias,
#endif
- uint split_idx
-#if FUSED_ELTWISE
- , const __global UNIT_TYPE* fuse_input
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
#endif
- )
+ uint split_idx
+ )
{
- UNIT_TYPE result = UNIT_VAL_ZERO;
+ ACCUMULATOR_TYPE acc = ACCUMULATOR_VAL_ZERO;
#if DIM_ORDER_XYBF == 1
const uint out_x = get_global_id(0);
uint fixed_input_offset_x = (uint)input_offset_x / STRIDE_SIZE_X;
uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
uint fixed_input_offset_z = (uint)input_offset_z / STRIDE_SIZE_Z;
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_B_FS_YX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
+
uint input_idx;
-#else
- uint input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH + (uint)fixed_input_offset_z*INPUT0_Z_PITCH;
+#if INPUT0_SIMPLE
+ input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH + (uint)fixed_input_offset_z*INPUT0_Z_PITCH;
#endif
+
#if GRADIENT
uint filter_idx = filter_offset + of*FILTER_IFM_PITCH + (FILTER_SIZE_Z - k - 1)*FILTER_Z_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
- for (uint h = 0; h < FILTER_OFM_NUM; h++)
- {
-#if INPUT0_LAYOUT_B_FS_ZYX_FSV16 || INPUT0_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
-#elif INPUT0_LAYOUT_BS_FS_YX_FSV16
+ for (uint h = 0; h < FILTER_OFM_NUM; h++) {
+#if !INPUT0_SIMPLE
+# if INPUT0_DIMS <= 4
input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_y, fixed_input_offset_x);
+# elif INPUT0_DIMS == 5
+ input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
+# endif
#endif
- result = fma(input[input_idx], filter[filter_idx], result);
+
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_OFM_PITCH;
-#if !INPUT0_LAYOUT_B_FS_ZYX_FSV16 && !INPUT0_LAYOUT_BS_FS_ZYX_BSV16_FSV16 && !INPUT0_LAYOUT_B_FS_YX_FSV16
+#if INPUT0_SIMPLE
input_idx += INPUT0_FEATURE_PITCH;
#endif
}
-#else
+#else // GRADIENT
uint filter_idx = filter_offset + of*FILTER_OFM_PITCH + (FILTER_SIZE_Z - k - 1)*FILTER_Z_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
- for (uint h = 0; h < FILTER_IFM_NUM; h++)
- {
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
-#elif OUTPUT_LAYOUT_B_FS_YX_FSV16
+ for (uint h = 0; h < FILTER_IFM_NUM; h++) {
+#if !INPUT0_SIMPLE
+# if INPUT0_DIMS <= 4
input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_y, fixed_input_offset_x);
+# elif INPUT0_DIMS == 5
+ input_idx = INPUT0_GET_INDEX(batch_offset, h + g*FILTER_IFM_NUM, fixed_input_offset_z, fixed_input_offset_y, fixed_input_offset_x);
+# endif
#endif
- result = fma(input[input_idx], filter[filter_idx], result);
+
+ acc += TO_ACCUMULATOR_TYPE(input[input_idx]) * TO_ACCUMULATOR_TYPE(filter[filter_idx]);
filter_idx += FILTER_IFM_PITCH;
-#if !OUTPUT_LAYOUT_B_FS_ZYX_FSV16 && !OUTPUT_LAYOUT_B_FS_YX_FSV16 && !OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
+#if INPUT0_SIMPLE
input_idx += INPUT0_FEATURE_PITCH;
#endif
}
-#endif
+#endif // GRADIENT
}
}
}
}
}
+ ACTIVATION_TYPE pre_activation = TO_ACTIVATION_TYPE(acc);
#if BIAS_TERM
- result += bias[ofm_offset];
-#endif
- const uint out_split_offset = g * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- const uint dst_index = OUTPUT_OFFSET + OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_z, out_y, out_x);
-#elif OUTPUT_LAYOUT_B_FS_YX_FSV16
- const uint dst_index = OUTPUT_OFFSET + OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_y, out_x);
-#else
- const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + of*OUTPUT_FEATURE_PITCH + out_z*OUTPUT_Z_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH;
+ pre_activation += TO_ACTIVATION_TYPE(bias[ofm_offset]);
#endif
-#if FUSED_ELTWISE
-#if OUTPUT_LAYOUT_B_FS_ZYX_FSV16 || OUTPUT_LAYOUT_BS_FS_ZYX_BSV16_FSV16
- const uint fused_index = INPUT1_OFFSET + INPUT1_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_z, out_y, out_x);
-#elif OUTPUT_LAYOUT_B_FS_YX_FSV16
- const uint fused_index = INPUT1_OFFSET + INPUT1_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_y, out_x);
-#else
- const uint fused_index = INPUT1_OFFSET + split_idx * INPUT1_FEATURE_PITCH * FILTER_OFM_NUM + batch_offset*INPUT1_BATCH_PITCH + of*INPUT1_FEATURE_PITCH + out_z*INPUT1_Z_PITCH + out_y*INPUT1_Y_PITCH + out_x*INPUT1_X_PITCH;
-#endif
-#if !GRADIENT
- output[dst_index] = ACTIVATION(result + fuse_input[fused_index], ACTIVATION_PARAMS);
+ ACTIVATION_TYPE post_activation = ACTIVATION(pre_activation, ACTIVATION_PARAMS);
+
+ OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+ FUSED_OPS;
+ result = FUSED_OPS_RESULT;
#else
- output[dst_index] = result + fuse_input[fused_index];
+ result = TO_OUTPUT_TYPE(post_activation);
#endif
+#if OUTPUT_DIMS <= 4
+ const uint dst_index = OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_y, out_x);
+#elif OUTPUT_DIMS == 5
+ const uint dst_index = OUTPUT_GET_INDEX(batch_offset, g * FILTER_OFM_NUM + of, out_z, out_y, out_x);
#else
- output[dst_index] = ACTIVATION(result, ACTIVATION_PARAMS);
+# error deconvolution_gpu_ref.cl - Unsupported number of output dimensions.
#endif
+ output[dst_index] = result;
}
-
-#undef ACTIVATION
*******************************************************************************/
#include "ocl_types.h"
+#include "include/fetch.cl"
+#include "include/data_types.cl"
#if ID > 1
#define CASE_3D 1
#endif
KERNEL(gen9_common_conv_bwd_data_kernel)(
const __global DATA_T *diff_dst,
- __global DATA_T *diff_src,
+ __global DATA_T * restrict diff_src,
const __global DATA_T *wei,
#if WITH_BIAS
const __global DATA_T *bias,
#endif
- uint split_idx)
+#if HAS_FUSED_OPS_DECLS
+ FUSED_OPS_DECLS,
+#endif
+ uint split_idx
+ )
{
const int input_offset = (INPUT0_PAD_BEFORE_FEATURE_NUM / OC_BLOCK) * OD_FULL * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK +
(INPUT0_PAD_BEFORE_SIZE_Z) * OH_FULL * OW_FULL * OC_BLOCK * MB_BLOCK +
blockC00 = ACTIVATION(blockC00, ACTIVATION_PARAMS);
blockC01 = ACTIVATION(blockC01, ACTIVATION_PARAMS);
+#if HAS_FUSED_OPS
+ {
+ FUSED_OPS_BLOCK_C00;
+ blockC00 = FUSED_OPS_RESULT_BLOCK_C00;
+ }
+ {
+ FUSED_OPS_BLOCK_C01;
+ blockC01 = FUSED_OPS_RESULT_BLOCK_C01;
+ }
+#endif
+
SAVE_SRC_DIFF(blockC00, src_write0, 0);
SAVE_SRC_DIFF(blockC01, src_write0, 8);
for (int i = 0; i < IW_BLOCK; i++) {
blockC00[i] = ACTIVATION(blockC00[i], ACTIVATION_PARAMS);
if (iw + i >= IW) continue;
+#if HAS_FUSED_OPS
+ FUSED_OPS_BLOCK_CI;
+ blockC00[i] = FUSED_OPS_RESULT_BLOCK_CI;
+#endif
BLOCK_WRITE((__global BLOCK_DATA_T *)(&(src_write0)[i * IC_BLOCK]),
AS_BLOCK_DATA_T(blockC00[i]));
}
uint f_pad_before, uint f_pad_after,
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after, uint alignment) {
- const uint fs = f / alignment;
- const uint fsv = f % alignment;
+ const uint feature = f + f_pad_before;
+ const uint fs = feature / alignment;
+ const uint fsv = feature % alignment;
const uint x_pitch = alignment;
const uint y_pitch = x_pitch * (x_pad_before + x_size + x_pad_after);
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint fs_pitch = y_pitch * (y_pad_before + y_size + y_pad_after);
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs + fs_pad_before) * fs_pitch +
+ fs * fs_pitch +
(y_pad_before + y) * y_pitch +
(x_pad_before + x) * x_pitch
+ fsv;
uint f_pad_before, uint f_pad_after,
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after, uint alignment) {
- const uint f_mod = f % f_size;
+ const uint f_mod = f_pad_before + (f % f_size);
const uint fs = f_mod / alignment;
const uint fsv = f_mod % alignment;
const uint x_pitch = alignment;
const uint fs_pitch = y_pitch * (y_pad_before + y_size + y_pad_after);
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(y_pad_before + (y % y_size)) * y_pitch +
(x_pad_before + (x % x_size)) * x_pitch
+ fsv;
uint x_pad_before, uint x_pad_after,
uint alignment)
{
- const uint fs = f / alignment;
- const uint fsv = f % alignment;
+ const uint feature = f + f_pad_before;
+ const uint fs = feature / alignment;
+ const uint fsv = feature % alignment;
const uint x_pitch = alignment;
const uint y_pitch = x_pitch * (x_pad_before + x_size + x_pad_after);
const uint z_pitch = y_pitch * (y_pad_before + y_size + y_pad_after);
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + z) * z_pitch +
(y_pad_before + y) * y_pitch +
(x_pad_before + x) * x_pitch
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after,
uint alignment) {
- const uint f_mod = f % f_size;
+ const uint f_mod = f_pad_before + (f % f_size);
const uint fs = f_mod / alignment;
const uint fsv = f_mod % alignment;
const uint x_pitch = alignment;
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint b_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = b * b_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + (z % z_size)) * z_pitch +
(y_pad_before + (y % y_size)) * y_pitch +
(x_pad_before + (x % x_size)) * x_pitch
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after, uint alignmentF, uint alignmentB) {
const uint b_mod = b % b_size;
- const uint f_mod = f % f_size;
+ const uint f_mod = f_pad_before + (f % f_size);
const uint fs = f_mod / alignmentF;
const uint fsv = f_mod % alignmentF;
const uint bs = b_mod / alignmentB;
const uint fs_pitch = z_pitch * (z_pad_before + z_size + z_pad_after);
const uint b_pitch = fs_pitch * ((total_f_size + alignmentF - 1) / alignmentF);
- const uint fs_pad_before = f_pad_before / alignmentF;
-
const uint output_offset = (bs * b_pitch) + (bsv * alignmentF) +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + (z % z_size)) * z_pitch +
(y_pad_before + (y % y_size)) * y_pitch +
(x_pad_before + (x % x_size)) * x_pitch
uint y_pad_before, uint y_pad_after,
uint x_pad_before, uint x_pad_after) {
const uint alignment = 16;
- const uint fs = f / alignment;
- const uint fsv = f % alignment;
+ const uint feature = f + f_pad_before;
+ const uint fs = feature / alignment;
+ const uint fsv = feature % alignment;
const uint bs = b / alignment;
const uint bsv = b % alignment;
const uint total_f_size = f_pad_before + f_size + f_pad_after;
const uint bs_pitch = fs_pitch * ((total_f_size + alignment - 1) / alignment);
- const uint fs_pad_before = f_pad_before / alignment;
-
const uint output_offset = bs * bs_pitch +
- (fs_pad_before + fs) * fs_pitch +
+ fs * fs_pitch +
(z_pad_before + z) * z_pitch +
(y_pad_before + y) * y_pitch +
(x_pad_before + x) * x_pitch +
((o) / (sub_group_size))*CAT(prefix, _OFM_PITCH) \
)
+inline uint FUNC(get_g_os_zyx_is_osv_isv_index)(uint g, uint o, uint i, uint z, uint y, uint x,
+ uint g_size, uint o_size, uint i_size, uint z_size, uint y_size, uint x_size,
+ uint osv, uint isv) {
+ uint is_size = (i_size + isv - 1) / isv;
+ uint os_size = (o_size + osv - 1) / osv;
+
+ uint isv_index = i % isv;
+ uint osv_index = o % osv;
+ uint is_index = i / isv;
+ uint os_index = o / osv;
+
+ uint isv_pitch = 1;
+ uint osv_pitch = isv_pitch * isv;
+ uint is_pitch = osv_pitch * osv;
+ uint x_pitch = is_pitch * is_size;
+ uint y_pitch = x_pitch * x_size;
+ uint z_pitch = y_pitch * y_size;
+ uint os_pitch = z_pitch * z_size;
+ uint g_pitch = os_pitch * os_size;
+
+ uint index = 0;
+ index += isv_index * isv_pitch;
+ index += osv_index * osv_pitch;
+ index += is_index * is_pitch;
+ index += x * x_pitch;
+ index += y * y_pitch;
+ index += z * z_pitch;
+ index += os_index * os_pitch;
+ index += g * g_pitch;
+ return index;
+}
+
+#define GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, osv, isv) \
+ FUNC_CALL(get_g_os_zyx_is_osv_isv_index)( \
+ g, o, i, z, y, x, \
+ CAT(tensor, _GROUPS_NUM), \
+ CAT(tensor, _OFM_NUM), \
+ CAT(tensor, _IFM_NUM), \
+ CAT(tensor, _SIZE_Z), \
+ CAT(tensor, _SIZE_Y), \
+ CAT(tensor, _SIZE_X), \
+ osv, isv)
+
+#define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV4_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 4)
+#define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV16_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 16)
+#define GET_FILTER_G_OS_ZYX_IS_OSV16_ISV32_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 16, 32)
+#define GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 32, 4)
+#define GET_FILTER_G_OS_ZYX_IS_OSV32_ISV16_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 32, 16)
+#define GET_FILTER_G_OS_ZYX_IS_OSV32_ISV32_INDEX(tensor, g, o, i, z, y, x) GET_FILTER_G_OS_ZYX_IS_OSV_ISV_INDEX(tensor, g, o, i, z, y, x, 32, 32)
+
#define DECLARE_SAMPLER const sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST
#if FP16_UNIT_USED
return (uint8)(0, dst_b, dst_f, 0, 0, dst_y, dst_x, 0);
}
-inline uint8 FUNC(reshape_grouped_to_simple)(uint g, uint o, uint i, uint z, uint y, uint x, uint src_size_groups, uint dst_size_ofm)
+inline uint8 FUNC(reshape_grouped)(uint g, uint o, uint i, uint z, uint y, uint x, uint src_size_ofm, uint dst_size_ofm)
{
- const uint ofm_per_group = dst_size_ofm / src_size_groups;
- const uint dst_ofm = g * ofm_per_group + (o % ofm_per_group);
+ const uint flat_ofm = g * src_size_ofm + o;
+ const uint dst_ofm = flat_ofm % dst_size_ofm;
+ const uint dst_g = flat_ofm / dst_size_ofm;
const uint dst_ifm = i;
const uint dst_z = z;
const uint dst_y = y;
const uint dst_x = x;
- return (uint8)(0, dst_ofm, dst_ifm, 0, dst_z, dst_y, dst_x, 0);
+ return (uint8)(dst_g, dst_ofm, dst_ifm, 0, dst_z, dst_y, dst_x, 0);
}
inline uint8 FUNC(reshape_dims)(
{
if (src_dims == 5 && dst_dims == 4) // goiyx -> oiyx
{
- return FUNC_CALL(reshape_grouped_to_simple)(g, o, i, 0, y, x, src_size_groups, dst_size_ofm);
+ return FUNC_CALL(reshape_grouped)(g, o, i, 0, y, x, src_size_ofm, dst_size_ofm);
}
- else if (src_dims == 6 && dst_dims == 5) // goizyx -> oizyx
+ else if (src_dims == 6 && dst_dims == 5) // goizyx -> oizyx or goizyx -> goiyx
{
- return FUNC_CALL(reshape_grouped_to_simple)(g, o, i, z, y, x, src_size_groups, dst_size_ofm);
+ return FUNC_CALL(reshape_grouped)(g, o, i, z, y, x, src_size_ofm, dst_size_ofm);
+ }
+ else if (src_dims == 6 && dst_dims == 4) // goizyx -> oiyx
+ {
+ return FUNC_CALL(reshape_grouped)(g, o, i, 0, y, x, src_size_ofm, dst_size_ofm);
}
return (uint8)(g, o, i, w, z, y, x, 0);
return GET_FILTER_GS_OI_YXS_GSV16_YXSV4_INDEX(INPUT0, g, o, i, y, x);
#elif defined INPUT0_LAYOUT_GS_OI_YXS_GSV32_YXSV4
return GET_FILTER_GS_OI_YXS_GSV32_YXSV4_INDEX(INPUT0, g, o, i, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV16_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV4_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV16_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV16_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV16_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV32_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV32_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV32_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV16_INDEX(INPUT0, g, o, i, z, y, x);
+#elif defined INPUT0_LAYOUT_G_OS_ZYX_IS_OSV32_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV32_INDEX(INPUT0, g, o, i, z, y, x);
#else
#error reorder_weights.cl: input format - not supported
#endif
return GET_FILTER_GS_OI_YXS_GSV32_YXSV4_INDEX(OUTPUT, g, o, i, y, x);
#elif defined OUTPUT_LAYOUT_G_OS_IS_YX_OSV16_ISV4
return GET_FILTER_G_OS_IS_YX_OSV16_ISV4_INDEX(OUTPUT, g, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV16_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV4_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV16_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV16_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV16_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV16_ISV32_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV32_ISV4
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV4_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV32_ISV16
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV16_INDEX(OUTPUT, g, o, i, z, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_ZYX_IS_OSV32_ISV32
+ return GET_FILTER_G_OS_ZYX_IS_OSV32_ISV32_INDEX(OUTPUT, g, o, i, z, y, x);
#else
#error reorder_weights.cl: output format - not supported
#endif
#if OUTPUT_GROUPS_NUM > 1
const unsigned g = (uint)get_global_id(0) / OUTPUT_OFM_NUM;
const unsigned o = (uint)get_global_id(0) % OUTPUT_OFM_NUM;
- const unsigned i = (uint)get_global_id(1);
-#if OUTPUT_DIMS == 5
- const unsigned z = 0;
- const unsigned y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
- const unsigned x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
-#elif OUTPUT_DIMS == 6
- const unsigned zyx = get_global_id(2);
- const unsigned x = zyx % INPUT0_SIZE_X;
- const unsigned y = (zyx / INPUT0_SIZE_X) % INPUT0_SIZE_Y;
- const unsigned z = (zyx / INPUT0_SIZE_X) / INPUT0_SIZE_Y;
-#endif
#else
+ const unsigned g = 0;
const unsigned o = (uint)get_global_id(0);
+#endif
+
const unsigned i = (uint)get_global_id(1);
- const unsigned g = 0;
-#if OUTPUT_DIMS == 2
- const unsigned z = 0;
- const unsigned y = 0;
+
+#if OUTPUT_DIMS == 2 || (OUTPUT_DIMS == 3 && OUTPUT_GROUPED)
const unsigned x = 0;
-#elif OUTPUT_DIMS == 4
+ const unsigned y = 0;
+ const unsigned z = 0;
+#elif OUTPUT_DIMS == 4 || (OUTPUT_DIMS == 5 && OUTPUT_GROUPED)
+ const unsigned x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
+ const unsigned y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const unsigned z = 0;
- const unsigned y = (uint)get_global_id(2) / INPUT0_SIZE_X;
- const unsigned x = (uint)get_global_id(2) % INPUT0_SIZE_X;
-#elif OUTPUT_DIMS == 5
+#elif OUTPUT_DIMS == 5 || (OUTPUT_DIMS == 6 && OUTPUT_GROUPED)
const unsigned zyx = get_global_id(2);
- const unsigned x = zyx % INPUT0_SIZE_X;
- const unsigned y = (zyx / INPUT0_SIZE_X) % INPUT0_SIZE_Y;
- const unsigned z = (zyx / INPUT0_SIZE_X) / INPUT0_SIZE_Y;
-#endif
+ const unsigned x = zyx % OUTPUT_SIZE_X;
+ const unsigned y = (zyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
+ const unsigned z = (zyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y;
#endif
#if OUTPUT_GROUPS_NUM > 1 // Add grouped macro instead this check
#else
uint8 ir = RESHAPE_WEIGHT_DIMS(OUTPUT, INPUT0, o, i, 0, z, y, x);
#endif
- output[FUNC_CALL(get_output_index)(g, o, i, z, y, x)] = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir[0],ir[1],ir[2],ir[4],ir[5],ir[6])]);
+
+ uint input_idx = FUNC_CALL(get_input_index)(ir[0],ir[1],ir[2],ir[4],ir[5],ir[6]);
+#if !REORDER_ROTATE
+ uint output_idx = FUNC_CALL(get_output_index)(g, o, i, z, y, x);
+#else
+ uint output_idx = FUNC_CALL(get_output_index)(g, o, i, OUTPUT_SIZE_Z - z - 1, OUTPUT_SIZE_Y - y - 1, OUTPUT_SIZE_X - x - 1);
+#endif
+
+ output[output_idx] = TO_OUTPUT_TYPE(input[input_idx]);
}
#endif
definitions.push_back({ safe_index_func_name, safe_index_func_val });
definitions.push_back({ index_func_name, index_func_val });
} else {
- definitions.push_back({ safe_index_func_name, "f" });
- definitions.push_back({ index_func_name, "f" });
+ definitions.push_back({ safe_index_func_name, "(f)" });
+ definitions.push_back({ index_func_name, "(f)" });
}
} else {
definitions.push_back({ safe_index_func_name, safe_index_func_val });
auto in_lo = p->per_tensor_input_range ? Broadcast(std::to_string(p->in_lo), desc.tensors[0].GetDType(), vec_size) : GetInputVarName(0);
auto in_hi = p->per_tensor_input_range ? Broadcast(std::to_string(p->in_hi), desc.tensors[0].GetDType(), vec_size) : GetInputVarName(1);
- op_decls += "\\\n\t" + tmp_type + " " + tmp_var + " = min(max(" + in_lo + ", " + in_converted + "), " + in_hi + ");";
+ if (p->has_clamp) {
+ op_decls += "\\\n\t" + tmp_type + " " + tmp_var + " = min(max(" + in_lo + ", " + in_converted + "), " + in_hi + ");";
+ } else {
+ op_decls += "\\\n\t" + tmp_type + " " + tmp_var + " = " + in_converted + ";";
+ }
op_decls += "\\\n\t" + tmp_var + " = " + tmp_var + "*" + pre_scale + ";";
if (p->has_pre_shift)
op_decls += "\\\n\t" + tmp_var + " = " + tmp_var + " + " + pre_shift + ";";
+
op_decls += "\\\n\t" + tmp_var + " = round(" + tmp_var + ");";
bool need_round = (p->has_post_scale || p->has_post_shift) &&
}
if (should_be_safe) {
- return GetInputTensorName(input_id) + "_GET_INDEX_SAFE(" + idx_order +")";
+ return GetInputTensorName(input_id) + "_GET_INDEX_SAFE(" + idx_order + ")";
} else {
- return GetInputTensorName(input_id) + "_GET_INDEX(" + idx_order +")";
+ return GetInputTensorName(input_id) + "_GET_INDEX(" + idx_order + ")";
}
}
}
std::string FusedOpsCodeGenerator::GetOutputVarName(std::string input_var) const {
- static int i = 0;
std::replace(input_var.begin(), input_var.end(), '[', '_');
std::replace(input_var.begin(), input_var.end(), ']', '_');
std::replace(input_var.begin(), input_var.end(), ' ', '_');
- return input_var + "_" + std::to_string(i++);
+ return input_var + "_out";
}
std::string FusedOpsCodeGenerator::GetType(Datatype dt, size_t vec_size) const {
auto p = std::dynamic_pointer_cast<quantize_fuse_params>(desc.op_params);
if (p) {
std::vector<size_t> res = {};
- if (!p->per_tensor_input_range) {
+ if (!p->per_tensor_input_range && p->has_clamp) {
res.push_back(0);
res.push_back(1);
}
std::string y;
std::string x;
size_t dims;
- explicit idx_desc(std::vector<std::string> idx, DataTensor t) : b("0"), f("0"), z("0"), y("0"), x("0"), dims(0) {
+ explicit idx_desc(std::vector<std::string> idx, DataTensor t)
+ : b("0"), f("0"), z("0"), y("0"), x("0"), dims(0) {
dims = idx.size();
switch (dims) {
case 1: f = idx[0]; break;
static WeightsFormatSupportType CheckWeights(const weight_bias_params& newParams,
WeightsType reqType,
WeightsLayout reqLayouts,
- const ParamsKey& paramsKey) {
+ const ParamsKey& paramsKey,
+ bool rotate) {
// validate if weights type is image and if device supports requested sizes
if (Tensor::IsImageType(reqLayouts)) {
if (!CheckImageSize(newParams, reqLayouts))
}
reorderNeeded |= tensor.GetLayout() != reqLayouts;
+ reorderNeeded |= rotate;
- if (reorderNeeded && !pitchesDifferFromLS) {
+ if (reorderNeeded && !pitchesDifferFromLS && !rotate) {
reorderNeeded = !((reqLayouts == WeightsLayout::io && tensor.GetLayout() == WeightsLayout::iyxo) ||
(reqLayouts == WeightsLayout::oi && tensor.GetLayout() == WeightsLayout::oiyx));
}
WeightsLayout reqLayout,
WeightsReorderParams& weightsReorderParams,
const ParamsKey& paramsKey,
- size_t groups) {
+ size_t groups,
+ bool rotate) {
const auto& optParams = static_cast<const weight_bias_optional_params&>(options);
const auto inType = DataTypeToWeightsType(newParams.inputs[0].GetDType());
const auto dtype = paramsKey.isEnabledDifferentInputWeightsTypes() ? newParams.weights.GetDType() : inType;
- switch (CheckWeights(newParams, inType, reqLayout, paramsKey)) {
+ switch (CheckWeights(newParams, inType, reqLayout, paramsKey, rotate)) {
case SUPPORTED:
return true;
case UNSUPPORTED:
r_params.layerID = newParams.layerID + "_reorder_";
r_params.input = newParams.weights;
r_params.output = newParams.weights.TransformIgnorePadding(reqLayout, dtype, groups, false);
+ r_params.rotate_180 = rotate;
r_params.engineInfo = newParams.engineInfo;
reorder_optional_params op;
if (kernels_data.empty()) {
throw std::runtime_error("No suitable kernel found for weights reorder from " +
toString(r_params.input.GetLayout()) + " to " +
- toString(r_params.output.GetLayout()));
+ toString(r_params.output.GetLayout()) +
+ (rotate ? " with rotate" : ""));
}
weightsReorderParams.engine = WeightsReorderParams::Engine::GPU;
if (params.inputs.size()) {
no_pitch_same_dims = !params.inputs[0].PitchesDifferFromLogicalDims();
- if (params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[0].Feature().v % 16 != 0)
+ if ((params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[0].Feature().v % 16 != 0) ||
+ (params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16 && params.inputs[0].Feature().v % 16 != 0))
return false;
for (size_t i = 1; i < params.inputs.size(); i++) {
no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.inputs[i]);
- if (params.inputs[i].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[i].Feature().v % 16 != 0)
+ if ((params.inputs[i].GetLayout() == DataLayout::b_fs_yx_fsv16 && params.inputs[i].Feature().v % 16 != 0) ||
+ (params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16 && params.inputs[0].Feature().v % 16 != 0))
return false;
}
WeightsLayout layout,
WeightsReorderParams& weightsReorderParams,
const ParamsKey& paramsKey = ParamsKey(),
- size_t groups = 1);
+ size_t groups = 1,
+ bool rotate = false);
JitConstants GetTensorFriendlyWorkGroupsJit(const DataTensor& t);
std::vector<size_t> GetTensorFriendlyWorkGroups(const DataTensor& t);
std::vector<size_t> GetOptimalLocalWorkGroupSizes(std::vector<size_t> gws, const EngineInfo& info);
jit.AddConstant(MakeJitConstant("INPUT" + toCodeString(i), params.inputs[i]));
}
+#if !NDEBUG
jit.AddConstant(MakeJitConstant("LayerID", params.layerID));
-
+#endif
return jit;
}
virtual const std::string GetName() const { return kernelName; }
static const primitive_db& get_db() { return db; }
+ static void ResetCounter() { counter = 0; }
protected:
static const primitive_db db;
case WeightsLayout::gs_oi_yxs_gsv32_yxsv4: return "GS_OI_YXS_GSV32_YXSV4";
case WeightsLayout::g_os_is_yx_isv16_osv16: return "G_OS_IS_YX_ISV16_OSV16";
case WeightsLayout::g_os_is_yx_osv16_isv4: return "G_OS_IS_YX_OSV16_ISV4";
+ case WeightsLayout::g_os_zyx_is_osv16_isv4: return "G_OS_ZYX_IS_OSV16_ISV4";
+ case WeightsLayout::g_os_zyx_is_osv16_isv16: return "G_OS_ZYX_IS_OSV16_ISV16";
+ case WeightsLayout::g_os_zyx_is_osv16_isv32: return "G_OS_ZYX_IS_OSV16_ISV32";
+ case WeightsLayout::g_os_zyx_is_osv32_isv4: return "G_OS_ZYX_IS_OSV32_ISV4";
+ case WeightsLayout::g_os_zyx_is_osv32_isv16: return "G_OS_ZYX_IS_OSV32_ISV16";
+ case WeightsLayout::g_os_zyx_is_osv32_isv32: return "G_OS_ZYX_IS_OSV32_ISV32";
default: throw std::invalid_argument("Failed to convert WeightsLayout " + std::to_string(layout) + " to string");
}
}
uint32_t split : 1;
uint32_t dilation : 1;
uint32_t depthwise_separable_opt : 1;
- uint32_t transposed : 1;
uint32_t local : 1;
uint32_t grouped : 1;
uint32_t deformable : 1;
} resample;
struct reorder_t {
uint32_t winograd : 1;
+ uint32_t rotate : 1;
} reorder;
struct eltwise_t {
uint32_t stride : 1;
void EnableDepthwiseSeparableOpt() { key.restrict.val.dedicated.conv.depthwise_separable_opt = 1; }
void EnableLocalConvolution() { key.restrict.val.dedicated.conv.local = 1; }
void EnableGroupedConvolution() { key.restrict.val.dedicated.conv.grouped = 1; }
- void EnableTranspose() { key.restrict.val.dedicated.conv.transposed = 1; }
void EnableInt8Quantization() { key.restrict.val.quantization = 1; }
void EnableOutputCalibration() { key.restrict.val.output_calibration = 1; }
void EnableDeformableMode() { key.restrict.val.dedicated.conv.deformable = 1; }
void EnableQuantizeScaleShiftOpt() { key.restrict.val.dedicated.quantize.scale_shift_opt = 1; }
void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; }
+ void EnableRotateReorder() { key.restrict.val.dedicated.reorder.rotate = 1; }
void EnableSoftmaxDim(SoftmaxDim d);
void EnableConcatAxis(ConcatAxis a);
void EnableReampleType(ResampleType a);
CLDNN_ERROR_MESSAGE(node.id(), "Requested activation is not supported for integer type.");
}
+ if (node.has_fused_primitives()) {
+ input_node_layout.data_type = node.get_fused_output_layout().data_type;
+ }
+
return input_node_layout;
}
auto input_layout = node.input().get_output_layout();
auto weights_layout = node.weights(0).get_output_layout(); // weights are stored after inputs
+ auto data_type = input_layout.data_type;
+ if ((input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) && !node.has_fused_primitives()) {
+ data_type = data_types::f32;
+ }
+
+ if (node.has_fused_primitives()) {
+ data_type = node.get_fused_output_layout().data_type;
+ }
+
auto input_offset = desc->input_offset;
auto strd = desc->stride;
auto group = desc->groups;
desc->output_size.spatial[0],
desc->output_size.spatial[1],
desc->output_size.spatial[2]);
- return {input_layout.data_type, input_layout.format, output_size};
+ return {data_type, input_layout.format, output_size};
}
// compute output_dim <= stride * (input_size - 1) + kernel_size + 2 * input_offset;
tensor output_size(input_layout.size.batch[0],
number_of_features, x, y, z);
- return {input_layout.data_type, input_layout.format, output_size};
+ return {data_type, input_layout.format, output_size};
}
std::string deconvolution_inst::to_string(deconvolution_node const& node) {
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
+ implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
+ implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
+
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), val_fw);
protected:
// TODO: share it with convolution and fully connected
- bool validate_impl(const typed_primitive_inst<deconvolution>& instance) const override {
+ bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
bool res = true;
CLDNN_ERROR_NOT_EQUAL(_outer.id(),
"padding mode",
0.0f,
"Unknown padding mode in deconvolution.");
- // Check whether all memory elements use the same unit type (FP16 or FP32).
- auto input_count = instance.inputs_memory_count();
- auto input_data_type = 0 == input_count ?
- instance.node.input().get_output_layout().data_type :
- instance.input_memory().get_layout().data_type;
- CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(),
- "Input memory",
- input_data_type,
- "output memory",
- instance.output_memory().get_layout().data_type,
- "");
- CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(),
- "Input memory",
- input_data_type,
- "filter memory",
- instance.weights_memory(0).get_layout().data_type,
- "");
return res;
}
kernel::kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance,
int32_t split) const override {
kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
- auto* desc = static_cast<const deconvolution*>(instance.desc().get());
- int dep_size = static_cast<int>((desc->weights.size() + desc->bias.size() + 1));
args.weights = (memory_impl::cptr) &instance.weights_memory(split);
args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory(split) : nullptr);
- if (static_cast<int>(instance.dependencies().size()) > dep_size)
- args.inputs.emplace_back(&instance.dep_memory(dep_size));
-
return args;
}
const auto& primitive = arg.get_primitive();
const auto& weights_layout = arg.weights(0).get_output_layout();
- switch (weights_layout.fused_format()) {
- // FP32 (float)
- case fuse(data_types::f32, format::goiyx):
- case fuse(data_types::f32, format::yxio):
- case fuse(data_types::f32, format::gyxio):
- case fuse(data_types::f32, format::goizyx):
- case fuse(data_types::f16, format::goiyx):
- case fuse(data_types::f16, format::yxio):
- case fuse(data_types::f16, format::gyxio):
- case fuse(data_types::f16, format::goizyx):
- case fuse(data_types::f32, format::oiyx):
- case fuse(data_types::f32, format::yxfb):
- case fuse(data_types::f32, format::oizyx):
- case fuse(data_types::f16, format::oiyx):
- case fuse(data_types::f16, format::yxfb):
- case fuse(data_types::f16, format::oizyx):
- break;
- default:
- throw std::runtime_error("deconvolution weights format unsupported");
- }
-
const auto& weights_size = weights_layout.size;
const auto& split = primitive->split();
deconv_params.gradient = primitive->gradient();
- if (arg.get_dependencies().size() > primitive->weights.size() + primitive->bias.size() + 1) {
- deconv_params.fused_eltwise = true;
- deconv_params.inputs.push_back(convert_data_tensor(arg.fused_sum().get_output_layout()));
- }
-
auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16),
deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+ deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
deconvolution_gpu::create);
implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+ deconvolution_gpu::create);
+ implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
+ deconvolution_gpu::create);
}
} // namespace detail
supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos;
- supports_imad = true;
+ supports_imad = dev_name.find("Gen12") != std::string::npos;
supports_immad = false;
dev_type = static_cast<uint32_t>(device.getInfo<CL_DEVICE_TYPE>());
_outer(arg),
_device_info(arg.get_program().get_engine().get_context()->get_device_info()),
_kernel_data(kd) {
+ // weights reorder params got copied to parent, clear in _kernel_data to release shared ptr
+ _kernel_data.weightsReorderParams.engine = kernel_selector::generic_kernel_params::Engine::NONE;
+ _kernel_data.weightsReorderParams.cpuKernel = nullptr;
+ _kernel_data.weightsReorderParams.clKernel = nullptr;
+
_kernels.reserve(kd.kernels.size());
for (size_t i = 0; i < kd.kernels.size(); ++i) {
gpu::kernel kernel(_outer.get_program().get_engine().get_context(),
template<typename T>
void post_optimize_weights::optimize_weights(T& node, program_impl& p) {
auto offsets = get_weights_bias_offset(node);
+ auto* impl = node.get_selected_impl().get();
+ auto output_layout = node.get_output_layout();
+ auto& weights_reorder_params = impl->_weights_reorder_params;
+
for (auto i = offsets.weights_offset; i < offsets.bias_offset; i++) {
auto& weights_node = node.get_dependency(i);
- auto* impl = node.get_selected_impl().get();
- auto output_layout = node.get_output_layout();
auto weights_layout = weights_node.get_output_layout();
- auto reorders = _rf.get_weights_reorder(weights_node.id(), weights_layout, impl->_weights_reorder_params);
+ auto reorders = _rf.get_weights_reorder(weights_node.id(), weights_layout, weights_reorder_params);
for (auto& reorder : reorders) {
// insert new generic_layer node to topology
g_node.get_output_layout(false);
g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
}
- // set the old output layout and do not invalidate users as change of weights will not affect output layout
- node.set_output_layout(output_layout, false);
}
+
+ // Reset weights reorder params to not keep source code pointer
+ weights_reorder_params.engine = kernel_selector::generic_kernel_params::Engine::NONE;
+ weights_reorder_params.clKernel = nullptr;
+ weights_reorder_params.cpuKernel = nullptr;
+
+ // set the old output layout and do not invalidate users as change of weights will not affect output layout
+ node.set_output_layout(output_layout, false);
}
void post_optimize_weights::run(program_impl& p) {
if (!p.get_options().get<build_option_type::optimize_data>()->enabled())
continue;
+ auto& deconv_node = node->as<deconvolution>();
+ auto& weights_node = deconv_node.weights();
auto deconv_prim = node->as<deconvolution>().typed_desc();
- tensor filter_size = { 1, 1, 1, 1, 1 };
+ tensor filter_size = weights_node.get_output_layout().size;
auto weights = deconv_prim->weights;
std::vector<primitive_id> weights_vec;
for (auto& weights_id : weights_vec) {
auto weights_iter = p.nodes_map.find(weights_id);
if (weights_iter == p.nodes_map.end()) continue;
-
- auto weights_node_ptr = weights_iter->second;
- // get filter spatial sizes for input offset adjustment, perform this only once as all filters should
- // have same size
- if (weights_id == weights_vec[0])
- filter_size = weights_node_ptr->get_output_layout().size;
}
// limit optimization to stride = 1
- if (deconv_prim->stride.spatial[0] == 1 && deconv_prim->stride.spatial[1] == 1 && !deconv_prim->gradient()) {
+ bool unit_stride = std::all_of(deconv_prim->stride.spatial.begin(),
+ deconv_prim->stride.spatial.end(),
+ [](tensor::value_type v) { return v == 1; });
+ if (unit_stride && !deconv_prim->gradient()) {
primitive_id deconv_id = node->id();
auto& input_node = node->get_dependency(0);
-
- // disable for 5D
- if (cldnn::format::dimension(input_node.get_output_layout().format) == 5)
+ auto groups = deconv_node.get_groups();
+
+ bool perform_opt = false;
+ // fp16 and fp32 bfyx implementation supports transposed convolution
+ perform_opt |= cldnn::format::dimension(input_node.get_output_layout().format) == 4 &&
+ (input_node.get_output_layout().data_type == data_types::f32 || input_node.get_output_layout().data_type == data_types::f16) &&
+ !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
+ _lo.is_format_optimized(node->as<deconvolution>(), format::b_fs_yx_fsv16));
+ // int8/uint8 input
+ perform_opt |= (input_node.get_output_layout().data_type == data_types::i8 || input_node.get_output_layout().data_type == data_types::u8) &&
+ // imad convolution kernel limitation for groups
+ (groups == 1 || weights_node.get_output_layout().size.feature[0] % 4 == 0 ||
+ groups == static_cast<uint32_t>(input_node.get_output_layout().size.feature[0])) &&
+ // no uint8/int8 3D convolution support
+ input_node.get_output_layout().format.dimension() == 4;
+
+ if (!perform_opt)
continue;
- // Disable for blocked formats
- if ((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
- _lo.is_format_optimized(node->as<deconvolution>(), format::b_fs_yx_fsv16)) {
- continue;
- }
-
primitive_id input_id = deconv_prim->input[0];
// setting convolution parameters based on deconvolution params
}
}
auto rename_id = deconv_id + "_tmp";
+ auto was_output = node->is_output();
+ if (was_output) {
+ node->set_output(false);
+ auto& outputs = p.get_outputs();
+ outputs.erase(std::remove(outputs.begin(), outputs.end(), node.get()), outputs.end());
+ }
p.rename(*node, rename_id);
// create convolution primitive
if (biases.size() != 0) {
auto conv_prim = std::make_shared<convolution>(deconv_id,
- input_id,
- weights_vec,
- bias_vec,
- stride,
- input_offset,
- tensor{ 1, 1, 1, 1 },
- output_padding);
+ input_id,
+ weights_vec,
+ bias_vec,
+ groups,
+ stride,
+ input_offset,
+ tensor{ 1, 1, 1, 1 },
+ output_padding);
p.get_or_create(conv_prim);
} else {
auto conv_prim = std::make_shared<convolution>(deconv_id,
- input_id,
- weights_vec,
- stride,
- input_offset,
- tensor{ 1, 1, 1, 1 },
- output_padding);
+ input_id,
+ weights_vec,
+ groups,
+ stride,
+ input_offset,
+ tensor{ 1, 1, 1, 1 },
+ output_padding);
p.get_or_create(conv_prim);
}
p.nodes_map.erase(rename_id);
}
- update_processing_order = true;
-
+ if (was_output) {
+ conv_node->set_output(true);
+ p.get_outputs().push_back(conv_node);
+ }
p.mark_if_data_flow(*conv_node);
conv_node->recalc_output_layout(true);
fuse_sigmoid_mul_to_swish(p);
fuse_simple_primitives(p);
fuse_activations(p);
- fuse_skip_layers(p);
+ optimize_fused_ops(p);
}
void prepare_primitive_fusing::fuse_sigmoid_mul_to_swish(program_impl &p) {
}
}
-void prepare_primitive_fusing::fuse_skip_layers(program_impl& p) {
- // This loop tries fusing eltwise (sum) with deconvolution
- auto itr = p.get_processing_order().begin();
- while (itr != p.get_processing_order().end()) {
- auto node_itr = itr++;
- auto& node = (*node_itr);
-
- program_helpers::do_for_types<eltwise>(*node, [&p](eltwise_node& node) {
- if (node.get_primitive()->mode != eltwise_mode::sum || node.inputs_count() != 2)
- return;
-
- // both inputs should be deconvolutions
- if (!(node.input(0).is_type<deconvolution>() && node.input(1).is_type<deconvolution>())) {
- return;
- }
-
- auto& to_fuse_with = node.input(0);
- int to_fuse_index = 1;
-
- // remove dependencies and users of elwtise that is going to be extracted
- p.add_connection(node.input(to_fuse_index), to_fuse_with);
- p.remove_connection(node.input(to_fuse_index), node);
-
- p.get_processing_order().erase(&to_fuse_with);
- p.get_processing_order().insert(&node, &to_fuse_with);
-
- if (!node.get_fused_activations_funcs().empty()) {
- for (size_t i = 0; i < node.get_fused_activations_funcs().size(); i++) {
- to_fuse_with.add_fused_activation(node.get_fused_activations_funcs()[i],
- node.get_fused_activations_params()[i]);
- }
- }
- to_fuse_with.set_output_padding(node.get_output_layout().data_padding);
-
- p.extract_and_remove(node);
- });
- }
-}
-
void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
bool recalc_processing_order = false;
should_fuse |= input_data.is_type<mvn>();
+ should_fuse |= input_data.is_type<deconvolution>();
+
if (!should_fuse)
return;
should_fuse |= input_data.is_type<mvn>() && mvn_supports_fusings(input_data.as<mvn>());
+ should_fuse |= input_data.is_type<deconvolution>();
+
if (!should_fuse)
return;
should_fuse |= input_data.is_type<mvn>() && mvn_supports_fusings(input_data.as<mvn>()) &&
quantize_node.get_scale_shift_opt();
+ should_fuse |= input_data.is_type<activation>() && quantize_node.get_scale_shift_opt();
+
+ should_fuse |= input_data.is_type<deconvolution>() && quantize_node.get_scale_shift_opt() &&
+ // fp16/fp32 optimized kernels don't support chaning data type
+ (input_data.get_dependency(0).get_output_layout().data_type == data_types::u8 ||
+ input_data.get_dependency(0).get_output_layout().data_type == data_types::i8 ||
+ input_data.get_output_layout().data_type == out_layout.data_type);
+
if (!should_fuse)
return;
auto parent2 = parents[1];
bool can_fuse_parent1 = (parent1->is_type<convolution>() && conv_supports_fusings(parent1->as<convolution>())) ||
- (parent1->is_type<mvn>() && mvn_supports_fusings(parent1->as<mvn>()));
+ (parent1->is_type<mvn>() && mvn_supports_fusings(parent1->as<mvn>())) ||
+ (parent1->is_type<deconvolution>());
bool can_fuse_parent2 = (parent2->is_type<convolution>() && conv_supports_fusings(parent2->as<convolution>())) ||
- (parent2->is_type<mvn>() && mvn_supports_fusings(parent2->as<mvn>()));
+ (parent2->is_type<mvn>() && mvn_supports_fusings(parent2->as<mvn>())) ||
+ (parent2->is_type<deconvolution>());
std::vector<bool> can_fuse_parents = { can_fuse_parent1, can_fuse_parent2 };
p.get_processing_order().calc_processing_order(p);
}
+void prepare_primitive_fusing::optimize_fused_ops(program_impl& p) {
+ auto itr = p.get_processing_order().begin();
+ while (itr != p.get_processing_order().end()) {
+ auto node_itr = itr++;
+ auto& node = (*node_itr);
+
+ if (!node->has_fused_primitives())
+ continue;
+
+ // TODO: try more optimizations:
+ // 1. clamp optimization
+ // 2. fuse conv bias to quantize shift
+ auto& fused_prims = node->get_fused_primitives();
+
+ // Drop relu if the next fused op is quantize with u8 output and no in_shift
+ auto fp_itr = fused_prims.begin();
+ while (fp_itr != fused_prims.end()) {
+ auto curr_itr = fp_itr++;
+ if (fp_itr == fused_prims.end())
+ break;
+
+ auto& fp = *curr_itr;
+ auto& fp_next = *fp_itr;
+
+ if (fp.node->is_type<activation>() && fp_next.node->is_type<quantize>()) {
+ auto& activation_node = fp.node->as<activation>();
+ auto& quantize_node = fp_next.node->as<quantize>();
+ bool can_skip = activation_node.get_primitive()->activation_function == activation_func::relu &&
+ activation_node.get_primitive()->additional_params.a == 0.0f &&
+ fp.deps.empty() &&
+ (quantize_node.get_output_layout().data_type == data_types::u8 ||
+ quantize_node.get_output_layout().data_type == data_types::i8) &&
+ quantize_node.get_scale_shift_opt() &&
+ !quantize_node.get_need_pre_shift();
+
+ if (can_skip) {
+ fused_prims.erase(curr_itr);
+ }
+ }
+ }
+ }
+}
+
void prepare_conv_eltw_fusing::fuse_conv_depth_to_space(program_impl& p, program_node* node) {
// make sure this convolution have only 1 user and it's depth_to_space
// make sure convolution is not an output
return offset;
};
+ bool has_negative_scales = false;
bool need_post_scale = false;
bool need_post_shift = false;
bool need_pre_shift = false;
+ auto out_dt = quantize_node.get_output_layout().data_type;
+ bool need_clamp = levels != 256 || (out_dt != data_types::u8 && out_dt != data_types::i8);
bool per_tensor_in_scale = true;
bool per_tensor_in_shift = true;
bool per_tensor_in_range = true;
if (data_output_shift[s_offset] != 0.0f) {
need_post_shift = true;
}
+ if (data_input_scale[s_offset] < 0.0f) {
+ has_negative_scales = true;
+ }
}
}
}
if (half_to_float(data_output_shift[s_offset]) != 0.0f) {
need_post_shift = true;
}
+ if (half_to_float(data_input_scale[s_offset]) < 0.0f) {
+ has_negative_scales = true;
+ }
}
}
}
throw std::runtime_error("prepare_quantization: Unsupported precision of quantize output values");
}
+ if (has_negative_scales) {
+ return;
+ }
+
layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1));
float zero = 0.f;
auto in_scale_prim = std::make_shared<data>(quantize_node.id() + "_in_scale", memory::attach(dummy_layout, &zero, 1));
quantize_node.set_input_shift_val(in_shift_val);
}
+ if (need_clamp) {
+ quantize_node.set_need_clamp();
+ }
+
if (per_tensor_in_range) {
quantize_node.set_per_tensor_input_range();
quantize_node.set_input_lo_val(in_lo_val);
void fuse_sigmoid_mul_to_swish(program_impl &p);
void fuse_reorders(program_impl& p);
void fuse_activations(program_impl& p);
- void fuse_skip_layers(program_impl& p);
void fuse_simple_primitives(program_impl &p);
+ void optimize_fused_ops(program_impl &p);
layout_optimizer& _lo;
};
virtual bool validate(const primitive_inst& instance) const = 0;
std::string get_kernel_name() const { return _kernel_name; }
// TODO: added a derived class for weights reordering (maybe for all static data reordering)
- const kernel_selector::weights_reorder_params _weights_reorder_params;
+ kernel_selector::weights_reorder_params _weights_reorder_params;
// class typed_primitive_gpu_impl override this with return false;
virtual bool is_cpu() const { return true; }
}
const std::vector<fused_primitive_desc>& get_fused_primitives() const { return fused_prims; }
+ std::vector<fused_primitive_desc>& get_fused_primitives() { return fused_prims; }
size_t get_fused_inputs_count() const {
size_t count = 0;
program_node& input(size_t index = 0) const { return get_dependency(index); }
size_t inputs_count() const { return get_dependencies().size(); }
bool get_scale_shift_opt() const { return scale_shift_opt; }
+ bool get_need_pre_shift() { return need_pre_shift; }
void set_scale_shift_opt() { scale_shift_opt = true; }
void set_need_post_scale() { need_post_scale = true; }
void set_need_post_shift() { need_post_shift = true; }
void set_need_pre_shift() { need_pre_shift = true; }
+ void set_need_clamp() { need_clamp = true; }
void set_per_tensor_input_scale() { per_tensor_input_scale = true; }
void set_per_tensor_input_shift() { per_tensor_input_shift = true; }
void set_per_tensor_input_range() { per_tensor_input_range = true; }
need_post_scale,
need_post_shift,
need_pre_shift,
+ need_clamp,
per_tensor_input_range,
per_tensor_input_scale,
per_tensor_input_shift,
bool need_post_scale = false;
bool need_post_shift = false;
bool need_pre_shift = false;
+ bool need_clamp = false;
bool per_tensor_input_range = false;
bool per_tensor_input_scale = false;
return "g_os_is_zyx_isv16_osv16";
case format::g_os_is_yx_osv16_isv4:
return "g_os_is_yx_osv16_isv4";
+ case format::g_os_zyx_is_osv16_isv4:
+ return "g_os_zyx_is_osv16_isv4";
+ case format::g_os_zyx_is_osv16_isv16:
+ return "g_os_zyx_is_osv16_isv16";
+ case format::g_os_zyx_is_osv16_isv32:
+ return "g_os_zyx_is_osv16_isv32";
+ case format::g_os_zyx_is_osv32_isv4:
+ return "g_os_zyx_is_osv32_isv4";
+ case format::g_os_zyx_is_osv32_isv16:
+ return "g_os_zyx_is_osv32_isv16";
+ case format::g_os_zyx_is_osv32_isv32:
+ return "g_os_zyx_is_osv32_isv32";
default:
return "unknown (" + std::to_string(fmt.value) + ")";
}
return kernel_selector::weights_layout::g_os_iyx_osv32;
case format::gs_oiyx_gsv16:
return kernel_selector::weights_layout::gs_oiyx_gsv16;
+ case format::gs_oizyx_gsv16:
+ return kernel_selector::weights_layout::gs_oizyx_gsv16;
case format::gs_oiyx_gsv32:
return kernel_selector::weights_layout::gs_oiyx_gsv32;
case format::gyxio:
return kernel_selector::weights_layout::g_os_is_zyx_isv16_osv16;
case format::g_os_is_yx_osv16_isv4:
return kernel_selector::weights_layout::g_os_is_yx_osv16_isv4;
+ case format::g_os_zyx_is_osv16_isv4:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv4;
+ case format::g_os_zyx_is_osv16_isv16:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv16;
+ case format::g_os_zyx_is_osv16_isv32:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv16_isv32;
+ case format::g_os_zyx_is_osv32_isv4:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv32_isv4;
+ case format::g_os_zyx_is_osv32_isv16:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv32_isv16;
+ case format::g_os_zyx_is_osv32_isv32:
+ return kernel_selector::weights_layout::g_os_zyx_is_osv32_isv32;
default:
throw std::invalid_argument("Unable to convert tensor layout " + fmt_to_str(f) + " to weights layout");
}
return cldnn::format::g_os_iyx_osv32;
case kernel_selector::weights_layout::gs_oiyx_gsv16:
return cldnn::format::gs_oiyx_gsv16;
+ case kernel_selector::weights_layout::gs_oizyx_gsv16:
+ return cldnn::format::gs_oizyx_gsv16;
case kernel_selector::weights_layout::gs_oiyx_gsv32:
return cldnn::format::gs_oiyx_gsv32;
case kernel_selector::weights_layout::gyxio:
return cldnn::format::g_os_is_zyx_isv16_osv16;
case kernel_selector::weights_layout::os_is_yx_osv16_isv4:
return cldnn::format::g_os_is_yx_osv16_isv4;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv4:
+ return cldnn::format::g_os_zyx_is_osv16_isv4;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv16:
+ return cldnn::format::g_os_zyx_is_osv16_isv16;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv16_isv32:
+ return cldnn::format::g_os_zyx_is_osv16_isv32;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv4:
+ return cldnn::format::g_os_zyx_is_osv32_isv4;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv16:
+ return cldnn::format::g_os_zyx_is_osv32_isv16;
+ case kernel_selector::weights_layout::g_os_zyx_is_osv32_isv32:
+ return cldnn::format::g_os_zyx_is_osv32_isv32;
default:
return cldnn::format::bfyx;
}
new_vals[3] = align_to(vals[3], 4);
new_vals[2] = align_to(vals[2], 8);
}
+ if (ks_layout == kernel_selector::Tensor::bs_fs_yx_bsv16_fsv16) {
+ new_vals[0] = align_to(vals[0], 16);
+ new_vals[1] = align_to(vals[1], 16);
+ }
+ if (ks_layout == kernel_selector::Tensor::bs_fs_zyx_bsv16_fsv16) {
+ new_vals[0] = align_to(vals[0], 16);
+ new_vals[1] = align_to(vals[1], 16);
+ }
for (size_t i = 0; i < vec.size(); i++) {
const size_t tensor_index = vec.size() - 1 - i;
pitch *= (reserved_in_mem_count + lp + up);
}
+ if (ks_layout == kernel_selector::Tensor::bs_fs_yx_bsv16_fsv16) {
+ vec[2].pitch = (vec[0].v * vec[1].v) * 16;
+ vec[3].pitch = vec[2].pitch * vec[2].v;
+ }
+ if (ks_layout == kernel_selector::Tensor::bs_fs_zyx_bsv16_fsv16) {
+ vec[3].pitch = (vec[0].v * vec[1].v * vec[2].v) * 16;
+ vec[4].pitch = vec[3].pitch * vec[3].v;
+ }
+
const int feature_index =
kernel_selector::DataTensor::Channelndex(ks_layout, kernel_selector::Tensor::DataChannelName::FEATURE);
vec[feature_index].v /= split;
(input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16) &&
((input_layout.size.feature[0] / conv->split()) % 16 == 0 || input_layout.size.feature[0] == 3) &&
weights_layout.data_type == input_layout.data_type &&
- (weights_layout.size.batch[0] % 16 == 0 || weights_layout.size.batch[0] % 8 == 0) &&
+ (weights_layout.size.batch[0] % 16 == 0 || (weights_layout.size.batch[0] == 8 && conv->groups > 1)) &&
conv->dilation == tensor(1))
return true;
return false;
(input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16) &&
deconv->split() == 1)
return true;
+
+ if (input_layout.format.dimension() == 5 &&
+ (input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) &&
+ deconv->split() == 1)
+ return true;
+
return false;
}
deconv->split() == 1 &&
(deconv->groups == 1 || (static_cast<int>(deconv->groups) == weights_layout.size.group[0])))
return true;
+
+ if (input_layout.format.dimension() == 4 &&
+ (input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8) &&
+ deconv->split() == 1)
+ return true;
+
return false;
}
#include "gpu/ocl_toolkit.h"
+#include "kernel_base.h"
+
#include <algorithm>
#include <fstream>
#include <iostream>
: engine(&engine_ref),
options(options),
processing_order() {
+ kernel_selector::KernelBase::ResetCounter();
set_options();
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
prepare_nodes(topology);
+++ /dev/null
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-#include <cmath>
-#include <gtest/gtest.h>
-#include <algorithm>
-#include "api/memory.hpp"
-#include <api/input_layout.hpp>
-#include "api/activation_grad.hpp"
-#include <api/topology.hpp>
-#include <api/network.hpp>
-#include <api/engine.hpp>
-#include <api/data.hpp>
-#include "test_utils/test_utils.h"
-#include "test_utils/float16.h"
-
-using namespace cldnn;
-using namespace tests;
-
-TEST(activation_grad_f16_fw_gpu, basic_bfyx_all_functions)
-{
- // Input:
- // 1 -2 -3 4 5
- // 2 2 3 4 -6
- // 3 -3 3 5 1
- // 1 1 1 -1 1
- //
- // a: 0.5, b: 2.5
- //
-
- const auto& engine = get_test_engine();
-
- auto input_grad = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 5, 4 } });
- auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 5, 4 } });
- auto input_params = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 2, 1 } });
- set_values(input_grad,
- { FLOAT16(1.0f), FLOAT16(-2.0f),FLOAT16(-3.0f), FLOAT16(4.0f), FLOAT16(5.0f),
- FLOAT16(2.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(-6.0f),
- FLOAT16(3.0f), FLOAT16(-3.0f),FLOAT16(3.0f), FLOAT16(5.0f), FLOAT16(1.0f),
- FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(-1.0f), FLOAT16(1.0f) });
-
- set_values(input,
- { FLOAT16(12.0f), FLOAT16(-22.0f), FLOAT16(-32.0f), FLOAT16(42.0f), FLOAT16(52.0f),
- FLOAT16(22.0f),FLOAT16(22.0f), FLOAT16(32.0f), FLOAT16(42.0f), FLOAT16(-62.0f),
- FLOAT16(32.0f), FLOAT16(-32.0f), FLOAT16(32.0f), FLOAT16(52.0f), FLOAT16(12.0f),
- FLOAT16(12.0f), FLOAT16(12.0f), FLOAT16(12.0f), FLOAT16(-12.0f), FLOAT16(12.0f) });
-
- std::vector<activation_grad_func> funcs = {
- activation_grad_func::none,
- activation_grad_func::relu,
- activation_grad_func::relu_negative_slope,
- };
-
- activation_additional_params params = { 0.5f, 2.5f };
- set_values(input_params, { FLOAT16(params.a), FLOAT16(params.b) });
-
- for (uint8_t i = 0; i < 2; i++)
- {
- for (auto func : funcs)
- {
- topology topology(input_layout("input_grad", input_grad.get_layout()));
- topology.add(data("input", input));
-
- if (i == 0)
- {
- topology.add(activation_grad("activation_grad", "input_grad", "input", func, params));
- }
- else
- {
- topology.add(data("input_params", input_params));
- topology.add(activation_grad("activation_grad", "input_grad", "input", "input_params", func));
- }
-
- network network(engine, topology);
- network.set_input_data("input_grad", input_grad);
- auto outputs = network.execute();
- EXPECT_EQ(outputs.size(), size_t(1));
- EXPECT_EQ(outputs.begin()->first, "activation_grad");
-
- auto output_memory = outputs.at("activation_grad").get_memory();
- auto output_layout = output_memory.get_layout();
- auto output_ptr = output_memory.pointer<uint16_t>();
- auto input_grad_ptr = input_grad.pointer<uint16_t>();
- auto input_ptr = input.pointer<uint16_t>();
-
- int y_size = output_layout.size.spatial[1];
- int x_size = output_layout.size.spatial[0];
- int f_size = output_layout.size.feature[0];
- int b_size = output_layout.size.batch[0];
- EXPECT_EQ(output_layout.format, format::bfyx);
- EXPECT_EQ(y_size, 4);
- EXPECT_EQ(x_size, 5);
- EXPECT_EQ(f_size, 1);
- EXPECT_EQ(b_size, 1);
-
- std::vector<float> out;
-
- for (size_t i = 0; i < output_layout.get_linear_size(); ++i)
- {
- switch (func)
- {
- case activation_grad_func::none:
- EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]), float16_to_float32(output_ptr[i]));
- break;
- case activation_grad_func::relu:
- EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]) * (float16_to_float32(input_ptr[i]) > 0), float16_to_float32(output_ptr[i]));
- break;
- case activation_grad_func::relu_negative_slope:
- EXPECT_FLOAT_EQ(float16_to_float32(input_grad_ptr[i]) * ((float16_to_float32(input_ptr[i]) > 0) + params.a * (float16_to_float32(input_ptr[i]) <= 0)), float16_to_float32(output_ptr[i]));
- break;
- default:
- break;
- }
- }
- }
- }
-}
-
-TEST(activation_grad_f32_fw_gpu, basic_bfyx_all_functions)
-{
- // Input:
- // 1 -2 -3 4 5
- // 2 2 3 4 -6
- // 3 -3 3 5 1
- // 1 1 1 -1 1
- //
- // a: 0.5, b: 2.5
- //
-
- const auto& engine = get_test_engine();
-
- auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
- auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
- auto input_params = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
- set_values(input_grad,
- { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f,
- 2.0f, 2.0f, 3.0f, 4.0f, -6.0f,
- 3.0f, -3.0f, 3.0f, 5.0f, 1.0f,
- 1.0f, 1.0f, 1.0f, -1.0f, 1.0f });
-
- set_values(input,
- { 12.0f, -22.0f, -32.0f, 42.0f, 52.0f,
- 22.0f, 22.0f, 32.0f, 42.0f, -62.0f,
- 32.0f, -32.0f, 32.0f, 52.0f, 12.0f,
- 12.0f, 12.0f, 12.0f, -12.0f, 12.0f });
-
- std::vector<activation_grad_func> funcs = {
- activation_grad_func::none,
- activation_grad_func::relu,
- activation_grad_func::relu_negative_slope,
- };
-
- activation_additional_params params = { 0.5f, 2.5f };
- set_values(input_params, { params.a, params.b });
-
- for (uint8_t i = 0; i < 2; i++)
- {
- for (auto func : funcs)
- {
- topology topology(input_layout("input_grad", input_grad.get_layout()));
- topology.add(data("input", input));
-
- if (i == 0)
- {
- topology.add(activation_grad("activation_grad", "input_grad", "input", func, params));
- }
- else
- {
- topology.add(data("input_params", input_params));
- topology.add(activation_grad("activation_grad", "input_grad", "input", "input_params", func));
- }
-
- network network(engine, topology);
- network.set_input_data("input_grad", input_grad);
- auto outputs = network.execute();
- EXPECT_EQ(outputs.size(), size_t(1));
- EXPECT_EQ(outputs.begin()->first, "activation_grad");
-
- auto output_memory = outputs.at("activation_grad").get_memory();
- auto output_layout = output_memory.get_layout();
- auto output_ptr = output_memory.pointer<float>();
- auto input_grad_ptr = input_grad.pointer<float>();
- auto input_ptr = input.pointer<float>();
-
- int y_size = output_layout.size.spatial[1];
- int x_size = output_layout.size.spatial[0];
- int f_size = output_layout.size.feature[0];
- int b_size = output_layout.size.batch[0];
- EXPECT_EQ(output_layout.format, format::bfyx);
- EXPECT_EQ(y_size, 4);
- EXPECT_EQ(x_size, 5);
- EXPECT_EQ(f_size, 1);
- EXPECT_EQ(b_size, 1);
-
- std::vector<float> out;
-
- for (size_t i = 0; i < output_layout.get_linear_size(); ++i)
- {
- switch (func)
- {
- case activation_grad_func::none:
- EXPECT_FLOAT_EQ(input_grad_ptr[i], output_ptr[i]);
- break;
- case activation_grad_func::relu:
- EXPECT_FLOAT_EQ(input_grad_ptr[i] * (input_ptr[i] > 0), output_ptr[i]);
- break;
- case activation_grad_func::relu_negative_slope:
- EXPECT_FLOAT_EQ(input_grad_ptr[i] * ((input_ptr[i] > 0) + params.a * (input_ptr[i] <= 0)), output_ptr[i]);
- break;
- default:
- break;
- }
- }
- }
- }
-}
}
}
+// Checks feature-axis concatenation of two max-pooled i8 inputs with
+// optimize_data enabled (the concat may be optimized out by fusing into
+// the pooling outputs); result is reordered to yxfb and compared element-wise.
+TEST(concat_gpu, i8_optimization_with_pool) {
+    const auto& engine = get_test_engine();
+
+    // Two 1x1x8x3 (b, f, x, y) i8 inputs.
+    auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 8, 3}});
+    auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 8, 3}});
+
+
+    set_values<int8_t>(input0, { 11, 12, 13,
+                                 14, 12, 12,
+                                 13, -14, 13,
+                                 13, -13, 15,
+                                 16, -16, -13,
+                                 -14, 12, 11,
+                                 16, -14, -13,
+                                 18, -13, -15, });
+    set_values<int8_t>(input1, { 11, 12, 13,
+                                 15, 12, 12,
+                                 13, 14, 12,
+                                 13, 13, 15,
+                                 12, 14, 13,
+                                 14, 17, 18,
+                                 13, 14, 11,
+                                 13, 13, 15 });
+
+
+    // Expected values after 2x2 max pooling (stride 1) on each input,
+    // feature-concat of the two results, and reorder to yxfb.
+    VF<int8_t> output_vec = {13, 13, 13, 13, 15, 15,
+                             16, 15, 16, 14, 13, 14,
+                             13, 14, 13, 18, 16, 18,
+                             16, 15, 16, 15, 18, 14,
+                             18, 14, -13, 15};
+
+    layout reorder_layout(data_types::i8, format::yxfb, {7, 2, 2, 1});
+    topology topology(input_layout("input0", input0.get_layout()),
+                      input_layout("input1", input1.get_layout()),
+                      pooling("pool0", "input0", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      pooling("pool1", "input1", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      concatenation("concat",
+                                    {"pool0", "pool1"},
+                                    concatenation::concatenation_axis::along_f,
+                                    data_types::i8,
+                                    padding{{0, 0, 0, 0}, 0}),
+                      reorder("reorder", "concat", reorder_layout));
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network(engine, topology, options);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "reorder");
+
+    auto output_memory = outputs.at("reorder").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    // NOTE(review): y_size reads spatial[0] and x_size reads spatial[1] here,
+    // the opposite of the bfyx tests in this file — confirm this mapping is
+    // intentional for the yxfb output layout.
+    int y_size = output_layout.size.spatial[0];
+    int x_size = output_layout.size.spatial[1];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::yxfb);
+    EXPECT_EQ(y_size, 7);
+    EXPECT_EQ(x_size, 2);
+    EXPECT_EQ(f_size, 2);
+    EXPECT_EQ(b_size, 1);
+
+    // Flat element-wise comparison against the reference vector.
+    for (size_t x = 0; x < output_layout.count(); ++x) {
+        EXPECT_EQ(output_vec[x], output_ptr[x]);
+    }
+}
+
+// Checks feature-axis concatenation of three i8 inputs feeding a convolution
+// with optimize_data enabled; the fixed input/filter values below make the
+// expected output exactly computable by hand.
+TEST(concat_gpu, i8_optimization_with_conv) {
+    // Filter : 3x2x3
+    // Stride : 2x1
+    // Input1 : 4x5
+    // Input2 : 4x5
+    // Input3 : 4x5
+    // Concat output : 3x4x5
+    // Conv input : 3x4x5
+    // Output : 2x3
+    //
+    // Input0:
+    // 1 2 3 -4 5
+    // 2 2 3 4 -6
+    // -3 3 3 5 1
+    // -1 1 1 1 -1
+    // Input1:
+    // 5 5 3 -4 5
+    // 2 -2 5 4 6
+    // 6 1 3 5 1
+    // 1 2 -3 -4 5
+    // Input2:
+    // -2 1 3 2 -5
+    // 1 2 -2 4 2
+    // 3 5 3 -3 1
+    // 5 4 3 2 1
+    //
+    // Filter:
+    // 1 2 1 1 2 1 1 2 1
+    // 2 1 2 2 1 2 2 1 2
+    //
+    // Output:
+    // 53 54 30
+    // 52 47 37
+    const auto& engine = get_test_engine();
+
+    auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}});
+    auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}});
+    auto input2 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}});
+    // One output feature over 3 input features, 3x2 spatial filter.
+    auto weights = memory::allocate(engine, { data_types::i8, format::bfyx, { 1, 3, 3, 2 } });
+
+    set_values<int8_t>(weights, { 1, 2, 1,
+                                  2, 1, 2, 1, 2, 1,
+                                  2, 1, 2, 1, 2, 1,
+                                  2, 1, 2 });
+
+    set_values<int8_t>(input0, { 1, 2, 3, -4, 5,
+                                 2, 2, 3, 4, -6,
+                                 -3, 3, 3, 5, 1,
+                                 -1, 1, 1, 1, -1 });
+    set_values<int8_t>(input1, { 5, 5, 3, -4, 5,
+                                 2, -2, 5, 4, 6,
+                                 6, 1, 3, 5, 1,
+                                 1, 2, -3, -4, 5 });
+    set_values<int8_t>(input2, { -2, 1, 3, 2, -5,
+                                 1, 2, -2, 4, 2,
+                                 3, 5, 3, -3, 1,
+                                 5, 4, 3, 2, 1 });
+
+    VF<int8_t> output_vec = { 53, 54, 30, 52, 47, 37 };
+
+
+    layout reorder_layout(data_types::i8, format::bfyx, {1, 1, 2, 3});
+    topology topology(input_layout("input0", input0.get_layout()),
+                      input_layout("input1", input1.get_layout()),
+                      input_layout("input2", input2.get_layout()),
+                      concatenation("concat",
+                                    {"input0", "input1", "input2"},
+                                    concatenation::concatenation_axis::along_f,
+                                    data_types::i8,
+                                    padding{{0, 0, 0, 0}, 0}),
+                      data("weights", weights),
+                      // Stride {b, f, x, y} = {1, 1, 1, 2} matches the
+                      // "Stride : 2x1" description above.
+                      convolution("conv", "concat", { "weights" }, { 1,1,1,2 }),
+                      reorder("output", "conv", reorder_layout));
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network(engine, topology, options);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "output");
+
+    auto output_memory = outputs.at("output").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::bfyx);
+    EXPECT_EQ(y_size, 2);
+    EXPECT_EQ(x_size, 3);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (size_t x = 0; x < output_layout.count(); ++x) {
+        EXPECT_EQ(output_vec[x], output_ptr[x]);
+    }
+}
+
+// Combined case: two i8 inputs are max-pooled, feature-concatenated, then
+// convolved (with a negative x input offset), all with optimize_data on.
+TEST(concat_gpu, i8_optimization_with_pool_conv) {
+    // Filter : 32x2x1
+    // Input offset : 0x0x-1x0
+    // Stride : 1x1
+    // Input0 : 16x3x2
+    // Input1 : 16x3x2
+    // Output : 1x1x3
+    //
+    // Input0:
+    // -3 6 0 2 -1 -1 6 0 5 4 1 6 2 4 0 5
+    // -2 -1 1 0 2 3 3 3 6 2 4 7 3 6 7 -1
+    // 7 7 5 -3 1 -1 5 4 0 3 -2 6 2 5 2 4
+    // 5 -1 3 6 2 0 -3 -1 0 3 0 -1 1 6 1 6
+    // 5 -2 2 -1 5 6 3 4 1 0 6 6 7 2 6 3
+    // 6 7 -1 5 5 6 -1 0 -1 5 5 2 3 -1 -3 4
+    //
+    // Input1:
+    // 4 -2 0 0 6 2 0 4 6 4 4 4 -3 -1 4 -3
+    // 1 0 -1 5 -1 1 4 2 7 7 0 2 3 4 -1 3
+    // 7 7 2 -3 -1 5 -2 2 6 -3 0 7 0 3 3 3
+    // -1 0 -2 -2 7 -3 -3 -1 5 0 3 4 0 -1 2 5
+    // 2 -1 2 -3 0 -3 -3 2 4 3 3 5 5 7 5 1
+    // 2 2 -3 6 6 7 1 -1 -2 5 1 -1 4 5 -3 -2
+    //
+    // Filters:
+    // -1, 2, -2, 2, -2, 1, 1, 0, -1, 1, 2, -2, 2, 1, -2, 0,
+    // 0, -2, -2, -2, -2, -1, 2, 1, 2, -1, -1, 0, 2, -2, -2, 1,
+    // 0, -2, 0, 1, -2, -1, -2, 0, -1, -1, -2, 1, -2, 0, 1, 2,
+    // 2, 2, 2, -2, 0, 2, 1, -2, -1, -1, 0, -2, 2, -1, 2, -1
+    //
+    // Output:
+    // -14, -35, -10
+
+    const auto& engine = get_test_engine();
+
+    auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 16, 3, 2}});
+    auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 16, 3, 2}});
+    // One output feature over the 32 concatenated input features, 2x1 filter.
+    auto weights = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 32, 2, 1}});
+
+    set_values<int8_t>(weights, {-1, 2, -2, 2, -2, 1, 1, 0, -1, 1, 2, -2, 2, 1, -2, 0, 0, -2, -2, -2, -2, -1, 2, 1, 2, -1, -1, 0, 2, -2, -2, 1,
+                                 0, -2, 0, 1, -2, -1, -2, 0, -1, -1, -2, 1, -2, 0, 1, 2, 2, 2, 2, -2, 0, 2, 1, -2, -1, -1, 0, -2, 2, -1, 2, -1});
+
+    set_values<int8_t>(input0, {-3, 6, 0, 2, -1, -1, 6, 0, 5, 4, 1, 6, 2, 4, 0, 5,
+                                -2, -1, 1, 0, 2, 3, 3, 3, 6, 2, 4, 7, 3, 6, 7, -1,
+                                7, 7, 5, -3, 1, -1, 5, 4, 0, 3, -2, 6, 2, 5, 2, 4,
+                                5, -1, 3, 6, 2, 0, -3, -1, 0, 3, 0, -1, 1, 6, 1, 6,
+                                5, -2, 2, -1, 5, 6, 3, 4, 1, 0, 6, 6, 7, 2, 6, 3,
+                                6, 7, -1, 5, 5, 6, -1, 0, -1, 5, 5, 2, 3, -1, -3, 4 });
+
+    set_values<int8_t>(input1, { 4, -2, 0, 0, 6, 2, 0, 4, 6, 4, 4, 4, -3, -1, 4, -3,
+                                 1, 0, -1, 5, -1, 1, 4, 2, 7, 7, 0, 2, 3, 4, -1, 3,
+                                 7, 7, 2, -3, -1, 5, -2, 2, 6, -3, 0, 7, 0, 3, 3, 3,
+                                 -1, 0, -2, -2, 7, -3, -3, -1, 5, 0, 3, 4, 0, -1, 2, 5,
+                                 2, -1, 2, -3, 0, -3, -3, 2, 4, 3, 3, 5, 5, 7, 5, 1,
+                                 2, 2, -3, 6, 6, 7, 1, -1, -2, 5, 1, -1, 4, 5, -3, -2});
+
+    VF<int8_t> output_vec = { -14, -35, -10 };
+
+    layout reorder_layout(data_types::i8, format::bfyx, {1, 1, 3, 1});
+    topology topology(input_layout("input0", input0.get_layout()),
+                      input_layout("input1", input1.get_layout()),
+                      pooling("pool0", "input0", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      pooling("pool1", "input1", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}),
+                      concatenation("concat",
+                                    {"pool0", "pool1"},
+                                    concatenation::concatenation_axis::along_f,
+                                    data_types::i8,
+                                    padding{{0, 0, 0, 0}, 0}),
+                      data("weights", weights),
+                      // Input offset {0, 0, -1, 0} = the "0x0x-1x0" above.
+                      convolution("conv", "concat", {"weights"}, {1, 1, 1, 1}, {0, 0, -1, 0}),
+                      reorder("output", "conv", reorder_layout) );
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network(engine, topology, options);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "output");
+
+    auto output_memory = outputs.at("output").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    // NOTE(review): y_size/x_size read spatial[0]/spatial[1], opposite of the
+    // conv test above — with a 3x1 output both orders pass; confirm intent.
+    int y_size = output_layout.size.spatial[0];
+    int x_size = output_layout.size.spatial[1];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::bfyx);
+    EXPECT_EQ(y_size, 3);
+    EXPECT_EQ(x_size, 1);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (size_t x = 0; x < output_layout.count(); ++x) {
+        EXPECT_EQ(output_vec[x], output_ptr[x]);
+    }
+}
+
using TestParamType_concat = ::testing::tuple<size_t, // 0 - Input Batch size
std::vector<size_t>, // 1 - Inputs Features Sizes
size_t, // 2 - Input Y Size
#include "api/memory.hpp"
#include <api/input_layout.hpp>
#include "api/crop.hpp"
+#include <api/eltwise.hpp>
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
}
}
}
+
+// batch size, input feature, crop out feature, (in_out format, crop format)
+using crop_test_params = std::tuple<size_t, size_t, size_t, std::pair<cldnn::format,cldnn::format>>;
+
+class crop_gpu : public ::testing::TestWithParam<crop_test_params> {};
+
+// Reorders a (batch x feature x 1 x 1 [x 1]) input into a blocked format,
+// crops the trailing `crop out feature` features there, reorders back to the
+// plain format, and compares with a host-computed feature slice. Exercises
+// crop on padded/blocked layouts.
+TEST_P(crop_gpu, pad_test) {
+    auto p = GetParam();
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = std::get<0>(p);
+    auto feature_num = std::get<1>(p);
+    auto x_size = 1;
+    auto y_size = 1;
+    auto z_size = 1;
+
+    auto crop_batch_num = batch_num;
+    auto crop_feature_num_1 = std::get<2>(p);
+    auto crop_x_size = 1;
+    auto crop_y_size = 1;
+    auto crop_z_size = 1;
+    // The crop keeps the LAST crop_feature_num_1 features.
+    auto feature_offset_1 = feature_num - crop_feature_num_1;
+
+    auto in_out_format = std::get<3>(p).first;
+    auto crop_format = std::get<3>(p).second;
+
+    auto input = memory::allocate(engine, { data_types::f32, in_out_format, { tensor(spatial(x_size, y_size, z_size), feature(feature_num), batch(batch_num)) } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(reorder("reorder", "input", crop_format, data_types::f32));
+    topology.add(crop("crop1", "reorder", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size, crop_z_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0,0), batch(0)) }));
+    topology.add(reorder("out", "crop1", in_out_format, data_types::f32));
+
+    // Host reference: feature i holds value i; the expected output is the
+    // tail slice [feature_offset_1, feature_num) repeated for every batch.
+    std::vector<float> input_vec;
+    std::vector<float> res;
+    std::vector<float> input_data;
+    std::vector<float> res_data;
+    for (size_t i = 0; i < feature_num; i++) {
+        input_data.push_back(static_cast<float>(i));
+    }
+    for (size_t i = 0; i < crop_feature_num_1; i++) {
+        res_data.push_back(input_data[feature_offset_1 + i]);
+    }
+    for (size_t i = 0; i < batch_num; i++) {
+        input_vec.insert(input_vec.end(), input_data.begin(), input_data.end());
+        res.insert(res.end(), res_data.begin(), res_data.end());
+    }
+    set_values(input, input_vec);
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+
+    network network(engine, topology, bo);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("out").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < res.size(); i++)
+        EXPECT_EQ(output_ptr[i], res[i]);
+}
+
+// (in_out format, blocked crop format) pairs exercised by the pad test:
+// plain 4-D/5-D formats cropped through fsv16 and bsv16_fsv16 blocked layouts.
+static std::vector<std::pair<cldnn::format,cldnn::format>> formats = {
+    std::make_pair<cldnn::format, cldnn::format>(format::bfyx, format::b_fs_yx_fsv16),
+    std::make_pair<cldnn::format, cldnn::format>(format::bfzyx, format::b_fs_zyx_fsv16),
+    std::make_pair<cldnn::format, cldnn::format>(format::bfyx, format::bs_fs_yx_bsv16_fsv16),
+    std::make_pair<cldnn::format, cldnn::format>(format::bfzyx, format::bs_fs_zyx_bsv16_fsv16),
+    };
+// Sizes include both block-aligned (8, 16, 32) and unaligned (17, 18, 24)
+// values so padded tails of the blocked layouts are covered.
+static std::vector<size_t> batches = {1, 8, 16, 17};
+static std::vector<size_t> in_features = {18, 24, 32};
+static std::vector<size_t> crop_features = {4, 8, 12, 17};
+
+INSTANTIATE_TEST_CASE_P(crop_test, crop_gpu,
+                        ::testing::Combine(
+                            ::testing::ValuesIn(batches),
+                            ::testing::ValuesIn(in_features),
+                            ::testing::ValuesIn(crop_features),
+                            ::testing::ValuesIn(formats)
+                        ), );
#include "test_utils/test_utils.h"
#include "test_utils/float16.h"
#include "api/reorder.hpp"
+#include "src/include/to_string_utils.h"
+
+// Let the test-utils half type FLOAT16 participate in
+// cldnn::type_to_data_type<> lookups (maps it to data_types::f16), so the
+// templated test harness below can build layouts from the element type.
+namespace cldnn {
+template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
+}
using namespace cldnn;
using namespace tests;
+// Maps an input element type to the accumulator type used by the reference
+// deconvolution: i8/u8 accumulate in int, fp16 in float, anything else in
+// its own type.
+template <typename InputT>
+struct deconvolution_traits {
+    using accumulator_type = InputT;
+};
+
+template <>
+struct deconvolution_traits<uint8_t> {
+    using accumulator_type = int;
+};
+
+template <>
+struct deconvolution_traits<int8_t> {
+    using accumulator_type = int;
+};
+
+template <>
+struct deconvolution_traits<FLOAT16> {
+    using accumulator_type = float;
+};
+
+// Kahan (compensated) summation: accumulates `input` while carrying a
+// running compensation term `c` for lost low-order bits, reducing
+// floating-point rounding error versus a naive sum.
+template<typename T>
+T kahan_summation(std::vector<T> &input) {
+    T sum = 0;
+    T c = 0;    // running compensation for lost low-order bits
+    for (T x : input) {
+        T y = x - c;
+        T t = sum + y;
+        c = (t - sum) - y;
+        sum = t;
+    }
+    return sum;
+}
+
+// Host reference for one output feature of a (possibly grouped)
+// deconvolution / transposed convolution.
+//   input          - one batch of input, features outermost (f, z, y, x)
+//   weights        - this output feature's filter, (i, z, y, x) order
+//   bias           - scalar added to every output element
+//   stride, offset - spatial stride and input offset of the primitive
+//   input_f_start  - first input feature to read (group * ifm for groups)
+// Output spatial size: out = 2 * offset + (in - 1) * stride + filter.
+// Products are accumulated in AccumulatorT via Kahan summation.
+template <typename InputT, typename WeightsT, typename OutputT, typename AccumulatorT = typename deconvolution_traits<InputT>::accumulator_type>
+VVVF<OutputT> reference_deconvolution(
+    const VVVVF<InputT>& input, // fyx dimensions order
+    const VVVVF<WeightsT>& weights,
+    float bias,
+    tensor stride,
+    tensor offset,
+    size_t input_f_start
+) {
+    auto ifm = weights.size();
+    auto filter_z = static_cast<int>(weights[0].size());
+    auto filter_y = static_cast<int>(weights[0][0].size());
+    auto filter_x = static_cast<int>(weights[0][0][0].size());
+
+    auto in_z = static_cast<int>(input[0].size());
+    auto in_y = static_cast<int>(input[0][0].size());
+    auto in_x = static_cast<int>(input[0][0][0].size());
+
+    auto stride_x = stride.spatial[0];
+    auto stride_y = stride.spatial[1];
+    auto stride_z = stride.spatial[2];
+
+    auto offset_x = offset.spatial[0];
+    auto offset_y = offset.spatial[1];
+    auto offset_z = offset.spatial[2];
+
+    int out_x = 2 * offset_x + (in_x - 1) * stride_x + filter_x;
+    int out_y = 2 * offset_y + (in_y - 1) * stride_y + filter_y;
+    int out_z = 2 * offset_z + (in_z - 1) * stride_z + filter_z;
+    VVVF<OutputT> output(static_cast<size_t>(out_z), VVF<OutputT>(static_cast<size_t>(out_y), VF<OutputT>(static_cast<size_t>(out_x))));
+
+    for (int oz = 0; oz < out_z; ++oz) {
+        for (int oy = 0; oy < out_y; ++oy) {
+            for (int ox = 0; ox < out_x; ++ox) {
+                VF<AccumulatorT> values;
+                for (int fz = 0; fz < filter_z; ++fz) {
+                    // Skip filter taps that no input sample maps onto
+                    // (outside the input or between stride steps).
+                    int iz = oz - filter_z + 1 - offset_z + fz;
+                    if (iz < 0 || iz >= in_z * stride_z || iz % stride_z != 0)
+                        continue;
+                    iz = iz / stride_z;
+
+                    for (int fy = 0; fy < filter_y; ++fy) {
+                        int iy = oy - filter_y + 1 - offset_y + fy;
+                        if (iy < 0 || iy >= in_y * stride_y || iy % stride_y != 0)
+                            continue;
+                        iy = iy / stride_y;
+
+                        for (int fx = 0; fx < filter_x; ++fx) {
+                            int ix = ox - filter_x + 1 - offset_x + fx;
+                            if (ix < 0 || ix >= in_x * stride_x || ix % stride_x != 0)
+                                continue;
+                            ix = ix / stride_x;
+
+                            for (size_t ifi = 0; ifi < ifm; ++ifi) {
+                                auto in_val = input[input_f_start + ifi][iz][iy][ix];
+                                // Filter is mirrored in all spatial dims:
+                                // deconvolution == convolution with a
+                                // flipped kernel.
+                                auto wei_val = weights[ifi][filter_z - fz - 1][filter_y - fy - 1][filter_x - fx - 1];
+                                values.push_back(static_cast<AccumulatorT>(in_val) * static_cast<AccumulatorT>(wei_val));
+                            }
+                        }
+                    }
+                }
+                output[oz][oy][ox] = static_cast<OutputT>(kahan_summation<AccumulatorT>(values)) + static_cast<OutputT>(bias);
+            }
+        }
+    }
+    return output;
+}
+
TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
// Filter : 2x2
// Input : 2x2
EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]) << " index=" << i;
}
}
+
+// Full description of one randomized deconvolution test case.
+// print_params turns it into a readable parameterized-test name (negative
+// values are encoded with an "m" prefix).
+struct deconvolution_random_test_params {
+    data_types input_type;
+    format::type input_format;
+    tensor input_size;
+    data_types weights_type;
+    format::type weights_format;
+    tensor weights_size;
+    tensor strides;
+    tensor input_offset;
+    bool with_bias;
+    data_types output_type;
+    cldnn::implementation_desc deconv_desc;  // optional forced kernel/output format
+
+    // Builds the test-name suffix from all fields.
+    static std::string print_params(const testing::TestParamInfo<deconvolution_random_test_params>& param_info) {
+        auto& param = param_info.param;
+        // "m<abs>" spelling for negative values.
+        auto to_string_neg = [](int v) {
+            if (v >= 0) {
+                return std::to_string(v);
+            } else {
+                return "m" + std::to_string(-v);
+            }
+        };
+
+        // b x f x sx x sy x sz
+        auto print_tensor = [&](const tensor& size) {
+            return to_string_neg(size.batch[0]) + "x" +
+                to_string_neg(size.feature[0]) + "x" +
+                to_string_neg(size.spatial[0]) + "x" +
+                to_string_neg(size.spatial[1]) + "x" +
+                to_string_neg(size.spatial[2]);
+        };
+
+        // construct a readable name
+        return "in_" + dt_to_str(param.input_type) +
+            "_" + fmt_to_str(param.input_format) +
+            "_" + print_tensor(param.input_size) +
+            "_wei_" + dt_to_str(param.weights_type) +
+            "_" + fmt_to_str(param.weights_format) +
+            "_" + print_tensor(param.weights_size) +
+            (param.with_bias ? "_bias" : "") +
+            "_s_" + print_tensor(param.strides) +
+            "_off_" + print_tensor(param.input_offset) +
+            "_out_" + dt_to_str(param.output_type) +
+            (!param.deconv_desc.kernel_name.empty() ? "_kernel_" + param.deconv_desc.kernel_name : "") +
+            (param.deconv_desc.output_format != format::any ? "_fmt_" + fmt_to_str(param.deconv_desc.output_format) : "");
+    }
+};
+
+// Per-type comparison for the TYPED_*_EQ macros below: exact equality for
+// integral types, ULP comparison for float, 5% relative tolerance for fp16.
+template <typename T>
+struct typed_comparator {
+    static ::testing::AssertionResult compare(const char* lhs_expr, const char* rhs_expr, T ref, T val) {
+        return ::testing::internal::EqHelper<false>::Compare(lhs_expr, rhs_expr, ref, val);
+    }
+};
+
+template <>
+struct typed_comparator<float> {
+    static ::testing::AssertionResult compare(const char* lhs_expr, const char* rhs_expr, float ref, float val) {
+        return ::testing::internal::CmpHelperFloatingPointEQ<float>(lhs_expr, rhs_expr, ref, val);
+    }
+};
+
+template <>
+struct typed_comparator<FLOAT16> {
+    static ::testing::AssertionResult compare(const char* lhs_expr, const char* rhs_expr, FLOAT16 ref, FLOAT16 val) {
+        // Allow 5% relative error for half precision.
+        double abs_error = std::abs(0.05 * (double)ref);
+        return ::testing::internal::DoubleNearPredFormat(lhs_expr, rhs_expr, "5 percent", (double)ref, (double)val, abs_error);
+    }
+};
+
+// Random-value range per element type used by the generators below.
+template <typename T>
+struct type_test_ranges {
+    static constexpr int min = -1;
+    static constexpr int max = 1;
+};
+
+template <>
+struct type_test_ranges<uint8_t> {
+    static constexpr int min = 0;
+    static constexpr int max = 255;
+};
+
+template <>
+struct type_test_ranges<int8_t> {
+    static constexpr int min = -127;
+    static constexpr int max = 127;
+};
+
+// Assert/expect equality dispatched through typed_comparator<T>.
+#define TYPED_ASSERT_EQ(ref, val) \
+    ASSERT_PRED_FORMAT2(typed_comparator<decltype(ref)>::compare, ref, val)
+
+#define TYPED_EXPECT_EQ(ref, val) \
+    EXPECT_PRED_FORMAT2(typed_comparator<decltype(ref)>::compare, ref, val)
+
+// Harness for randomized deconvolution tests: generates random input and
+// weights, builds a network around a single "deconv" primitive, runs it, and
+// checks every output element against reference_deconvolution().
+template <typename InputT, typename WeightsT, typename OutputT>
+class deconvolution_random_test_base {
+public:
+    // Copies a logical b/f/z/y/x 5-D vector into device memory, honoring the
+    // memory's actual layout via get_linear_offset().
+    template <typename T>
+    void set_memory(cldnn::memory& mem, const VVVVVF<T>& data) {
+        auto ptr = mem.pointer<T>();
+
+        auto b = data.size();
+        auto f = data[0].size();
+        auto z = data[0][0].size();
+        auto y = data[0][0][0].size();
+        auto x = data[0][0][0][0].size();
+
+        for (size_t bi = 0; bi < b; ++bi) {
+            for (size_t fi = 0; fi < f; ++fi) {
+                for (size_t zi = 0; zi < z; ++zi) {
+                    for (size_t yi = 0; yi < y; ++yi) {
+                        for (size_t xi = 0; xi < x; ++xi) {
+                            auto coords = cldnn::tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
+                            auto offset = mem.get_layout().get_linear_offset(coords);
+                            ptr[offset] = data[bi][fi][zi][yi][xi];
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Same as set_memory but for grouped weights, g/o/i/z/y/x order.
+    template <typename T>
+    void set_memory_weights(cldnn::memory& mem, const VVVVVVF<T>& data) {
+        auto ptr = mem.pointer<T>();
+
+        auto g = data.size();
+        auto b = data[0].size();
+        auto f = data[0][0].size();
+        auto z = data[0][0][0].size();
+        auto y = data[0][0][0][0].size();
+        auto x = data[0][0][0][0][0].size();
+
+        for (size_t gi = 0; gi < g; ++gi) {
+            for (size_t bi = 0; bi < b; ++bi) {
+                for (size_t fi = 0; fi < f; ++fi) {
+                    for (size_t zi = 0; zi < z; ++zi) {
+                        for (size_t yi = 0; yi < y; ++yi) {
+                            for (size_t xi = 0; xi < x; ++xi) {
+                                auto coords = cldnn::tensor(group(gi), batch(bi), feature(fi), spatial(xi, yi, zi, 0));
+                                auto offset = mem.get_layout().get_linear_offset(coords);
+                                ptr[offset] = data[gi][bi][fi][zi][yi][xi];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Random 5-D data (b, f, z, y, x) within the per-type value range.
+    template <typename T>
+    VVVVVF<T> generate_random(cldnn::tensor size) {
+        return generate_random_5d<T>(
+            size.batch[0],
+            size.feature[0],
+            size.spatial[2],
+            size.spatial[1],
+            size.spatial[0],
+            type_test_ranges<T>::min,
+            type_test_ranges<T>::max);
+    }
+
+    // Random 6-D grouped weights (g, o, i, z, y, x) within the per-type range.
+    template <typename T>
+    VVVVVVF<T> generate_random_weights(cldnn::tensor size) {
+        return generate_random_6d<T>(
+            size.group[0],
+            size.batch[0],
+            size.feature[0],
+            size.spatial[2],
+            size.spatial[1],
+            size.spatial[0],
+            type_test_ranges<T>::min,
+            type_test_ranges<T>::max);
+    }
+
+    // Builds, executes and validates one test case described by `params`.
+    void run(cldnn::engine eng, const deconvolution_random_test_params& params, cldnn::build_options build_opts) {
+        uint32_t groups = params.weights_size.group[0];
+        size_t ifm = params.weights_size.feature[0];  // input features per group
+        size_t ofm = params.weights_size.batch[0];    // output features per group
+
+        auto input_data = generate_random<InputT>(params.input_size);
+        auto weights_data = generate_random_weights<WeightsT>(params.weights_size);
+
+        auto in_layout = cldnn::layout(cldnn::type_to_data_type<InputT>::value, params.input_format, params.input_size);
+        auto wei_layout = cldnn::layout(cldnn::type_to_data_type<WeightsT>::value, params.weights_format, params.weights_size);
+
+        auto wei_mem = cldnn::memory::allocate(eng, wei_layout);
+        auto in_mem = cldnn::memory::allocate(eng, in_layout);
+
+        this->set_memory_weights(wei_mem, weights_data);
+        this->set_memory(in_mem, input_data);
+
+        auto topo = cldnn::topology(
+            cldnn::input_layout("input", in_layout),
+            cldnn::data("weights", wei_mem)
+        );
+
+        // Left empty when with_bias is false; the reference then uses 0 bias.
+        VF<OutputT> bias_data;
+
+        if (params.with_bias) {
+            auto bias_size = cldnn::tensor(feature(params.weights_size.batch[0] * params.weights_size.group[0]));
+            auto bias_lay = cldnn::layout(cldnn::type_to_data_type<OutputT>::value, cldnn::format::bfyx, bias_size);
+            auto bias_mem = cldnn::memory::allocate(eng, bias_lay);
+            bias_data = generate_random_1d<OutputT>(bias_lay.size.feature[0], -1, 1);
+            set_values(bias_mem, bias_data);
+            topo.add(cldnn::data("bias", bias_mem));
+            topo.add(cldnn::deconvolution("deconv", "input", { "weights" }, { "bias" }, groups, params.strides, params.input_offset));
+        } else {
+            topo.add(cldnn::deconvolution("deconv", "input", { "weights" }, groups, params.strides, params.input_offset));
+        }
+
+        // Optionally force a specific kernel and/or output format.
+        if (!params.deconv_desc.kernel_name.empty() || params.deconv_desc.output_format != cldnn::format::any) {
+            build_opts.set_option(cldnn::build_option::force_implementations({ { "deconv", params.deconv_desc } }));
+        }
+
+        auto net = cldnn::network(eng, topo, build_opts);
+        net.set_input_data("input", in_mem);
+
+        auto result = net.execute();
+
+        // Record which kernel actually ran, for diagnostics on mismatch.
+        std::string kernel;
+        for (auto i : net.get_primitives_info()) {
+            if (i.original_id == "deconv")
+                kernel = i.kernel_id;
+        }
+
+        auto out_mem = result.at("deconv").get_memory();
+
+        // Compare results
+        {
+            auto ptr = out_mem.pointer<OutputT>();
+
+            auto b = static_cast<size_t>(out_mem.get_layout().size.batch[0]);
+            auto of = static_cast<size_t>(out_mem.get_layout().size.feature[0]);
+
+            for (size_t bi = 0; bi < b; ++bi) {
+                for (size_t fi = 0; fi < of; ++fi) {
+                    // Output feature fi belongs to group fi / ofm and reads
+                    // the input feature slice starting at group * ifm.
+                    size_t group = fi / ofm;
+                    auto reference = reference_deconvolution<InputT, WeightsT, OutputT>(
+                        input_data[bi],
+                        weights_data[group][fi % ofm],
+                        bias_data.empty() ? 0.f : static_cast<float>(bias_data[fi]),
+                        params.strides,
+                        params.input_offset,
+                        group * ifm);
+
+                    ASSERT_EQ(reference.size(), out_mem.get_layout().size.spatial[2]);
+                    ASSERT_EQ(reference[0].size(), out_mem.get_layout().size.spatial[1]);
+                    ASSERT_EQ(reference[0][0].size(), out_mem.get_layout().size.spatial[0]);
+
+                    for (size_t zi = 0; zi < reference.size(); zi++) {
+                        for (size_t yi = 0; yi < reference[0].size(); yi++) {
+                            for (size_t xi = 0; xi < reference[0][0].size(); xi++) {
+                                auto ref_val = reference[zi][yi][xi];
+                                auto out_coords = cldnn::tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0));
+                                auto out_offset = out_mem.get_layout().get_linear_offset(out_coords);
+                                auto out_val = ptr[out_offset];
+                                TYPED_ASSERT_EQ(ref_val, out_val)
+                                    << "at b=" << bi << ", f=" << fi << ", z=" << zi << ", y=" << yi << ", x=" << xi << std::endl
+                                    << " kernel: " << kernel;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+};
+
+#undef TYPED_ASSERT_EQ
+#undef TYPED_EXPECT_EQ
+
+// Parameterized fixture: translates the runtime data_types carried by the test
+// params into the matching compile-time <InputT, WeightsT, OutputT>
+// instantiation of deconvolution_random_test_base and executes it.
+class deconvolution_random_test : public testing::TestWithParam<deconvolution_random_test_params> {
+protected:
+ void SetUp() override {
+ eng = get_test_engine();
+ build_opts.set_option(cldnn::build_option::optimize_data(true));
+ }
+
+ // Entry point used by TEST_P: selects the input element type.
+ // Unsupported input types fall through the default case and run nothing.
+ void run() {
+ auto params = GetParam();
+ switch (params.input_type) {
+ case data_types::f32:
+ run_typed_in<float>();
+ break;
+ case data_types::f16:
+ run_typed_in<FLOAT16>();
+ break;
+ case data_types::i8:
+ run_typed_in<int8_t>();
+ break;
+ case data_types::u8:
+ run_typed_in<uint8_t>();
+ break;
+ default:
+ break;
+ }
+ }
+
+ cldnn::engine eng;
+ cldnn::build_options build_opts;
+
+private:
+ // Fully-typed leaf: instantiates the test base and runs it.
+ template <typename InputT, typename WeightsT, typename OutputT>
+ void run_typed() {
+ auto& params = GetParam();
+ deconvolution_random_test_base<InputT, WeightsT, OutputT> test;
+ test.run(eng, params, build_opts);
+ }
+
+ // Selects the output element type (f32/f16 only).
+ template <typename InputT, typename WeightsT>
+ void run_typed_in_wei() {
+ auto& params = GetParam();
+ switch (params.output_type) {
+ case data_types::f32:
+ run_typed<InputT, WeightsT, float>();
+ break;
+ case data_types::f16:
+ run_typed<InputT, WeightsT, FLOAT16>();
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Selects the weights element type.
+ template <typename InputT>
+ void run_typed_in() {
+ auto& params = GetParam();
+ switch (params.weights_type) {
+ case data_types::f32:
+ run_typed_in_wei<InputT, float>();
+ break;
+ case data_types::f16:
+ run_typed_in_wei<InputT, FLOAT16>();
+ break;
+ case data_types::i8:
+ run_typed_in_wei<InputT, int8_t>();
+ break;
+ case data_types::u8:
+ run_typed_in_wei<InputT, uint8_t>();
+ break;
+ default:
+ break;
+ }
+ }
+};
+
+// Fluent builder of parameter lists for the random deconvolution tests.
+// Each add_* method appends a family of cases (varied batch, stride, padding,
+// grouping) for one data-type/format combination and returns *this so calls
+// can be chained inside INSTANTIATE_TEST_CASE_P.
+class deconvolution_random_test_params_generator : public std::vector<deconvolution_random_test_params> {
+public:
+ using self = deconvolution_random_test_params_generator;
+ // Appends a single explicit case.
+ self& add(const deconvolution_random_test_params& params) {
+ push_back(params);
+ return *this;
+ }
+
+ // Small 2D cases: 1x1/3x3 kernels, grouped and depthwise, batches 1 and 2.
+ self& add_smoke_2d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 1, 1}, {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7}, wei_dt, format::oiyx, {15, 15, 3, 3}, {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Grouped
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(1, 1)), {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7}, wei_dt, format::goiyx, tensor(group(2), batch(16), feature(4), spatial(3, 3)), {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Depthwise
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(1, 1)), {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7}, wei_dt, format::goiyx, tensor(group(16), spatial(3, 3)), {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+
+ }
+ return *this;
+ }
+
+ // 3D analogue of add_smoke_2d (z dimension added, oizyx/goizyx weights).
+ self& add_smoke_3d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 1, 1, 1}, {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 15, 7, 7, 7}, wei_dt, format::oizyx, {15, 15, 3, 3, 3}, {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Grouped
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(1, 1, 1)), {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 8, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(2), batch(16), feature(4), spatial(3, 3, 3)), {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Depthwise
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(1, 1, 1)), {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 16, 7, 7, 7}, wei_dt, format::goizyx, tensor(group(16), spatial(3, 3, 3)), {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ }
+ return *this;
+ }
+
+ // Larger 2D cases (batch up to 16, asymmetric kernels, uneven groups).
+ self& add_extra_2d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2, 16 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 1, 1}, {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 3}, {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Asymmetric weights
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, tensor(1), {0, 0, 0, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17}, wei_dt, format::oiyx, {41, 31, 3, 2}, {1, 1, 2, 2}, {0, 0, 0, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Uneven groups
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(1, 1)), {1, 1, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), tensor(1), {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17}, wei_dt, format::goiyx, tensor(group(3), batch(7), feature(9), spatial(3, 3)), {1, 1, 2, 2}, {0, 0, -1, -1, 0}, true, out_dt, implementation_desc{out_fmt, ""} });
+ }
+ return *this;
+ }
+
+ // 3D analogue of add_extra_2d.
+ self& add_extra_3d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ std::vector<int> batches = { 1, 2, 16 };
+ for (auto b : batches) {
+ // 1x1
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 1, 1, 1}, {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ // 3x3
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 3, 3}, {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Asymmetric weights
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, tensor(1), {0, 0, 0, -1, -2}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 31, 19, 17, 11}, wei_dt, format::oizyx, {41, 31, 3, 2, 4}, {1, 1, 2, 2, 2}, {0, 0, 0, -1, -2}, true, out_dt, implementation_desc{out_fmt, ""} });
+ // Uneven groups
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), tensor(1), tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(1, 1, 1)), {1, 1, 2, 2, 2}, tensor(0), true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), tensor(1), {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ push_back(deconvolution_random_test_params{ in_dt, in_fmt, {b, 27, 19, 17, 11}, wei_dt, format::goizyx, tensor(group(3), batch(7), feature(9), spatial(3, 3, 3)), {1, 1, 2, 2, 2}, {0, 0, -1, -1, -1}, true, out_dt, implementation_desc{out_fmt, ""} });
+ }
+ return *this;
+ }
+
+ // Convenience: smoke + extra 2D cases for one type/format combination.
+ self& add_all_2d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ return add_smoke_2d(in_dt, wei_dt, out_dt, in_fmt, out_fmt)
+ .add_extra_2d(in_dt, wei_dt, out_dt, in_fmt, out_fmt);
+ }
+
+ // Convenience: smoke + extra 3D cases for one type/format combination.
+ self& add_all_3d(data_types in_dt, data_types wei_dt, data_types out_dt, format::type in_fmt, format::type out_fmt) {
+ return add_smoke_3d(in_dt, wei_dt, out_dt, in_fmt, out_fmt)
+ .add_extra_3d(in_dt, wei_dt, out_dt, in_fmt, out_fmt);
+ }
+};
+
+// Single parameterized test; all behavior lives in the fixture's run().
+TEST_P(deconvolution_random_test, basic) {
+ run();
+}
+
+// Smoke suite: small shapes over f32/f16/i8 in plain and fsv16-blocked layouts.
+INSTANTIATE_TEST_CASE_P(smoke, deconvolution_random_test, testing::ValuesIn(
+ deconvolution_random_test_params_generator()
+ .add_smoke_2d(data_types::f32, data_types::f32, data_types::f32, format::bfyx, format::any)
+ .add_smoke_3d(data_types::f32, data_types::f32, data_types::f32, format::bfzyx, format::any)
+ .add_smoke_2d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_smoke_3d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_smoke_2d(data_types::f16, data_types::f16, data_types::f16, format::bfyx, format::any)
+ .add_smoke_3d(data_types::f16, data_types::f16, data_types::f16, format::bfzyx, format::any)
+ .add_smoke_2d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_smoke_3d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_smoke_2d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_smoke_3d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+), deconvolution_random_test_params::print_params);
+
+// Extended suite (DISABLED_ prefix: skipped by default, run via
+// --gtest_also_run_disabled_tests): larger batches, asymmetric kernels,
+// uneven groups, u8 inputs, and bsv16_fsv16 double-blocked layouts.
+INSTANTIATE_TEST_CASE_P(DISABLED_extended, deconvolution_random_test, testing::ValuesIn(
+ deconvolution_random_test_params_generator()
+ .add_extra_2d(data_types::f32, data_types::f32, data_types::f32, format::bfyx, format::any)
+ .add_extra_3d(data_types::f32, data_types::f32, data_types::f32, format::bfzyx, format::any)
+ .add_extra_2d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_extra_3d(data_types::f32, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_extra_2d(data_types::f16, data_types::f16, data_types::f16, format::bfyx, format::any)
+ .add_extra_3d(data_types::f16, data_types::f16, data_types::f16, format::bfzyx, format::any)
+ .add_extra_2d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_extra_3d(data_types::f16, data_types::f16, data_types::f16, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_extra_2d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_all_2d(data_types::u8, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+ .add_extra_3d(data_types::i8, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+ .add_all_3d(data_types::u8, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::b_fs_zyx_fsv16)
+
+ .add_all_2d(data_types::i8, data_types::i8, data_types::f32, format::bs_fs_yx_bsv16_fsv16, format::bs_fs_yx_bsv16_fsv16)
+ .add_all_2d(data_types::u8, data_types::i8, data_types::f32, format::bs_fs_yx_bsv16_fsv16, format::bs_fs_yx_bsv16_fsv16)
+ .add_all_3d(data_types::i8, data_types::i8, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16)
+ .add_all_3d(data_types::u8, data_types::i8, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, format::bs_fs_zyx_bsv16_fsv16)
+), deconvolution_random_test_params::print_params);
#include "api/data.hpp"
#include "api/resample.hpp"
#include "api/mvn.hpp"
+#include "api/deconvolution.hpp"
#include "test_utils/test_utils.h"
size_t reorders_count_fused = get_reorders_count(fused);
size_t reorders_count_not_fused = get_reorders_count(not_fused);
+ std::stringstream description;
+ description << std::endl << "not fused: " << std::endl;
+ for (auto i : not_fused.get_primitives_info()) {
+ description << " " << i.original_id << " " << i.kernel_id << std::endl;
+ }
+ description << "fused: " << std::endl;
+ for (auto i : fused.get_primitives_info()) {
+ description << " " << i.original_id << " " << i.kernel_id << std::endl;
+ }
+ SCOPED_TRACE(description.str());
+
// Subtract reorders count to handle execution in different layouts when input/output reorders can be added in the graph
ASSERT_EQ(fused.get_executed_primitives().size() - reorders_count_fused, p.expected_fused_primitives);
ASSERT_EQ(not_fused.get_executed_primitives().size() - reorders_count_not_fused, p.expected_not_fused_primitives);
#define CASE_CONV_FP32_10 {32, 16, 4, 5, 4}, {32, 32, 4, 5, 4}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
+#define CASE_CONV_FP32_13 {1, 16, 18, 5, 4}, {1, 16, 16, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP16_1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_CONV_FP16_2 {1, 16, 4, 5}, {1, 32, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::os_is_yx_isv16_osv16, data_types::f16, format::bfyx
bc_test_params{CASE_CONV_FP32_9, 2, 3},
bc_test_params{CASE_CONV_FP32_11, 2, 3},
bc_test_params{CASE_CONV_FP32_12, 2, 3},
+ // bc_test_params{CASE_CONV_FP32_13, 2, 3}, - leads to mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8.basic/11 test failure
bc_test_params{CASE_CONV_FP16_6, 2, 3},
bc_test_params{CASE_CONV_FP16_7, 2, 3},
bc_test_params{CASE_CONV3D_S8S8_4, 2, 4},
}), );
+// Fusion test: convolution -> relu -> quantize, checking that the activation
+// and quantize nodes fuse into the convolution (primitive counts come from
+// the bc_test_params used to instantiate the suite).
+class conv_int8_relu_quantize : public ConvFusingTest {};
+// Variant quantizing the fused output to signed int8 in [-127, 127].
+TEST_P(conv_int8_relu_quantize, i8) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("bias", get_mem(get_bias_layout(p))),
+ data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), -127)),
+ data("out_hi", get_mem(get_single_element_layout(p), 127)),
+ convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+ activation("relu", "conv_prim", activation_func::relu),
+ quantize("quantize", "relu", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::i8),
+ reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32)
+ );
+ // Output elements are in range [-127, 127]
+ // 1.0f difference is allowed, since quantize can return different values in ref and scale_shift kernels
+ // due to big error of division (in ref kernel).
+ tolerance = 1.0f;
+ execute(p);
+}
+
+// Variant quantizing the fused output to unsigned int8 in [0, 255].
+TEST_P(conv_int8_relu_quantize, u8) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("bias", get_mem(get_bias_layout(p))),
+ data("in_lo", get_mem(get_per_channel_layout(p), 0)),
+ data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), 0)),
+ data("out_hi", get_mem(get_single_element_layout(p), 255)),
+ convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
+ activation("relu", "conv_prim", activation_func::relu),
+ quantize("quantize", "relu", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8),
+ reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32)
+ );
+ // 1.0f slack: ref and scale_shift quantize kernels may differ by one step.
+ tolerance = 1.0f;
+ execute(p);
+}
+
+// Runs the relu+quantize fusion tests over the u8/s8 2D and 3D conv cases;
+// each entry carries (params, expected fused, expected not-fused) counts.
+INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_relu_quantize,
+ ::testing::ValuesIn(std::vector<bc_test_params>{
+ bc_test_params{CASE_CONV_U8S8_1, 2, 4},
+ bc_test_params{CASE_CONV_U8S8_2, 2, 4},
+ bc_test_params{CASE_CONV_U8S8_3, 2, 4},
+ bc_test_params{CASE_CONV_U8S8_4, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_1, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_2, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_3, 2, 4},
+ bc_test_params{CASE_CONV_S8S8_4, 2, 4},
+
+ bc_test_params{CASE_CONV3D_U8S8_1, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_2, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_3, 2, 4},
+ bc_test_params{CASE_CONV3D_U8S8_4, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_1, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_2, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_3, 2, 4},
+ bc_test_params{CASE_CONV3D_S8S8_4, 2, 4},
+ }), );
+
class conv_int8_scale_activation_quantize_i8 : public ConvFusingTest {};
TEST_P(conv_int8_scale_activation_quantize_i8, basic) {
auto p = GetParam();
lrn_test_params{CASE_LRN_FP16_5, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"},
}), );
+
+/* ----------------------------------------------------------------------------------------------------- */
+/* -------------------------------- Activation cases --------------------------------------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
+struct activation_test_params {
+ tensor input_size;
+ data_types input_type;
+ format input_format;
+ activation_func activation_function;
+ activation_additional_params additional_params;
+ data_types default_type;
+ format default_format;
+ size_t expected_fused_primitives;
+ size_t expected_not_fused_primitives;
+};
+
+#define CASE_ACTIVATION_F32_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_2 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_3 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_4 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_5 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F32_6 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F32_1 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F32_2 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F32_3 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_1 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_2 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_3 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_4 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_5 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_F16_6 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F16_1 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F16_2 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_F16_3 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_U8_1 {1, 16, 8, 8}, data_types::u8, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_U8_2 {1, 16, 8, 8}, data_types::u8, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+#define CASE_ACTIVATION_3D_U8_1 {1, 16, 8, 8, 8}, data_types::u8, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
+
+class ActivationFusingTest : public ::BaseFusingTest<activation_test_params> {
+public:
+ void execute(activation_test_params& p) {
+ auto input_prim = get_mem(get_input_layout(p));
+
+ network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+ network network_fused(this->engine, this->topology_fused, bo_fused);
+
+ network_fused.set_input_data("input", input_prim);
+ network_not_fused.set_input_data("input", input_prim);
+
+ compare(network_not_fused, network_fused, p);
+ }
+
+ layout get_input_layout(activation_test_params& p) { return layout{p.input_type, p.input_format, p.input_size}; }
+
+ layout get_per_channel_layout(activation_test_params& p) {
+ return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
+ }
+ activation_func get_activation_function(activation_test_params& p) { return p.activation_function; }
+ activation_additional_params get_activation_additional_params(activation_test_params& p) { return p.additional_params; }
+ format get_input_format(activation_test_params &p) { return p.input_format; }
+};
+
+class activation_quantize_i8 : public ActivationFusingTest {};
+TEST_P(activation_quantize_i8, basic) {
+ auto p = GetParam();
+ create_topologies(input_layout("input", get_input_layout(p)),
+ activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
+ data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
+ data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
+ quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
+ reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ fusings_gpu,
+ activation_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_4, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_5, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_6, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
+ }), );
+
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_fusings_gpu,
+ activation_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ // fp16 cases
+ activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_4, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_5, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_6, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
+
+ // u8 cases
+ activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_U8_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
+ }), );
+
+class activation_opt_quantize_i8 : public ActivationFusingTest {};
+TEST_P(activation_opt_quantize_i8, basic) {
+ auto p = GetParam();
+ implementation_desc activation_impl = {get_input_format(p), "activation_opt"};
+ this->bo_fused.set_option(build_option::force_implementations({{"act", activation_impl}}));
+
+ create_topologies(input_layout("input", get_input_layout(p)),
+ activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
+ data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
+ data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
+ quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
+ reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ fusings_gpu,
+ activation_opt_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
+ }), );
+
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_fusings_gpu,
+ activation_opt_quantize_i8,
+ ::testing::ValuesIn(std::vector<activation_test_params>{
+ // fp16 cases
+ activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
+
+ // u8 cases
+ activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
+ activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
+ }), );
+
+/* ----------------------------------------------------------------------------------------------------- */
+/* --------------------------------------- Deconvolution cases ----------------------------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
+using deconv_test_params = bc_test_params;
+
+// in_shape; out_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
+#define CASE_DECONV_FP32_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::gs_oiyx_gsv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::is_os_yx_osv16_isv16, data_types::f32, format::bfyx
+#define CASE_DECONV_FP32_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::gs_oiyx_gsv16, data_types::f32, format::bfyx
+
+#define CASE_DECONV_FP16_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::gs_oiyx_gsv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::oiyx, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::is_os_yx_osv16_isv16, data_types::f16, format::bfyx
+#define CASE_DECONV_FP16_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::gs_oiyx_gsv16, data_types::f16, format::bfyx
+
+#define CASE_DECONV_S8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_S8S8_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+
+#define CASE_DECONV_U8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
+#define CASE_DECONV_U8S8_8 {1, 32, 4, 5}, {1, 32, 7, 9}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+
+// 3D
+// in_shape; out_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
+#define CASE_DECONV_FP32_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfzyx, data_types::f32, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::gs_oizyx_gsv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfzyx, data_types::f32, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::gs_oizyx_gsv16, data_types::f32, format::bfzyx
+#define CASE_DECONV_FP32_3D_9 {16, 16, 4, 5, 3}, {16, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::is_os_zyx_osv16_isv16, data_types::f32, format::bfzyx
+
+#define CASE_DECONV_FP16_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfzyx, data_types::f16, format::oizyx, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::gs_oizyx_gsv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfzyx, data_types::f16, format::oizyx, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::gs_oizyx_gsv16, data_types::f16, format::bfzyx
+#define CASE_DECONV_FP16_3D_9 {16, 16, 4, 5, 3}, {16, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::f16, format::bs_fs_zyx_bsv16_fsv16, data_types::f16, format::is_os_zyx_osv16_isv16, data_types::f16, format::bfzyx
+
+#define CASE_DECONV_S8S8_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_S8S8_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::i8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+
+#define CASE_DECONV_U8S8_3D_1 {1, 15, 4, 5, 3}, {1, 30, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_2 {1, 16, 4, 5, 3}, {1, 32, 6, 7, 5}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_3 {1, 16, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_4 {1, 32, 4, 5, 3}, {1, 32, 4, 5, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_5 {1, 15, 4, 5, 3}, {1, 30, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_6 {1, 16, 4, 5, 3}, {1, 32, 9, 11, 7}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_7 {1, 16, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 1, 1, 1}, tensor{1, 1, 2, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::oizyx, data_types::f32, format::bfzyx
+#define CASE_DECONV_U8S8_3D_8 {1, 32, 4, 5, 3}, {1, 32, 7, 9, 5}, {1, 1, 3, 3, 3}, tensor{1, 1, 2, 2, 2}, tensor{0, 0, -1, -1, -1}, tensor{1}, 32, data_types::u8, format::b_fs_zyx_fsv16, data_types::i8, format::goizyx, data_types::f32, format::bfzyx
+
+class DeconvolutionFusingTest : public ::WeightsPrimitiveFusingTest {};
+
+class deconv_actv : public DeconvolutionFusingTest {};
+TEST_P(deconv_actv, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ activation("act", "deconv", activation_func::relu),
+ reorder("out", "act", p.default_format, data_types::f32)
+ );
+ // A much higher tolerance is needed because of the deconvolution -> convolution optimization
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_actv,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ deconv_test_params{ CASE_DECONV_FP32_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_9, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 2, 3 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_9, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 3 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 3 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 3 },
+}), );
+
+class deconv_actv_eltw_actv : public DeconvolutionFusingTest {};
+TEST_P(deconv_actv_eltw_actv, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("eltw_data", get_mem(get_output_layout(p))),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ activation("act1", "deconv", activation_func::relu),
+ eltwise("eltw", {"act1", "eltw_data"}, eltwise_mode::sum),
+ activation("act2", "eltw", activation_func::relu),
+ reorder("out", "act2", p.default_format, data_types::f32)
+ );
+ // A much higher tolerance is needed because of the deconvolution -> convolution optimization
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_actv_eltw_actv,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ // Some fusions are disabled by the deconvolution -> convolution optimization
+ deconv_test_params{ CASE_DECONV_FP32_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_9, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 2, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_9, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 5 },
+}), );
+
+class deconv_scale_actv_quant_i8 : public DeconvolutionFusingTest {};
+TEST_P(deconv_scale_actv_quant_i8, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("scale_data", get_mem(get_per_channel_layout(p), 1.f/p.kernel.count())),
+ data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out_lo", get_mem(get_single_element_layout(p), -127)),
+ data("out_hi", get_mem(get_single_element_layout(p), 127)),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ scale("scale", "deconv", "scale_data"),
+ activation("actv", "scale", activation_func::softsign),
+ quantize("quant", "actv", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
+ reorder("out", "quant", p.default_format, data_types::f32)
+ );
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ // Some fusings disabled under deconvolution -> convolution optimization
+ // Quantize fusing disabled for fp16/fp32 for performance reasons
+ // deconv_test_params{ CASE_DECONV_FP32_1, 4, 5 }, FIXME Failure due to activation + quantization fusing
+ deconv_test_params{ CASE_DECONV_FP32_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 4, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP32_3D_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 3, 5 },
+ // FIXME no quantize implementation for bs_fs_yx_bsv16_fsv16 format AND add_required_reorders pass completely ruins data types
+ // add_required_reorders pass tries to reorder everything to output type if no format exists, this ruins fp32 -> int8 quantize
+ // deconv_test_params{ CASE_DECONV_FP32_3D_9, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 3, 5 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 3, 5 },
+ // deconv_test_params{ CASE_DECONV_FP16_3D_9, 3, 5 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 5 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 5 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 5 },
+}), );
+
+class deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8 : public DeconvolutionFusingTest {};
+TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) {
+ auto p = GetParam();
+ create_topologies(
+ input_layout("input", get_input_layout(p)),
+ data("weights", get_mem(get_weights_layout(p))),
+ data("scale1_data", get_mem(get_per_channel_layout(p), 1.f / p.kernel.count())),
+ data("in1_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in1_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out1_lo", get_mem(get_single_element_layout(p), 0)),
+ data("out1_hi", get_mem(get_single_element_layout(p), 255)),
+ data("eltw_data", get_mem(layout(p.default_type, p.input_format, p.out_shape))),
+ data("scale2_data", get_mem(get_per_channel_layout(p), 1.f / p.kernel.count())),
+ data("in2_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
+ data("in2_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
+ data("out2_lo", get_mem(get_single_element_layout(p), -127)),
+ data("out2_hi", get_mem(get_single_element_layout(p), 127)),
+ deconvolution("deconv", "input", { "weights" }, p.groups, p.stride, p.pad),
+ scale("scale1", "deconv", "scale1_data"),
+ activation("actv1", "scale1", activation_func::relu),
+ quantize("quant1", "actv1", "in1_lo", "in1_hi", "out1_lo", "out1_hi", 256, data_types::u8),
+ eltwise("eltw", {"quant1", "eltw_data"}, eltwise_mode::sum, p.default_type),
+ scale("scale2", "eltw", "scale2_data"),
+ activation("actv2", "scale2", activation_func::relu),
+ quantize("quant2", "actv2", "in2_lo", "in2_hi", "out2_lo", "out2_hi", 255, data_types::i8),
+ reorder("out", "quant2", p.default_format, data_types::f32)
+ );
+
+ tolerance = 1.f;
+ execute(p);
+}
+
+INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8,
+ ::testing::ValuesIn(std::vector<deconv_test_params>{
+ // Some fusings disabled under deconvolution -> convolution optimization
+ // Quantize fusing disabled for fp16/fp32 for performance reasons
+ // deconv_test_params{ CASE_DECONV_FP32_1, 7, 9 }, FIXME Failure due to activation + quantization fusing
+ deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_8, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_FP16_1, 7, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_8, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_8, 2, 9 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
+
+ // deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 }, FIXME Failure due to activation + quantization fusing
+ deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP32_3D_8, 6, 9 },
+ // deconv_test_params{ CASE_DECONV_FP32_3D_9, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_FP16_3D_1, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_2, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_3, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_4, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_5, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_6, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_7, 6, 9 },
+ deconv_test_params{ CASE_DECONV_FP16_3D_8, 6, 9 },
+ // deconv_test_params{ CASE_DECONV_FP16_3D_9, 6, 9 },
+
+ deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_U8S8_3D_8, 2, 9 },
+
+ deconv_test_params{ CASE_DECONV_S8S8_3D_1, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_2, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_3, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_4, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_5, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_6, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_7, 2, 9 },
+ deconv_test_params{ CASE_DECONV_S8S8_3D_8, 2, 9 },
+}), );
using VVVVF = std::vector<VVVF<T>>; // batch of 3d feature maps
template<typename T>
using VVVVVF = std::vector<VVVVF<T>>; // split of bfyx filters
+template<typename T>
+using VVVVVVF = std::vector<VVVVVF<T>>; // split of bfyx filters
template<typename T>
inline VF<T> flatten_4d(cldnn::format input_format, VVVVF<T> &data) {
return v;
}
+template<typename T>
+VVVVVVF<T> generate_random_6d(size_t a, size_t b, size_t c, size_t d, size_t e, size_t f, int min, int max, int k = 8) {
+ VVVVVVF<T> v(a);
+ for (size_t i = 0; i < a; ++i)
+ v[i] = generate_random_5d<T>(b, c, d, e, f, min, max, k);
+ return v;
+}
+
template <class T> void set_value(const cldnn::pointer<T>& ptr, uint32_t index, T value) { ptr[index] = value; }
template <class T> T get_value(const cldnn::pointer<T>& ptr, uint32_t index) { return ptr[index]; }
, do_sum_(false)
, max_data_reg_idx_(31), max_unroll_(12), compute_reg_step_(1)
, data_reg_base_idx_(0)
- , bf16_emu_(nullptr), eltwise_injector_(nullptr)
+ , bf16_emu_(nullptr)
+ , attr_(pd->attr())
+ , jit_eltwise_injectors_(0)
{
using namespace types;
using namespace Xbyak;
const auto dst_md = memory_desc_wrapper(pd->dst_pd());
dst_os_stride_ = dst_md.blk_off(0, 0, 0, 1);
-
+ bool do_depthwise_ = false;
auto &post_ops = pd->attr()->post_ops_;
- const int eltwise_ind = post_ops.find(primitive_kind::eltwise);
- do_eltwise_ = eltwise_ind != -1;
- if (do_eltwise_)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, post_ops.entry_[eltwise_ind].eltwise, true,
- reserved_eltwise_gpr, reserved_eltwise_maskr);
+ for (int i = 0; i < post_ops.len_; i++) {
+ auto& post_op = post_ops.entry_[i];
+ if (post_op.is_eltwise()) {
+ jit_eltwise_injectors_.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(this,
+ post_op.eltwise.alg,
+ post_op.eltwise.alpha,
+ post_op.eltwise.beta,
+ true, reserved_eltwise_gpr, reserved_eltwise_maskr));
+ } else if (post_op.is_depthwise()) {
+ do_depthwise_ = true;
+ }
+ }
do_sum_ = dst_data_type != data_type::f32
&& post_ops.contain(primitive_kind::sum, 0);
if (do_bias_)
vreg_bias = Zmm(data_reg_base_idx_++);
+ if (do_depthwise_)
+ vreg_dw = Zmm(data_reg_base_idx_++);
+
vlen_ = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
if (!mayiuse(avx512_core_bf16)) {
mov(reg_len, ptr[reg_param + PARAM_OFF(spatial_length)]);
mov(reg_oc_iter, ptr[reg_param + PARAM_OFF(oc_work)]);
+ mov(reg_oc_offset, ptr[reg_param + PARAM_OFF(oc_offset)]);
+
if (do_sum_)
vbroadcastss(vreg_sum_scale, ptr[reg_param + PARAM_OFF(sum_scale)]);
#undef PARAM_OFF
vfmadd231ps(vreg_dst(idx), vreg_prev_dst(idx), vreg_sum_scale);
}
- if (do_eltwise_)
- eltwise_injector_->compute_vector(vreg_dst_idx(idx));
+ int eltwise_inj_idx = 0;
+ const auto& p = attr_->post_ops_;
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ jit_eltwise_injectors_[eltwise_inj_idx]->compute_vector(vreg_dst_idx(idx));
+ eltwise_inj_idx++;
+ } else if (post_op.is_depthwise()) {
+ mov(reg_dw, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+ lea(reg_dw, ptr[reg_dw + reg_oc_offset]);
+
+ switch (post_op.depthwise.alg) {
+ case alg_kind::depthwise_scale_shift: {
+ vbroadcastss(vreg_dw, ptr[reg_dw]);
+ vmulps(vreg_dst(idx), vreg_dst(idx), vreg_dw);
+ mov(reg_dw, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+ lea(reg_dw, ptr[reg_dw + reg_oc_offset]);
+ vbroadcastss(vreg_dw, ptr[reg_dw]);
+ vaddps(vreg_dst(idx), vreg_dst(idx), vreg_dw);
+ break;
+ }
+ case alg_kind::depthwise_prelu: {
+ vpxord(vreg_dw, vreg_dw, vreg_dw);
+ vcmpps(kmask, vreg_dst(idx), vreg_dw, _cmp_lt_os);
+ vbroadcastss(vreg_dw, ptr[reg_dw]);
+ vmulps(vreg_dst(idx) | kmask, vreg_dst(idx), vreg_dw);
+ break;
+ }
+ default: assert(!"unsupported depthwise algorithm");
+ }
+ }
+ }
if (dst_data_type == data_type::bf16) {
// TODO: implement store by zmm registers for bf16
if (do_bias_)
add(reg_bias, sizeof(acc_data_t));
+ add(reg_oc_offset, sizeof(acc_data_t));
+
dec(reg_oc_iter);
jnz(oc_loop, T_NEAR); // oc_loop end
postamble();
- if (do_eltwise_)
- eltwise_injector_->prepare_table();
+ for (auto& inj : jit_eltwise_injectors_)
+ inj->prepare_table();
ker_ = getCode<decltype(ker_)>();
}
template <data_type_t dst_data_type>
void gemm_bf16_convolution_fwd_t<dst_data_type>::pp_ker_t::operator ()
- (dst_data_t *dst, const acc_data_t *acc, const acc_data_t *bias,
+ (dst_data_t *dst, const acc_data_t *acc, const acc_data_t *bias, size_t g_offset,
float sum_scale, size_t dst_stride_in_elements,
- size_t acc_stride_in_elements, size_t len, bool do_parallel)
+ size_t acc_stride_in_elements, size_t len, bool do_parallel, const post_ops_t& p)
{
assert(ker_);
if (len == 0)
ker_args args;
args.acc = acc + start_oc * acc_stride_in_elements;
args.dst = dst + start_oc * dst_stride_in_elements;
- args.bias = bias + start_oc;
+ args.bias = bias + start_oc + g_offset;
args.sum_scale = sum_scale;
args.dst_stride_in_bytes =
dst_stride_in_elements * sizeof(dst_data_t);
acc_stride_in_elements * sizeof(acc_data_t);
args.spatial_length = len;
args.oc_work = end_oc - start_oc;
+ args.oc_offset = (start_oc + g_offset) * sizeof(acc_data_t);
+
ker_(&args);
}
});
: nullptr;
const jit_gemm_conv_conf_t &jcp = this->pd()->jcp_;
+ auto src_offset = this->pd()->src_pd()->desc()->layout_desc.blocking.offset_padding;
+ auto dst_offset = this->pd()->dst_pd()->desc()->layout_desc.blocking.offset_padding;
float *bias = nullptr;
if (pd()->desc()->bias_desc.data_type == data_type::bf16) {
for (size_t iwork = start; iwork < end; ++iwork) {
int oh = ohb * jcp.oh_block;
int ow = owb * jcp.ow_block;
- const src_data_t *_src = src + (n * jcp.ngroups + g) * src_step;
+ const src_data_t *_src = src + (n * jcp.ngroups + g) * src_step + src_offset;
const wei_data_t *_weights = weights + g * weights_g_size;
- dst_data_t *_dst_im = dst + (n * jcp.ngroups + g) * dst_step;
+ dst_data_t *_dst_im = dst + (n * jcp.ngroups + g) * dst_step + dst_offset;
const int h_step = nstl::min(jcp.oh_block, jcp.oh - oh);
const int w_step = nstl::min(jcp.ow_block, jcp.ow - ow);
if (jcp.im2col_sz) {
if (this->pd()->is_postprocess_required()) {
size_t acc_str = LDC;
size_t dst_str = M;
- (*pp_ker_)(dst_local, _acc, bias + g * jcp.oc,
- sum_scale, dst_str, acc_str, m, jcp.nthr == 1);
+ (*pp_ker_)(dst_local, _acc, bias, g * jcp.oc,
+ sum_scale, dst_str, acc_str, m, jcp.nthr == 1, post_ops);
}
nd_iterator_step(g, jcp.ngroups, n, jcp.mb, od, jcp.od, ohb, nb_oh,
}
virtual bool is_gemm_conv_format() const {
- auto const &po = this->attr()->post_ops_;
- auto is_eltwise = [&](int idx)
- { return po.entry_[idx].is_eltwise(); };
- auto is_sum = [&](int idx) { return po.entry_[idx].is_sum(); };
-
- switch (po.len_) {
- case 0: return true; // no post_ops
- case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
- case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
- default: return false;
- }
+ auto const &p = this->attr()->post_ops_;
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
+
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+
+ auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+ auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+ auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+ return all_post_ops_supported() &&
+ count(primitive_kind::sum) <= 1 &&
+ IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
+
+ return false;
}
};
~pp_ker_t() {
delete bf16_emu_;
- delete eltwise_injector_;
+ for (auto inj : jit_eltwise_injectors_)
+ delete inj;
+ jit_eltwise_injectors_.clear();
}
void operator()(dst_data_t *dst, const acc_data_t *acc,
- const acc_data_t *bias, float sum_scale,
- size_t dst_str, size_t acc_str, size_t len, bool do_parallel);
+ const acc_data_t *bias, size_t g_offset, float sum_scale,
+ size_t dst_str, size_t acc_str, size_t len, bool do_parallel, const post_ops_t& p);
size_t dst_os_stride_;
size_t acc_stride_in_bytes;
size_t spatial_length;
size_t oc_work;
+ size_t oc_offset;
};
enum {
Xbyak::Reg64 reg_dst_str = r13;
Xbyak::Reg64 reg_acc_str = r14;
+ using Vmm = typename cpu_isa_traits<avx512_common>::Vmm;
+ Xbyak::Reg64 reg_oc_offset = r10;
+ Xbyak::Reg64 reg_dw = r9;
+ Xbyak::Opmask kmask = k7;
+ post_ops_t post_ops_;
+
Xbyak::Reg64 reserved_eltwise_gpr = r10;
Xbyak::Opmask reserved_eltwise_maskr = k2;
- Xbyak::Zmm vreg_sum_scale, vreg_bias;
+ Xbyak::Zmm vreg_sum_scale, vreg_bias, vreg_dw;
Xbyak::Zmm bf16_emu_reserv_1 = Xbyak::Zmm(27);
Xbyak::Zmm bf16_emu_reserv_2 = Xbyak::Zmm(28);
int data_reg_base_idx_;
size_t vlen_;
bf16_emulation_t *bf16_emu_;
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ const primitive_attr_t* attr_;
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> jit_eltwise_injectors_;
void generate();
int vreg_dst_idx(int iter) {
&& IMPLICATION(this->with_bias(), one_of(
desc()->bias_desc.data_type,
data_type::f32, data_type::bf16))
- && attr()->post_ops_.len_ <= 1
- && IMPLICATION(attr()->post_ops_.len_ == 1,
- attr()->post_ops_.entry_[0].is_eltwise())
+ && is_supported_post_ops()
&& dense_gemm_consitency_check(src_pd(), weights_pd(),
dst_pd());
if (!ok) return status::unimplemented;
return status::success;
}
+ virtual bool is_supported_post_ops() const {
+ const auto& p = this->attr()->post_ops_;
+
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
+
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+
+ return all_post_ops_supported();
+ }
+
bool dst_is_acc_;
private:
, pp_kernel_(nullptr)
{
bool has_bias = pd()->with_bias(),
- has_eltwise = pd()->attr()->post_ops_.len_ == 1,
- has_scale = !pd()->attr()->output_scales_.has_default_values();
- postops_in_ip_ = false
- || !pd()->dst_is_acc_ || has_bias || has_eltwise || has_scale;
+ has_post_ops = pd()->attr()->post_ops_.len_ > 0,
+ has_scale = !pd()->attr()->output_scales_.has_default_values();
+ postops_in_ip_ = has_bias || has_post_ops || has_scale;
if (postops_in_ip_) {
if (mayiuse(avx512_core_bf16)) {
pp_kernel_ = new inner_product_utils::jit_pp_kernel_t<avx512_core_bf16, data_type::f32, dst_data_type>(apd);
if (post_op.is_eltwise()) {
if (post_op.eltwise.alg != alg_kind::eltwise_relu)
return status::unimplemented;
- } else if (post_op.is_depthwise()) {
+ } else if (post_op.is_depthwise() || post_op.is_quantization()) {
return status::unimplemented;
}
}
if (post_op.is_eltwise()) {
if (post_op.eltwise.alg != alg_kind::eltwise_relu)
return status::unimplemented;
- } else if (post_op.is_depthwise()) {
+ } else if (post_op.is_depthwise() || post_op.is_quantization()) {
return status::unimplemented;
}
}
if (!mayiuse(avx2)) {
for (int i = 0; i < p.len_; i++) {
auto &post_op = p.entry_[i];
- if (post_op.is_depthwise()) {
+ if (post_op.is_depthwise() || post_op.is_quantization()) {
return status::unimplemented;
}
}
};
#endif
auto vreg_accum = [=](int i_load, int i_ur) {
- int idx = i_ur * load_loop_blk + i_load;
+ int idx = i_ur + i_load * ur;
assert(idx < 31);
return Zmm(idx);
};
}
}
/* Eltwise post-op */
- if (jcp.with_eltwise)
- eltwise_injector_->compute_vector_range(0, ur * load_loop_blk);
+ int eltwise_inj_idx = 0;
+ int depthwise_inj_idx = 0;
+ const auto& p = attr_.post_ops_;
+
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, ur * load_loop_blk);
+ eltwise_inj_idx++;
+ } else if (post_op.is_depthwise()) {
+ mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+ mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+ add(reg_d_weights, reg_oc_off);
+ add(reg_d_bias, reg_oc_off);
+
+ for (int j = 0; j < load_loop_blk; ++j) {
+ int start_idx = vreg_accum(j, 0).getIdx();
+ int end_idx = start_idx + ur;
+
+ depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+ start_idx, end_idx, reg_d_weights, reg_d_bias);
+
+ add(reg_d_weights, jcp.oc_block * sizeof(float));
+ add(reg_d_bias, jcp.oc_block * sizeof(float));
+ }
+
+ depthwise_inj_idx++;
+ }
+ }
};
auto store_output = [=](bool output_is_aligned) {
void jit_avx512_core_bf16_1x1_conv_kernel::generate()
{
+ const auto& p = attr_.post_ops_;
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+ this,
+ post_op.eltwise.alg,
+ post_op.eltwise.alpha,
+ post_op.eltwise.beta
+ ));
+ } else if (post_op.is_depthwise()) {
+ depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+ this,
+ post_op.depthwise.alg
+ ));
+ }
+ }
+
preamble();
mov(reg_bcast_data, ptr[param1 + GET_OFF(bcast_data)]);
mov(reg_output_stride, ptr[param1 + GET_OFF(output_stride)]);
}
+ mov(reg_oc_off, ptr[param1 + GET_OFF(oc_off)]);
auto load_loop_body = [=](int load_loop_blk) {
bcast_loop(load_loop_blk);
add(reg_load_data, load_loop_blk * jcp.load_loop_load_step);
assert(!"invalid prop_kind");
}
sub(reg_load_loop_work, load_loop_blk * jcp.load_loop_iter_step);
+ add(reg_oc_off, load_loop_blk * jcp.oc_block * jcp.typesize_out);
};
const int simd_w = 16;
postamble();
- if (jcp.with_eltwise)
- eltwise_injector_->prepare_table();
+ for (auto& inj : eltwise_injectors)
+ inj->prepare_table();
if (jcp.prop_kind == backward_weights) {
const uint16_t dst_prm_array[32] =
jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) {
const auto &p = attr.post_ops_;
- auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
- auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
- switch (p.len_) {
- case 0: return true; // no post_ops
- case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
- case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
- default: return false;
- }
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+ auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+ auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+ auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+ return all_post_ops_supported() &&
+ count(primitive_kind::sum) <= 1 &&
+ IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
return false;
}
#include "jit_primitive_conf.hpp"
#include "jit_uni_eltwise.hpp"
#include "jit_avx512_core_bf16cvt.hpp"
+#include "jit_uni_depthwise.hpp"
+
//#define BF16_CONV_1x1_BWD_W_JIT_KER_USES_PERMW_TRANSPOSITION
const primitive_attr_t &attr) :
jit_generator(nullptr, ker_code_size),
jcp(ajcp), attr_(attr)
- , eltwise_injector_(nullptr)
, bf16_emu_(nullptr)
{
- if (jcp.with_eltwise)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, jcp.eltwise);
-
if (!mayiuse(avx512_core_bf16))
bf16_emu_ = new bf16_emulation_t(this,
bf16_emu_reserv_1, bf16_emu_reserv_2,
}
~jit_avx512_core_bf16_1x1_conv_kernel() {
- delete eltwise_injector_;
+ for (auto inj : eltwise_injectors)
+ delete inj;
+ eltwise_injectors.clear();
+
+ for (auto inj : depthwise_injectors)
+ delete inj;
+ depthwise_injectors.clear();
delete bf16_emu_;
}
reg64_t reg_load_loop_work = rsi;
reg64_t reg_reduce_loop_work = r11;
reg64_t bcast_loop_iter = rdx;
- reg64_t reduce_loop_iter = abi_param1;
+ reg64_t reduce_loop_iter = r13;
reg64_t reg_reduce_pos_flag = rax;
reg64_t reg_output_stride = r13;
reg64_t reg_bias_data = r12;
Xbyak::Zmm zmm_bias = Xbyak::Zmm(31);
Xbyak::Label dst_prm_table;
+ reg64_t reg_oc_off = abi_param1;
+ reg64_t reg_d_weights = imm_addr64;
+ reg64_t reg_d_bias = r13;
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+ nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
int bcast_loop_work_offt = 0;
#ifdef BF16_CONV_1x1_BWD_W_JIT_KER_USES_PERMW_TRANSPOSITION
} else
p.bcast_data = src + data_blk_off(src_d, n, _icb, ih, iw);
+ p.oc_off = _ocb * jcp.oc_block * sizeof(dst_data_t);
kernel_->jit_ker(&p);
};
}
}
- if (jcp.with_eltwise) {
- if (ur_w == jcp.ur_w) {
- eltwise_injector_->compute_vector_range(0,
- jcp.nb_oc_blocking * jcp.ur_w);
- } else {
- for (int k = 0; k < jcp.nb_oc_blocking; k++)
- eltwise_injector_->compute_vector_range(k * jcp.ur_w,
- k * jcp.ur_w + ur_w);
+ int eltwise_inj_idx = 0;
+ int depthwise_inj_idx = 0;
+ const auto &p = attr_.post_ops_;
+
+ for (int i = 0; i < p.len_; i++) {
+ auto& post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ if (ur_w == jcp.ur_w) {
+ eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0,
+ jcp.nb_oc_blocking * jcp.ur_w);
+ } else {
+ for (int k = 0; k < jcp.nb_oc_blocking; k++)
+ eltwise_injectors[eltwise_inj_idx]->compute_vector_range(k * jcp.ur_w,
+ k * jcp.ur_w + ur_w);
+ }
+
+ eltwise_inj_idx++;
+ } else if (post_op.is_depthwise()) {
+ mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+ mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+ add(reg_d_weights, ptr[this->param1 + GET_OFF(oc_off)]);
+ add(reg_d_bias, ptr[this->param1 + GET_OFF(oc_off)]);
+
+ for (int k = 0; k < jcp.nb_oc_blocking; k++) {
+ depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+ k*jcp.ur_w, k*jcp.ur_w + ur_w, reg_d_weights, reg_d_bias);
+
+ add(reg_d_weights, jcp.oc_block * sizeof(float));
+ add(reg_d_bias, jcp.oc_block * sizeof(float));
+ }
+
+ depthwise_inj_idx++;
}
}
void jit_avx512_core_bf16_fwd_kernel::generate()
{
+ const auto &p = attr_.post_ops_;
+ for (int i = 0; i < p.len_; i++) {
+ auto &post_op = p.entry_[i];
+ if (post_op.is_eltwise()) {
+ eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+ this,
+ post_op.eltwise.alg,
+ post_op.eltwise.alpha,
+ post_op.eltwise.beta
+ ));
+ } else if (post_op.is_depthwise()) {
+ depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+ this,
+ post_op.depthwise.alg
+ ));
+ }
+ }
+
int iw = jcp.iw;
int ow = jcp.ow;
int ow_block = jcp.ow_block;
}
postamble();
- if (jcp.with_eltwise)
- eltwise_injector_->prepare_table();
+ for (auto& inj : eltwise_injectors)
+ inj->prepare_table();
}
bool jit_avx512_core_bf16_fwd_kernel::post_ops_ok(
jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
const auto &p = attr.post_ops_;
- auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
- auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+ auto all_post_ops_supported = [&]() {
+ bool ok = true;
- switch (p.len_) {
- case 0: return true; // no post_ops
- case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
- case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
- default: return false;
- }
+ for (int i = 0; i < p.len_; i++) {
+ ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
+ }
+ return ok;
+ };
+ auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+ auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+ auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+ return all_post_ops_supported() &&
+ count(primitive_kind::sum) <= 1 &&
+ IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
return false;
}
#include "jit_primitive_conf.hpp"
#include "jit_uni_eltwise.hpp"
#include "jit_avx512_core_bf16cvt.hpp"
+#include "jit_uni_depthwise.hpp"
//#define BF16_CONV_BWD_W_JIT_KER_USES_PERMW_TRANSPOSITION
//#define BF16_CONV_BWD_W_DOES_NOT_USE_BARRIERS
jit_generator(nullptr, ker_code_size),
jcp(ajcp),
attr_(attr),
- eltwise_injector_(nullptr),
bf16_emu_(nullptr)
{
- if (jcp.with_eltwise)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, jcp.eltwise);
if (!mayiuse(avx512_core_bf16))
bf16_emu_ = new bf16_emulation_t(this,
bf16_emu_reserv_1, bf16_emu_reserv_2,
~jit_avx512_core_bf16_fwd_kernel() {
delete bf16_emu_;
- delete eltwise_injector_;
+ for (auto inj : eltwise_injectors)
+ delete inj;
+ eltwise_injectors.clear();
+
+ for (auto inj : depthwise_injectors)
+ delete inj;
+ depthwise_injectors.clear();
}
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_bf16_fwd_kernel)
Xbyak::Zmm bf16_emu_reserv_5 = Xbyak::Zmm(29);
Xbyak::Zmm bf16_emu_reserv_6 = Xbyak::Zmm(30);
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ reg64_t reg_d_weights = imm_addr64;
+ reg64_t reg_d_bias = reg_kj;
+
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+ nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
+
bf16_emulation_t *bf16_emu_;
inline void prepare_output(int ur_w);
auto src_w = src + src_d.blk_off(n, g_icb, iw_s);
auto wht_w = weights + wht_blk_off(weights_d, g, ocb);
+ par_conv.oc_off = g_oc * sizeof(dst_data_t);
par_conv.src = src_w;
par_conv.dst = dst_w;
par_conv.filt = wht_w;
auto aux_src = src_w + i_t_overflow * dilate_h * src_h_stride;
auto aux_wht = wht_w + i_t_overflow * wht_h_stride;
+ par_conv.oc_off = g_oc * sizeof(dst_data_t);
par_conv.src = aux_src;
par_conv.dst = dst_w;
par_conv.filt = aux_wht;
auto aux_src = src_w + i_t_overflow * dilate_h * src_h_stride;
auto aux_wht = wht_w + i_t_overflow * wht_h_stride;
+ par_conv.oc_off = g_oc * sizeof(dst_data_t);
par_conv.src = aux_src;
par_conv.dst = dst_w;
par_conv.filt = aux_wht;
L(iter_exit_label);
}
-void jit_avx512_dw_conv_fwd_kernel_bf16::apply_activation(
+// Emits all attached post-ops (eltwise/depthwise), in post-op order, over the
+// ur_w * ur_ch_blocks accumulator registers. Renamed from apply_activation
+// since it now handles more than a single eltwise.
+void jit_avx512_dw_conv_fwd_kernel_bf16::apply_postprocess(
        int ur_ch_blocks, int ur_w) {
-    if (this->jcp.with_eltwise) {
-        eltwise_injector_->compute_vector_range(
-                acc_idx_start, ur_w * ur_ch_blocks + acc_idx_start);
+    int eltwise_inj_idx = 0;
+    int depthwise_inj_idx = 0;
+    const auto& p = attr_.post_ops_;
+
+    for (int i = 0; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            // Eltwise is elementwise: one call covers the whole acc range.
+            int start_idx = get_acc_reg(0).getIdx();
+            int end_idx = get_acc_reg(ur_w * ur_ch_blocks).getIdx();
+
+            eltwise_injectors[eltwise_inj_idx]->compute_vector_range(start_idx, end_idx);
+            eltwise_inj_idx++;
+        } else if (post_op.is_depthwise()) {
+            // Depthwise needs per-channel weights/bias: base pointers come
+            // from the post-op data, offset by the current output-channel
+            // offset passed through the call args (oc_off).
+            mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+            mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+            add(reg_d_weights, ptr[this->param1 + GET_OFF(oc_off)]);
+            add(reg_d_bias, ptr[this->param1 + GET_OFF(oc_off)]);
+
+            for (int ch = 0; ch < ur_ch_blocks; ch++) {
+                int start_idx = get_acc_reg(ur_w * ch).getIdx();
+                int end_idx = get_acc_reg(ur_w * ch + ur_w).getIdx();
+
+                depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+                        start_idx, end_idx, reg_d_weights, reg_d_bias);
+
+                // Advance to the next channel block's weights/bias.
+                add(reg_d_weights, jcp.ch_block * sizeof(float));
+                add(reg_d_bias, jcp.ch_block * sizeof(float));
+            }
+
+            depthwise_inj_idx++;
+        }
    }
}
load_src(ur_ch_blocks, ur_w);
apply_filter_unrolled(ur_ch_blocks, ur_w);
- apply_activation(ur_ch_blocks, ur_w);
+ apply_postprocess(ur_ch_blocks, ur_w);
store_dst(ur_ch_blocks, ur_w);
add(reg_input, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
load_src(ur_ch_blocks, ur_w);
apply_filter(ur_ch_blocks, ur_w);
- apply_activation(ur_ch_blocks, ur_w);
+ apply_postprocess(ur_ch_blocks, ur_w);
store_dst(ur_ch_blocks, ur_w);
add(reg_input, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
}
void jit_avx512_dw_conv_fwd_kernel_bf16::generate() {
+    // Instantiate injectors in post-op order before emitting any code;
+    // apply_postprocess indexes into these vectors during emission.
+    const auto& p = attr_.post_ops_;
+    for (int i = 0; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
    this->preamble();
    mov(reg_input, ptr[this->param1 + GET_OFF(src)]);
    this->postamble();
-    if (jcp.with_eltwise)
-        eltwise_injector_->prepare_table();
+    // Eltwise lookup tables are emitted after the kernel body.
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
}
inline void jit_avx512_dw_conv_bwd_data_kernel_bf16::load_ddst(
#include "jit_generator.hpp"
#include "jit_primitive_conf.hpp"
#include "jit_uni_eltwise.hpp"
-
+#include "jit_uni_depthwise.hpp"
#include "jit_avx512_core_bf16cvt.hpp"
namespace mkldnn {
struct jit_avx512_dw_conv_fwd_kernel_bf16 : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_dw_conv_fwd_kernel_bf16)
- jit_avx512_dw_conv_fwd_kernel_bf16(jit_conv_conf_t ajcp, const primitive_attr_t&)
- : jcp(ajcp), eltwise_injector_(nullptr), bf16_emu_(nullptr) {
- if (jcp.with_eltwise)
- eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
- this, jcp.eltwise);
+ jit_avx512_dw_conv_fwd_kernel_bf16(jit_conv_conf_t ajcp, const primitive_attr_t& attr)
+ : jcp(ajcp), attr_(attr), bf16_emu_(nullptr) {
if (!mayiuse(avx512_core_bf16))
bf16_emu_ = new bf16_emulation_t(this, bf16_emu_reserv_1,
bf16_emu_reserv_2, bf16_emu_reserv_3, bf16_emu_reserv_4,
}
~jit_avx512_dw_conv_fwd_kernel_bf16() {
- delete eltwise_injector_;
+ for (auto inj : eltwise_injectors)
+ delete inj;
+ eltwise_injectors.clear();
+
+ for (auto inj : depthwise_injectors)
+ delete inj;
+ depthwise_injectors.clear();
+
delete bf16_emu_;
}
jit_conv_conf_t jcp;
+ const primitive_attr_t& attr_;
void (*jit_ker)(jit_conv_call_s *);
private:
reg64_t reg_ur_w = rbp;
reg64_t reg_ch_blocks = aux1_reg_input;
reg64_t imm_addr64 = aux1_reg_input;
+ reg64_t reg_d_weights = imm_addr64;
+ reg64_t reg_d_bias = iter_kh;
Xbyak::Zmm zmm_ker_reg = Xbyak::Zmm(0);
Xbyak::Zmm zmm_src_reg = Xbyak::Zmm(1);
inline void load_src(int ur_ch_blocks, int ur_w);
inline void apply_filter(int ur_ch_blocks, int ur_w);
inline void apply_filter_unrolled(int ur_ch_blocks, int ur_w);
- inline void apply_activation(int ur_ch_blocks, int ur_w);
+ inline void apply_postprocess(int ur_ch_blocks, int ur_w);
inline void store_dst(int ur_ch_blocks, int ur_w);
inline void loop_ow(int ur_ch_blocks);
- jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
+ nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+ nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
bf16_emulation_t *bf16_emu_;
bool jit_uni_dw_conv_fwd_kernel<isa, kernel_dt>::post_ops_ok(
        jit_conv_conf_t &jcp, const primitive_attr_t &attr, bool is_bf16) {
    const auto &p = attr.post_ops_;
+    // Unified check for the f32 and bf16 paths: any chain of
+    // sum/eltwise/depthwise, at most one sum, sum (if present) first.
+    auto all_post_ops_supported = [&]() {
+        bool ok = true;
-    if (is_bf16) {
-        auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
-        auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
-
-        switch (p.len_) {
-        case 0: return true; // no post_ops
-        case 1: return is_eltwise(0) || is_sum(0); // sum OR eltwise
-        case 2: return is_sum(0) && is_eltwise(1); // sum -> eltwise
-        default: return false;
+        for (int i = 0; i < p.len_; i++) {
+            ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise);
        }
-    } else {
-        auto all_post_ops_supported = [&]() {
-            bool ok = true;
-
-            for (int i = 0; i < p.len_; i++) {
-                ok = ok && utils::one_of(p.entry_[i].kind, primitive_kind::sum, primitive_kind::eltwise, primitive_kind::depthwise, primitive_kind::quantization);
-            }
-            return ok;
-        };
-        auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
-        auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
-        auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
-
-        return all_post_ops_supported() &&
-               count(primitive_kind::sum) <= 1 &&
-               IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
-    }
+        return ok;
+    };
+    auto contain = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind) != -1; };
+    auto position = [&](mkldnn::impl::primitive_kind_t kind) { return p.find(kind); };
+    auto count = [&](mkldnn::impl::primitive_kind_t kind) { return p.count(kind); };
+
+    // NOTE(review): the removed f32 branch also accepted
+    // primitive_kind::quantization; the unified allowed set drops it —
+    // confirm this narrowing is intentional for f32 depthwise conv.
+    return all_post_ops_supported() &&
+           count(primitive_kind::sum) <= 1 &&
+           IMPLICATION(contain(primitive_kind::sum), position(primitive_kind::sum) == 0);
+    // NOTE(review): unreachable after the return above; candidate for removal.
    return false;
}
return s_val >= 0 ? s_val : s_val*w_val;
}
+// Reinterprets a float's bit pattern as two 16-bit halves.
+union float_raw {
+    float f;
+    unsigned short i[2];
+};
+
+// Expands bf16 -> f32: bf16 occupies the high 16 bits of an f32 whose low
+// mantissa bits are zero.
+// NOTE(review): treating i[1] as the high half assumes a little-endian host.
+static float bf16tof32(mkldnn_bfloat16_t bf16) {
+    union float_raw t = { 0 };
+    t.i[1] = bf16;
+    t.i[0] = 0;
+    return t.f;
+}
+
+// Narrows f32 -> bf16 by keeping only the high 16 bits, i.e. truncation of
+// the mantissa (no round-to-nearest-even).
+static mkldnn_bfloat16_t f32tobf16(float f32) {
+    union float_raw t = { 0 };
+    t.f = f32;
+    return t.i[1];
+}
+
+// Depthwise scale_shift reference in bf16: d = s * w + b, computed in f32
+// and truncated back to bf16.
+inline mkldnn_bfloat16_t bf16_scale_shift_fwd(mkldnn_bfloat16_t s_val, mkldnn_bfloat16_t w_val, mkldnn_bfloat16_t b_val) {
+    return f32tobf16(bf16tof32(s_val) * bf16tof32(w_val) + bf16tof32(b_val));
+}
+
+// Depthwise prelu reference in bf16: d = (s >= 0) ? s : s * w.
+// Fix: compare the decoded f32 value rather than the raw bf16 bits. If
+// mkldnn_bfloat16_t is an unsigned 16-bit integer, 's_val >= 0' is always
+// true (negative values just have the sign bit set in the raw pattern), so
+// the negative-slope branch would never be taken.
+inline mkldnn_bfloat16_t bf16_prelu_fwd(mkldnn_bfloat16_t s_val, mkldnn_bfloat16_t w_val) {
+    const float s = bf16tof32(s_val);
+    return s >= 0 ? s_val : f32tobf16(s * bf16tof32(w_val));
+}
+
ref_depthwise_scalar_fwd_t::ref_depthwise_scalar_fwd_t(const alg_kind_t alg_)
: alg(alg_) {
using namespace alg_kind;
data_t b_val = bias ? bias[bias_d.off(wei_idx)] : (data_t)0;
data_t &d_val = dst[data_off];
- switch (alg_kind) {
- case depthwise_scale_shift: d_val = scale_shift_fwd(s_val, w_val, b_val); break;
- case depthwise_prelu: d_val = prelu_fwd(s_val, w_val); break;
- default: assert(!"unknown depthwise alg_kind");
+ if (data_type == mkldnn_bf16) {
+ switch (alg_kind) {
+ case depthwise_scale_shift: d_val = bf16_scale_shift_fwd(s_val, w_val, b_val); break;
+ case depthwise_prelu: d_val = bf16_prelu_fwd(s_val, w_val); break;
+ default: assert(!"unknown depthwise alg_kind");
+ }
+ } else {
+ switch (alg_kind) {
+ case depthwise_scale_shift: d_val = scale_shift_fwd(s_val, w_val, b_val); break;
+ case depthwise_prelu: d_val = prelu_fwd(s_val, w_val); break;
+ default: assert(!"unknown depthwise alg_kind");
+ }
}
});
}
template struct ref_depthwise_fwd_t<data_type::f32>;
+template struct ref_depthwise_fwd_t<data_type::bf16>;
}
}
test_convolution_forward_x8s8fp.cpp
test_convolution_forward_x8s8fp_3d.cpp
test_convolution_eltwise_forward_f32.cpp
+ test_convolution_eltwise_forward_bf16.cpp
test_convolution_eltwise_forward_x8s8f32s32.cpp
test_convolution_backward_data_f32.cpp
test_convolution_backward_data_s16s16s32.cpp
test_rnn_forward.cpp
test_roi_pooling_forward.cpp
test_convolution_depthwise_forward_f32.cpp
+ test_convolution_depthwise_forward_bf16f32.cpp
test_convolution_depthwise_forward_x8s8f32s32.cpp
test_convolution_dw_conv_f32.cpp
test_convolution_dw_conv_u8s8s32.cpp
--- /dev/null
+/*******************************************************************************
+* Copyright 2020 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "mkldnn.hpp"
+#include "test_convolution_depthwise_forward_common.hpp"
+
+namespace mkldnn {
+
+// Template args look like <src, wei, acc, dst>: bf16 src/weights, f32
+// accumulation/destination — confirm against
+// test_convolution_depthwise_forward_common.hpp.
+using convolution_test = convolution_depthwise_test<mkldnn_bfloat16_t, mkldnn_bfloat16_t, float, float>;
+
+// Body intentionally empty: all work happens in the fixture's SetUp.
+TEST_P(convolution_test, TestConvolution)
+{
+}
+
+#define EXPAND_FORMATS(src, weights, bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
+    mkldnn::memory::format::bias, mkldnn::memory::format::dst }
+
+#define FMT_WEIGHTS_BLOCKED16 OIhw8i16o2i
+#define FMT_WEIGHTS_BLOCKED16_DW Goihw16g
+
+#define ENGINE mkldnn::engine::kind::cpu
+#define ALGORITHM mkldnn::convolution_direct
+
+#define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
+#define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
+
+// NOTE(review): uses INSTANTIATE_TEST_CASE_P (deprecated in newer gtest),
+// while the bf16 eltwise test file uses INSTANTIATE_TEST_SUITE_P — consider
+// aligning the two for consistency with the project's gtest version.
+#define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_CASE_P( \
+    str, convolution_test, ::testing::Values(__VA_ARGS__))
+
+#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
+    CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(Convolution, \
+    str), depthwise), __VA_ARGS__)
+
+#define EXPAND_ARGS(args) args
+
+#define PARAMS(...) \
+ EXPAND_ARGS(PARAMS_CONV(depthwise_scale_shift, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(depthwise_prelu, __VA_ARGS__))
+
+#define PARAMS_CONV(alg, src, weights, bias, dst, ...) \
+ test_convolution_depthwise_params_t {alg, ENGINE, ALGORITHM, \
+ EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
+ {__VA_ARGS__} }
+
+ INST_TEST_CASE(SimpleSmall,
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 2, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+ );
+
+ INST_TEST_CASE(SimpleSmall_Blocked16,
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16_DW, x, nChw16c,
+ 2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16_DW, x, nChw16c,
+ 2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1),
+
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 8, 13, 13, 16, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 8, 3, 3, 16, 3, 3, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
+ 2, 1, 16, 13, 13, 48, 13, 13, 3, 3, 0, 0, 1, 1)
+ );
+}
return div_up(a, b) * b;
}
+// Expands bf16 -> f32 for the scalar reference computation: bf16 bits go to
+// the high half, low mantissa bits are zeroed.
+// NOTE(review): i[1]-as-high-half assumes a little-endian host.
+static float bf16tof32(mkldnn_bfloat16_t bf16) {
+    union float_raw t = { 0 };
+    t.i[1] = bf16;
+    t.i[0] = 0;
+    return t.f;
+}
+
template <typename data_t_src, typename data_t_wei,
typename data_t_acc, typename data_t_dst>
void compute_ref_conv_depthwise_fwd(const test_convolution_sizes_t &c,
const memory &depthwise_weights, const memory &depthwise_bias)
{
data_t_src *src_data = (data_t_src *)src.get_data_handle();
+ memory::data_type data_type_src = data_traits<data_t_src>::data_type;
data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
data_t_dst *bias_data
= (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
+ oc * padded_ic_w / c.ng * c.kh * c.kw
+ ic * c.kh * c.kw + kh * c.kw + kw;
- dst_data[didx] += src_data[map_index(src_d, iidx)]
+ if (data_type_src == mkldnn_bf16) {
+ dst_data[didx] += bf16tof32(src_data[map_index(src_d, iidx)])
+ * bf16tof32(weights_data[map_index(weights_d, widx)]);
+ } else {
+ dst_data[didx] += src_data[map_index(src_d, iidx)]
* weights_data[map_index(weights_d, widx)];
+ }
}
switch (depthwise_alg) {
--- /dev/null
+/*******************************************************************************
+* Copyright 2020 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "math_utils.hpp"
+#include "mkldnn.hpp"
+#include "test_convolution_eltwise_forward_common.hpp"
+
+namespace mkldnn {
+
+// Template args look like <src, wei, acc, dst>: bf16 src/weights, f32
+// accumulation/destination — confirm against
+// test_convolution_eltwise_forward_common.hpp.
+using convolution_test = convolution_eltwise_test<mkldnn_bfloat16_t, mkldnn_bfloat16_t, float, float>;
+
+// Body intentionally empty: all work happens in the fixture's SetUp.
+TEST_P(convolution_test, TestConvolutionEltwise)
+{
+}
+
+#define EXPAND_FORMATS(src, weights, bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
+    mkldnn::memory::format::bias, mkldnn::memory::format::dst }
+
+#define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
+#define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
+
+// NOTE(review): uses INSTANTIATE_TEST_SUITE_P while the bf16 depthwise test
+// file uses the deprecated INSTANTIATE_TEST_CASE_P — consider aligning.
+#define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_SUITE_P( \
+    str, convolution_test, ::testing::Values(__VA_ARGS__))
+
+#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
+    CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(Convolution, \
+    str), eltwise), __VA_ARGS__)
+
+#define EXPAND_ARGS(args) args
+
+#define PARAMS(...) \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_relu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_tanh, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_elu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_square, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_abs, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_sqrt, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_linear, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_bounded_relu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_soft_relu, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_logistic, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_exp, __VA_ARGS__)), \
+ EXPAND_ARGS(PARAMS_CONV(eltwise_swish, __VA_ARGS__))
+
+#define ELTWISE_ALPHA 0.5f
+#define ELTWISE_BETA 1.5f
+
+#define PARAMS_CONV(alg, src, weights, bias, dst, ...) \
+ test_convolution_eltwise_params_t {alg, mkldnn::engine::kind::cpu, \
+ mkldnn::convolution_direct, ELTWISE_ALPHA, ELTWISE_BETA, \
+ EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
+ {__VA_ARGS__} }
+
+ INST_TEST_CASE(SimpleSmall,
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, oihw, x, nchw, 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+ PARAMS(nchw, goihw, x, nchw, 2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+ );
+
+ INST_TEST_CASE(SimpleSmall_Blocked16,
+ PARAMS(nChw16c, Goihw16g, x, nChw16c, 1, 48, 48, 20, 20, 48, 20, 20, 3, 3, 1, 1, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 1, 1, 48, 20, 20, 48, 20, 20, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 1, 1, 48, 20, 20, 48, 20, 20, 3, 3, 0, 0, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 2, 1, 32, 32, 32, 32, 32, 32, 3, 3, 0, 0, 1, 1)
+ );
+
+ INST_TEST_CASE(SimpleSmall_Blocked16_Tail,
+ PARAMS(nChw16c, Goihw16g, x, nChw16c, 1, 47, 47, 20, 20, 47, 20, 20, 3, 3, 1, 1, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 1, 1, 47, 20, 20, 47, 20, 20, 1, 1, 0, 0, 1, 1),
+ PARAMS(nChw16c, OIhw8i16o2i, x, nChw16c, 2, 1, 32, 32, 32, 32, 32, 32, 3, 3, 0, 0, 1, 1)
+ );
+}
namespace mkldnn {
+// Expands bf16 -> f32 for the scalar reference computation: bf16 bits go to
+// the high half, low mantissa bits are zeroed.
+// NOTE(review): i[1]-as-high-half assumes a little-endian host.
+static float bf16tof32(mkldnn_bfloat16_t bf16) {
+    union float_raw t = { 0 };
+    t.i[1] = bf16;
+    t.i[0] = 0;
+    return t.f;
+}
+
template <typename data_t_src, typename data_t_wei,
typename data_t_acc, typename data_t_dst>
void compute_ref_conv_eltwise_fwd(const test_convolution_sizes_t &c,
float elt_alpha, float elt_beta)
{
data_t_src *src_data = (data_t_src *)src.get_data_handle();
+ memory::data_type data_type_src = data_traits<data_t_src>::data_type;
data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
data_t_dst *bias_data
= (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
+ oc * padded_ic_w / c.ng * c.kh * c.kw
+ ic * c.kh * c.kw + kh * c.kw + kw;
- dst_data[didx] += src_data[map_index(src_d, iidx)]
+ if (data_type_src == mkldnn_bf16) {
+ dst_data[didx] += bf16tof32(src_data[map_index(src_d, iidx)])
+ * bf16tof32(weights_data[map_index(weights_d, widx)]);
+ } else {
+ dst_data[didx] += src_data[map_index(src_d, iidx)]
* weights_data[map_index(weights_d, widx)];
+ }
}
auto &d = dst_data[didx];
case eltwise_soft_relu: d = soft_relu_fwd(d); break;
case eltwise_logistic: d = logistic_fwd(d); break;
case eltwise_exp: d = exp_fwd(d); break;
+ case eltwise_gelu: d = gelu_fwd(d); break;
case eltwise_clamp: d = clamp_fwd(d, elt_alpha, elt_beta); break;
case eltwise_swish: d = swish_fwd(d, elt_alpha); break;
default: assert(!"unknown alg_kind");
list(APPEND ${TARGET}_LINKER_LIBS ${BLAS_LIBRARIES})
elseif (GEMM STREQUAL "MKL")
## enable cblas_gemm from mlkml package
-if(WIN32 OR APPLE)
- detect_mkl("mklml")
-else()
- if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
- detect_mkl("mklml_intel")
+ if(WIN32 OR APPLE)
+ detect_mkl("mklml")
else()
- detect_mkl("mklml_gnu")
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+ detect_mkl("mklml_intel")
+ else()
+ detect_mkl("mklml_gnu")
+ endif()
endif()
-endif()
add_definitions(-DUSE_MKL -DUSE_CBLAS)
include_directories(AFTER ${MKLINC})
list(APPEND ${TARGET}_LINKER_LIBS ${MKLLIB})
"${IE_MAIN_SOURCE_DIR}/thirdparty/movidius/mvnc"
"${CMAKE_BINARY_DIR}/thirdparty/movidius/mvnc")
+ if(ENABLE_TESTS)
+ add_subdirectory(tests)
+ endif()
endif()
msec = __abstime->tv_sec * 1000 + __abstime->tv_nsec / 1000000;
}
-    return SleepConditionVariableCS(&__cond->_cv, __mutex, (DWORD)msec);
+    // Map the Win32 BOOL result onto the pthread contract: 0 on success,
+    // ETIMEDOUT on timeout, otherwise the raw Win32 error code. The old code
+    // returned the BOOL directly (non-zero on *success*), inverting the
+    // pthread success convention.
+    // SleepConditionVariableCS returns bool=true on success.
+    if (SleepConditionVariableCS(&__cond->_cv, __mutex, (DWORD)msec))
+        return 0;
+
+    const int rc = (int)GetLastError();
+    return rc == ERROR_TIMEOUT ? ETIMEDOUT : rc;
}
int pthread_cond_broadcast(pthread_cond_t *__cond)
${LIBUSB_LIBRARY})
endif()
+if(ENABLE_TESTS AND ENABLE_MYRIAD_MVNC_TESTS)
+ add_subdirectory(tests)
+endif()
+
using Devices = std::list<wd_context_as_tuple>;
Devices watchedDevices;
- std::mutex devicesListAcc;
- std::atomic<int> generation = {0};
- std::atomic_bool threadRunning;
+ std::atomic_bool threadRunning {false};
pthread_mutex_t routineLock;
pthread_cond_t wakeUpPingThread;
WatchdogImpl& operator = (const WatchdogImpl&) = delete;
WatchdogImpl& operator = (WatchdogImpl&&) = delete;
+    // Scope guard: runs the supplied callable when the object is destroyed,
+    // guaranteeing cleanup on every exit path (used e.g. to destroy pthread
+    // attributes even when initialization throws). Non-copyable by design.
+    class AutoScope {
+    public:
+        explicit AutoScope(const std::function<void()>& func) : _func(func) {}
+        ~AutoScope() { _func(); }
+
+        AutoScope(const AutoScope&) = delete;
+        AutoScope& operator=(const AutoScope&) = delete;
+    private:
+        std::function<void()> _func;
+    };
+
+
private:
WatchdogImpl() {
if (rc != 0) {
throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc));
}
+ AutoScope attrDestroy([&attr]{
+ if (pthread_condattr_destroy(&attr) != 0)
+ mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute.");
+ });
rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
if (rc != 0) {
}
private:
+    /// @note: We are using here pthread_cond_timedwait as a replacement for condition_variable::wait_for,
+    /// as libstdc++ has bug not using monotonic clock. When GCC 10.x became minimum supported version,
+    /// that code could be removed.
+    /// @param sleepInterval  relative time to wait for a wake-up signal
+    /// @pre the caller must hold routineLock (POSIX requires the mutex to be
+    ///      locked when calling pthread_cond_timedwait)
+    /// @throws std::runtime_error on any wait error other than timeout
+    void wait_for(const milliseconds sleepInterval) {
+        struct timespec timeToWait = {0, 0};
+
+        const auto sec = std::chrono::duration_cast<std::chrono::seconds>(sleepInterval);
+#if (defined(__APPLE__) || defined(_WIN32))
+        // Apple/Windows paths take a *relative* timespec (see the
+        // pthread_cond_timedwait_relative_np call below).
+        timeToWait.tv_sec = sec.count();
+        timeToWait.tv_nsec =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+            std::chrono::nanoseconds(sec).count();
+#else
+        // Linux path builds an *absolute* CLOCK_MONOTONIC deadline and
+        // normalizes tv_nsec into [0, 1e9).
+        clock_gettime(CLOCK_MONOTONIC, &timeToWait);
+        const auto secondInNanoSeconds = 1000000000L;
+        const auto nsecSum = std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+            std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec;
+        timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds;
+        timeToWait.tv_nsec = nsecSum % secondInNanoSeconds;
+#endif // (defined(__APPLE__) || defined(_WIN32))
+
+#if defined(__APPLE__)
+        const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait);
+#else
+        const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait);
+#endif // defined(__APPLE__)
+        // ETIMEDOUT is the expected way to leave this wait; anything else
+        // (other than a wake-up) is a real error.
+        if (rc != 0 && rc != ETIMEDOUT) {
+            throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. rc: " + std::to_string(rc));
+        }
+    }
void watchdog_routine() noexcept {
try {
mvLog(MVLOG_INFO, "thread started\n");
milliseconds sleepInterval;
- struct timespec timeToWait = {0, 0};
+
CustomUniqueLock lock {&routineLock};
do {
}
// TODO: no timer coalescing feature, to minimized thread wakes
sleepInterval = std::get<0>(*minInterval)->dueIn(currentTime);
- mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count());
-
- auto sec = std::chrono::duration_cast<std::chrono::seconds>(sleepInterval);
-
-#if (defined(__APPLE__) || defined(_WIN32))
- timeToWait.tv_sec = sec.count();
- timeToWait.tv_nsec =
- std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
- std::chrono::nanoseconds(sec).count();
-#else
- clock_gettime(CLOCK_MONOTONIC, &timeToWait);
- timeToWait.tv_sec += sec.count();
- timeToWait.tv_nsec +=
- std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
- std::chrono::nanoseconds(sec).count();
-#endif // (defined(__APPLE__) || defined(_WIN32))
-
-#if defined(__APPLE__)
- pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait);
-#else
- pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait);
-#endif // defined(__APPLE__)
+ if (sleepInterval.count() <= 0)
+ continue;
+ mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count());
+ wait_for(sleepInterval);
mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n",
duration_cast<std::chrono::milliseconds>(steady_clock::now() - currentTime).count());
--- /dev/null
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(CMAKE_CXX_STANDARD 11)
+# Fail instead of silently falling back to an older standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+include(${XLINK_DIR}/XLink.cmake)
+
+set(MVNC_TESTS_COMMON_INCLUDE
+        "cases"
+        "helpers"
+        )
+
+# Explicit source list: file(GLOB) over fixed file names gains nothing and
+# silently drops any entry missing on disk, deferring the error to link time.
+set(MVNC_TESTS_COMMON_SOURCES
+        "helpers/mvnc_test_helper.cpp"
+        "cases/mvnc_common_test_cases.cpp")
+
+set(PRIVATE_INCLUDE
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/include
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/
+        ${XLINK_INCLUDE}
+        ${XLINK_PLATFORM_INCLUDE}
+        ${WATCHDOG_INCLUDE})
+
+# Declares one gtest-based mvnc test executable with the shared helper
+# sources, include paths, and compile definitions wired in.
+#   TARGET_NAME   - name of the executable target to create
+#   TESTS_SOURCES - list of test sources (pass as a single quoted list)
+function(add_mvnc_test_target TARGET_NAME TESTS_SOURCES)
+    add_executable(${TARGET_NAME} ${TESTS_SOURCES} ${MVNC_TESTS_COMMON_SOURCES})
+
+    target_include_directories(${TARGET_NAME}
+            PUBLIC
+            "../include"
+            PRIVATE
+            ${MVNC_TESTS_COMMON_INCLUDE}
+            ${PRIVATE_INCLUDE})
+
+    if(ENABLE_MYRIAD_NO_BOOT)
+        target_compile_definitions(${TARGET_NAME} PRIVATE NO_BOOT)
+    endif()
+
+    target_compile_definitions(${TARGET_NAME}
+            PRIVATE
+            __PC__)
+
+    # PRIVATE keyword: test executables are leaf targets, nothing links them.
+    target_link_libraries(${TARGET_NAME} PRIVATE gtest gtest_main mvnc)
+    # Build-order-only dependency: tests need the firmware copied next to them.
+    add_dependencies(${TARGET_NAME} vpu_copy_firmware)
+endfunction()
+
+################# MvncTests ###################
+
+if(ENABLE_MYRIAD_NO_BOOT)
+    set(MVNC_TESTS_SOURCES
+            mvnc_no_boot_tests.cpp
+            cases/mvnc_no_boot_test_cases.cpp
+            )
+else()
+    set(MVNC_TESTS_SOURCES
+            mvnc_tests_common.cpp
+            mvnc_tests_usb.cpp
+            cases/mvnc_usb_test_cases.cpp)
+endif()
+
+add_mvnc_test_target("MvncTests" "${MVNC_TESTS_SOURCES}")
+
+################# MvncStressTests ###################
+
+set(MVNC_STRESS_TESTS_SOURCES
+        mvnc_stress_tests.cpp
+        cases/mvnc_stress_test_cases.cpp
+        )
+add_mvnc_test_target("MvncStressTests" "${MVNC_STRESS_TESTS_SOURCES}")
+
+################# MvncUtilsTests ###################
+
+set(MVNC_UTILS_TESTS_SOURCES mvnc_utils_tests.cpp)
+add_mvnc_test_target("MvncUtilsTests" "${MVNC_UTILS_TESTS_SOURCES}")
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncTestsCommon
+//------------------------------------------------------------------------------
+// Constructor: selects the firmware search path per platform.
+MvncTestsCommon::MvncTestsCommon() {
+#if !(defined(_WIN32) || defined(_WIN64))
+    // On linux we should use custom path to firmware due to another searching mechanism for library
+    strcpy(firmwarePath, "./lib");
+#else
+    firmwarePath[0] = 0;
+#endif
+}
+
+// Per-test setup: init USB boot, apply the log level, count attached devices.
+void MvncTestsCommon::SetUp() {
+    initialize_usb_boot();
+    ASSERT_NO_ERROR(setLogLevel(ncLogLevel));
+    availableDevices_ = getAmountOfDevices();
+}
+
+void MvncTestsCommon::TearDown() {
+    ncDeviceResetAll();
+}
+
+// Applies logLevel globally via ncGlobalSetOption and caches it in
+// ncLogLevel. Returns 0 on success, -1 on failure (warning printed, cached
+// level left unchanged).
+int MvncTestsCommon::setLogLevel(const mvLog_t logLevel) {
+    ncStatus_t status = ncGlobalSetOption(NC_RW_LOG_LEVEL, &logLevel,
+                                          sizeof(logLevel));
+    if (status != NC_OK) {
+        fprintf(stderr,
+                "WARNING: failed to set log level: %d with error: %d\n",
+                ncLogLevel, status);
+        return -1;
+    }
+    ncLogLevel = logLevel;
+    return 0;
+}
+
+// Opens devicesToBoot USB devices into the caller-provided handle array;
+// skips the test (GTEST_SKIP_) when fewer devices are attached.
+void MvncTestsCommon::openDevices(const int devicesToBoot, ncDeviceHandle_t **deviceHandlers,
+                                  int &amountOfBooted) {
+    ASSERT_TRUE(deviceHandlers != nullptr);
+    const int availableDevices = getAmountOfDevices(NC_USB);
+    if (availableDevices < devicesToBoot) {
+        GTEST_SKIP_("Not enough devices");
+    }
+
+    amountOfBooted = 0;
+    ncDeviceDescr_t ncDeviceDesc = {};
+    ncDeviceDesc.protocol = NC_USB;
+    ncDeviceDesc.platform = NC_ANY_PLATFORM;
+
+    for (int index = 0; index < devicesToBoot; ++index) {
+        ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandlers[index], ncDeviceDesc, watchdogInterval, firmwarePath));
+        ASSERT_TRUE(deviceHandlers[index] != nullptr);
+        ++amountOfBooted;
+    }
+    ASSERT_EQ(amountOfBooted, devicesToBoot) << "Not all devices was loaded";
+}
+
+// Boots a single USB device's firmware; PCIe is not bootable and fails fast.
+void MvncTestsCommon::bootOneDevice(ncDeviceProtocol_t deviceProtocol) {
+    if (deviceProtocol == NC_PCIE) {
+        GTEST_FATAL_FAILURE_("Boot doesn't supported for PCIe protocol\n");
+    }
+    ASSERT_NO_ERROR(ncDeviceLoadFirmware(NC_ANY_PLATFORM, firmwarePath));
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncOpenDevice
+//------------------------------------------------------------------------------
+// Re-counts devices for the protocol supplied via the test parameter.
+void MvncOpenDevice::SetUp() {
+    MvncTestsCommon::SetUp();
+
+    _deviceProtocol = GetParam();
+    availableDevices_ = getAmountOfDevices(_deviceProtocol);
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncLoggingTests
+//------------------------------------------------------------------------------
+// Opens every available device and redirects stdout into a buffer so the
+// test can inspect log output.
+void MvncLoggingTests::SetUp() {
+    MvncOpenDevice::SetUp();
+
+    _deviceDesc.protocol = _deviceProtocol;
+    _deviceDesc.platform = NC_ANY_PLATFORM;
+
+    for (int index = 0; index < availableDevices_; ++index) {
+        ASSERT_NO_ERROR(ncDeviceOpen(&_deviceHandles[index], _deviceDesc, watchdogInterval, firmwarePath));
+    }
+
+    setbuf(stdout, buff);
+    fprintf(stdout, "[workaround for getting full content from XLink]\n");
+}
+
+// Restores stdout buffering and closes all devices opened in SetUp.
+void MvncLoggingTests::TearDown() {
+    setbuf(stdout, NULL);
+    for (int index = 0; index < availableDevices_; ++index) {
+        ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandles[index]));
+    }
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncGraphAllocations
+//------------------------------------------------------------------------------
+// Reads the graph blob from disk; a missing blob is reported but does not
+// fail setup (tests check blobLoaded themselves).
+void MvncGraphAllocations::SetUp() {
+    MvncOpenDevice::SetUp();
+
+    // Load blob
+    blobLoaded = readBINFile(blobPath, _blob);
+    if (!blobLoaded) {
+        std::cout << blobPath << " blob for test not found\n";
+    }
+}
+
+// Closes only the devices that were actually booted during the test.
+void MvncGraphAllocations::TearDown() {
+    for (int index = 0; index < _bootedDevices; ++index) {
+        ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[index]));
+    }
+    _bootedDevices = 0;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if (defined(_WIN32) || defined(_WIN64))
+#include "windows.h"
+#endif
+
+#include <gtest/gtest.h>
+#include <chrono>
+
+#include "mvnc.h"
+#include "mvnc_ext.h"
+#include "XLinkLog.h"
+#include "mvnc_test_helper.h"
+
+//------------------------------------------------------------------------------
+// Helpers
+//------------------------------------------------------------------------------
+// User-defined literal so timeouts can be written as e.g. 5_sec.
+constexpr std::chrono::seconds operator "" _sec(unsigned long long s)
+{
+ return std::chrono::seconds(s);
+}
+
+//------------------------------------------------------------------------------
+// class MvncTestsCommon
+//------------------------------------------------------------------------------
+// Base fixture for all mvnc tests: holds the firmware path, log level,
+// watchdog interval and the device count established during SetUp().
+class MvncTestsCommon : public ::testing::Test {
+public:
+ char firmwarePath[MAX_PATH] = {};
+ mvLog_t ncLogLevel = MVLOG_INFO;
+ int watchdogInterval = 1000;
+ int availableDevices_ = 0;
+
+ ~MvncTestsCommon() override = default;
+ MvncTestsCommon();
+protected:
+
+ void SetUp() override;
+ void TearDown() override;
+
+public:
+ // Apply the given mvnc log level; returns 0 on success.
+ int setLogLevel(const mvLog_t logLevel);
+
+ /**
+ * @brief Boot and open the selected amount of devices
+ * @param[in] devicesToBoot Requested number of devices to boot
+ * @param[out] deviceHandlers Pre-allocated array for handlers
+ * @param[out] amountOfBooted Amount of devices which were actually booted
+ */
+ void openDevices(const int devicesToBoot, ncDeviceHandle_t** deviceHandlers,
+ int& amountOfBooted);
+
+ /**
+ * @brief Load firmware to a device
+ * @warning Only USB devices are supported; NC_PCIE triggers a fatal failure
+ */
+ virtual void bootOneDevice(ncDeviceProtocol_t deviceProtocol= NC_USB);
+};
+
+//------------------------------------------------------------------------------
+// class MvncOpenDevice
+//------------------------------------------------------------------------------
+// Fixture parametrized by device protocol (USB / PCIe); SetUp() records the
+// parameter in _deviceProtocol and counts matching devices.
+class MvncOpenDevice : public MvncTestsCommon,
+ public testing::WithParamInterface<ncDeviceProtocol_t> {
+protected:
+ ncDeviceProtocol_t _deviceProtocol = NC_ANY_PROTOCOL;
+
+ ~MvncOpenDevice() override = default;
+ void SetUp() override;
+
+};
+
+//------------------------------------------------------------------------------
+// class MvncLoggingTests
+//------------------------------------------------------------------------------
+// Fixture that opens all devices for the protocol under test and captures
+// stdout into `buff` so log output can be inspected.
+class MvncLoggingTests : public MvncOpenDevice {
+public:
+ // Receives buffered stdout content (installed via setbuf in SetUp).
+ char buff[BUFSIZ] = {};
+protected:
+ ncDeviceHandle_t * _deviceHandles[MAX_DEVICES] = {nullptr};
+ ncDeviceDescr_t _deviceDesc = {};
+
+ void SetUp() override;
+ void TearDown() override;
+ ~MvncLoggingTests() override = default;
+};
+
+//------------------------------------------------------------------------------
+// class MvncGraphAllocations
+//------------------------------------------------------------------------------
+/**
+ * @brief Test transfer of data from host to device
+ * @details Allocates devices and exercises graph-allocation cases
+ * @warning For correct testing a blob with size more than 30mb should be used
+ */
+class MvncGraphAllocations: public MvncOpenDevice {
+public:
+ // Devices
+ ncDeviceHandle_t * _deviceHandle[MAX_DEVICES] = {nullptr};
+ // Number of devices booted by the current test; reset in TearDown().
+ int _bootedDevices = 0;
+
+ // Graphs
+ ncGraphHandle_t* _graphHandle[MAX_DEVICES] = {nullptr};
+
+ // Blob
+ const std::string blobPath = "bvlc_googlenet_fp16.blob";
+ std::vector<char> _blob;
+ // Set by SetUp(); tests skip when the blob file is not present.
+ bool blobLoaded = false;
+
+protected:
+ void SetUp() override;
+ void TearDown() override;
+ ~MvncGraphAllocations() override = default;
+};
+
+//------------------------------------------------------------------------------
+// class MvncCloseDevice
+//------------------------------------------------------------------------------
+// Fixture for device-closing tests; adds no state beyond the common base.
+class MvncCloseDevice : public MvncTestsCommon {
+protected:
+ ~MvncCloseDevice() override = default;
+};
+
+//------------------------------------------------------------------------------
+// Parametric tests initialization
+//------------------------------------------------------------------------------
+// Protocol values used to instantiate parametrized tests.
+static const std::vector<ncDeviceProtocol_t> myriadProtocols = {
+ NC_USB,
+ NC_PCIE
+};
+
+// USB platform values used to instantiate parametrized tests.
+static const std::vector<ncDevicePlatform_t> myriadPlatforms = {
+ NC_MYRIAD_2,
+ NC_MYRIAD_X
+};
+
+
+namespace {
+ /**
+ * @brief Converter from enum to string; used by gtest to name each
+ * instantiated parametrized test.
+ */
+ struct PrintToStringParamName {
+ std::string operator()(
+ const ::testing::TestParamInfo<ncDeviceProtocol_t> &info) const {
+ return ncProtocolToStr(info.param);
+ }
+
+ std::string operator()(
+ const ::testing::TestParamInfo<ncDevicePlatform_t> &info) const {
+ return std::string("USB_") + ncPlatformToStr(info.param);
+ }
+ };
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_no_boot_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncNoBootTests
+//------------------------------------------------------------------------------
+// Boot a single USB device, but only if no booted device is present yet.
+void MvncNoBootTests::bootOneDevice() {
+ // In case already booted device exist, do nothing
+ if (getAmountOfBootedDevices() == 0) {
+ MvncTestsCommon::bootOneDevice(NC_USB);
+ }
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncNoBootOpenDevice
+//------------------------------------------------------------------------------
+// Count USB devices, require at least one, and pre-boot firmware:
+// NO_BOOT builds expect an already-booted device before ncDeviceOpen.
+void MvncNoBootOpenDevice::SetUp() {
+ MvncNoBootTests::SetUp();
+ available_devices = getAmountOfDevices(NC_USB);
+ ASSERT_TRUE(available_devices > 0);
+
+ // With NO_BOOT option we should boot device with firmware before trying to open
+ bootOneDevice();
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// class MvncNoBootTests
+//------------------------------------------------------------------------------
+// Base fixture for the NO_BOOT flavour of the tests.
+class MvncNoBootTests: public MvncTestsCommon {
+public:
+ // NOTE(review): this parameterless overload hides (does not override)
+ // the base bootOneDevice(ncDeviceProtocol_t); it boots a USB device
+ // only when none is booted yet.
+ void bootOneDevice();
+protected:
+ ~MvncNoBootTests() override = default;
+};
+
+//------------------------------------------------------------------------------
+// class MvncNoBootOpenDevice
+//------------------------------------------------------------------------------
+// NO_BOOT fixture that requires at least one USB device and pre-boots it.
+class MvncNoBootOpenDevice : public MvncNoBootTests {
+public:
+ // USB device count established in SetUp().
+ int available_devices = 0;
+protected:
+ ~MvncNoBootOpenDevice() override = default;
+ void SetUp() override;
+};
+
+//------------------------------------------------------------------------------
+// class MvncNoBootCloseDevice
+//------------------------------------------------------------------------------
+// NO_BOOT fixture for device-closing tests; no extra state.
+class MvncNoBootCloseDevice : public MvncNoBootTests {
+protected:
+ ~MvncNoBootCloseDevice() override = default;
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_stress_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncStressTests
+//------------------------------------------------------------------------------
+// Resolve the protocol parameter, require at least one matching device,
+// reduce log verbosity, and (in NO_BOOT builds) pre-boot one USB device.
+void MvncStressTests::SetUp() {
+ MvncTestsCommon::SetUp();
+
+ _deviceProtocol = GetParam();
+ available_devices = getAmountOfDevices(_deviceProtocol);
+ ASSERT_TRUE(available_devices > 0) << ncProtocolToStr(_deviceProtocol)
+ << " devices not found";
+ ASSERT_NO_ERROR(setLogLevel(MVLOG_WARN));
+
+#ifdef NO_BOOT
+ // In case already booted device exist, do nothing
+ if (getAmountOfBootedDevices() == 0) {
+ MvncTestsCommon::bootOneDevice(NC_USB);
+ }
+#endif
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// class MvncStressTests
+//------------------------------------------------------------------------------
+// Fixture for long-running open/close and inference stress tests,
+// parametrized by device protocol.
+class MvncStressTests : public MvncTestsCommon,
+ public testing::WithParamInterface<ncDeviceProtocol_t>{
+public:
+ // Devices matching the protocol under test, counted in SetUp().
+ int available_devices = 0;
+
+protected:
+ ~MvncStressTests() override = default;
+ void SetUp() override;
+
+ ncDeviceProtocol_t _deviceProtocol = NC_ANY_PROTOCOL;
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc_usb_test_cases.h"
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncOpenUSBDevice
+//------------------------------------------------------------------------------
+// Reset all devices first so the test starts from an unbooted state, then
+// count unbooted USB devices and prepare a generic USB device descriptor.
+void MvncOpenUSBDevice::SetUp() {
+ ncDeviceResetAll();
+ MvncTestsCommon::SetUp();
+
+ availableDevices_ = getAmountOfNotBootedDevices(NC_USB);
+
+ deviceDesc_.protocol = NC_USB;
+ deviceDesc_.platform = NC_ANY_PLATFORM;
+}
+
+//------------------------------------------------------------------------------
+// Implementation of class MvncDevicePlatform
+//------------------------------------------------------------------------------
+// Count devices per platform and narrow the descriptor to the platform
+// under test (gtest parameter).
+void MvncDevicePlatform::SetUp() {
+ MvncOpenUSBDevice::SetUp();
+
+ available_myriadX_ = getAmountOfMyriadXDevices(NC_USB);
+ available_myriad2_ = getAmountOfMyriad2Devices(NC_USB);
+
+ devicePlatform_ = GetParam();
+ deviceDesc_.platform = devicePlatform_;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// class MvncOpenUSBDevice
+//------------------------------------------------------------------------------
+// Fixture for USB-only open tests; SetUp() resets devices and prepares a
+// USB descriptor with any platform.
+class MvncOpenUSBDevice : public MvncTestsCommon {
+public:
+ ncDeviceHandle_t* deviceHandle_ = nullptr;
+ ncDeviceDescr_t deviceDesc_ = {};
+
+ ~MvncOpenUSBDevice() override = default;
+
+protected:
+ void SetUp() override;
+};
+
+//------------------------------------------------------------------------------
+// class MvncCloseUSBDevice
+//------------------------------------------------------------------------------
+// Fixture alias for USB device-closing tests; behavior inherited unchanged.
+class MvncCloseUSBDevice : public MvncOpenUSBDevice {
+};
+
+//------------------------------------------------------------------------------
+// class MvncDevicePlatform
+//------------------------------------------------------------------------------
+// USB fixture parametrized by Myriad platform (Myriad 2 / Myriad X).
+class MvncDevicePlatform : public MvncOpenUSBDevice,
+ public testing::WithParamInterface<ncDevicePlatform_t>{
+public:
+ long available_myriadX_ = 0;
+ long available_myriad2_ = 0;
+ // Assigned from the test parameter in SetUp(); default-initialized so the
+ // member is never read uninitialized if SetUp() fails early.
+ ncDevicePlatform_t devicePlatform_ = NC_ANY_PLATFORM;
+
+ ~MvncDevicePlatform() override = default;
+
+protected:
+ void SetUp() override;
+};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <iostream>
+#include "mvnc_data.h"
+#include "mvnc_test_helper.h"
+
+//------------------------------------------------------------------------------
+// Implementations of helpers - counters
+//------------------------------------------------------------------------------
+/**
+ * @brief Count connected devices matching protocol, platform and XLink state
+ * @note The result fits in an int because the search is bounded by
+ *       NC_MAX_DEVICES.
+ */
+int getAmountOfDevices(const ncDeviceProtocol_t deviceProtocol,
+ const ncDevicePlatform_t devicePlatform,
+ const XLinkDeviceState_t state) {
+ deviceDesc_t req_deviceDesc = {};
+ req_deviceDesc.protocol = convertProtocolToXlink(deviceProtocol);
+ req_deviceDesc.platform = convertPlatformToXlink(devicePlatform);
+
+ deviceDesc_t deviceDescArray[NC_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkFindAllSuitableDevices(
+ state, req_deviceDesc, deviceDescArray, NC_MAX_DEVICES, &foundDevices);
+
+ return foundDevices;
+}
+
+// Count connected Myriad X devices for the given protocol.
+long getAmountOfMyriadXDevices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_MYRIAD_X);
+}
+
+// Count connected Myriad 2 devices for the given protocol.
+long getAmountOfMyriad2Devices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_MYRIAD_2);
+}
+
+// Count devices already booted with firmware (any platform).
+long getAmountOfBootedDevices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_ANY_PLATFORM, X_LINK_BOOTED);
+}
+
+// Count devices that have not been booted yet (any platform).
+long getAmountOfNotBootedDevices(ncDeviceProtocol_t deviceProtocol) {
+ return getAmountOfDevices(deviceProtocol, NC_ANY_PLATFORM, X_LINK_UNBOOTED);
+}
+
+// Count connected PCIe devices, any platform and state.
+long getAmountOfPCIeDevices() {
+ return getAmountOfDevices(NC_PCIE);
+}
+
+// Count connected USB devices, any platform and state.
+long getAmountOfUSBDevices() {
+ return getAmountOfDevices(NC_USB);
+}
+
+//------------------------------------------------------------------------------
+// Implementations of helpers - get devices
+//------------------------------------------------------------------------------
+/**
+ * @brief Get the names of all connected Myriad devices matching the
+ *        given protocol, platform and XLink state
+ */
+std::vector<std::string> getDevicesList(const ncDeviceProtocol_t deviceProtocol,
+ const ncDevicePlatform_t devicePlatform,
+ const XLinkDeviceState_t state) {
+
+ deviceDesc_t req_deviceDesc = {};
+ req_deviceDesc.protocol = convertProtocolToXlink(deviceProtocol);
+ req_deviceDesc.platform = convertPlatformToXlink(devicePlatform);
+
+ deviceDesc_t deviceDescArray[NC_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkFindAllSuitableDevices(
+ state, req_deviceDesc, deviceDescArray, NC_MAX_DEVICES, &foundDevices);
+
+ std::vector<std::string> devNames;
+ devNames.reserve(foundDevices);
+ // Unsigned index matches foundDevices and avoids a signed/unsigned
+ // comparison warning present in the original loop.
+ for (unsigned int i = 0; i < foundDevices; ++i) {
+ devNames.emplace_back(deviceDescArray[i].name);
+ }
+
+ return devNames;
+}
+
+//------------------------------------------------------------------------------
+// Implementation of helpers - comparators
+//------------------------------------------------------------------------------
+// True when the name contains the Myriad X USB marker ("ma2480").
+bool isMyriadXUSBDevice(const std::string &deviceName) {
+ return (deviceName.find(MYRIAD_X_NAME_STR) != std::string::npos);
+}
+
+// True when the name contains the Myriad 2 USB marker ("ma2450").
+bool isMyriad2USBDevice(const std::string &deviceName) {
+ return (deviceName.find(MYRIAD_2_NAME_STR) != std::string::npos);
+}
+
+// True when the name contains the platform-specific PCIe marker.
+bool isMyriadPCIeDevice(const std::string &deviceName) {
+ return deviceName.find(std::string(PCIE_NAME_STR)) != std::string::npos;
+}
+
+// True for any Myriad USB device name, including already-booted devices
+// (which lose their platform marker — see isMyriadBootedUSBDevice).
+bool isMyriadUSBDevice(const std::string &deviceName) {
+ return (isMyriad2USBDevice(deviceName)
+ || isMyriadXUSBDevice(deviceName)
+ || isMyriadBootedUSBDevice(deviceName));
+}
+
+// NOTE(review): booted USB devices are detected only by elimination — any
+// name lacking the Myriad2/MyriadX/PCIe markers is treated as a booted USB
+// device, so unrelated names would also match. Confirm against XLink naming.
+bool isMyriadBootedUSBDevice(const std::string &deviceName) {
+ return (!isMyriad2USBDevice(deviceName) &&
+ !isMyriadXUSBDevice(deviceName) &&
+ !isMyriadPCIeDevice(deviceName));
+}
+
+/**
+ * @brief Check that the device name matches the expected protocol.
+ */
+bool isSameProtocolDevice(const std::string &deviceName, const ncDeviceProtocol_t expectedProtocol) {
+ if (expectedProtocol == NC_USB) {
+ return isMyriadUSBDevice(deviceName);
+ }
+ if (expectedProtocol == NC_PCIE) {
+ return isMyriadPCIeDevice(deviceName);
+ }
+ if (expectedProtocol == NC_ANY_PROTOCOL) {
+ return isMyriadPCIeDevice(deviceName) || isMyriadUSBDevice(deviceName);
+ }
+ std::cout << "Unknown device protocol" << std::endl;
+ return false;
+}
+
+/**
+ * @brief Check that the USB device name matches the expected platform.
+ */
+bool
+isSamePlatformUSBDevice(const std::string &deviceName, const ncDevicePlatform_t expectedPlatform) {
+ if (expectedPlatform == NC_MYRIAD_2) {
+ return isMyriad2USBDevice(deviceName);
+ }
+ if (expectedPlatform == NC_MYRIAD_X) {
+ return isMyriadXUSBDevice(deviceName);
+ }
+ if (expectedPlatform == NC_ANY_PLATFORM) {
+ return isMyriad2USBDevice(deviceName) || isMyriadXUSBDevice(deviceName);
+ }
+ std::cout << "Unknown device platform" << std::endl;
+ return false;
+}
+
+//------------------------------------------------------------------------------
+// Implementation of helpers - file loader
+//------------------------------------------------------------------------------
+/**
+ * @brief Read a whole binary file into a byte buffer
+ * @param fileName Path to the file
+ * @param[out] buf Resized to the file size and filled with its content
+ * @return True only if the file was opened and fully read
+ */
+bool readBINFile(const std::string &fileName, std::vector<char> &buf) {
+ std::ifstream file(fileName, std::ios_base::binary | std::ios_base::ate);
+ if (file.fail()) {
+ std::cout << "Can't open file!" << std::endl;
+ return false;
+ }
+ // "ate" positions the stream at the end, so tellg() yields the file size.
+ const std::streampos fileSize = file.tellg();
+ if (fileSize < 0) {
+ // Original code would cast -1 to a huge size and try to allocate it.
+ return false;
+ }
+ buf.resize(static_cast<size_t>(fileSize));
+ file.seekg(0);
+ file.read(buf.data(), static_cast<std::streamsize>(buf.size()));
+ // Report failure when the content could not be read completely.
+ return !file.fail();
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <string>
+#include <fstream>
+
+#include "XLink.h"
+#include "mvnc.h"
+
+//------------------------------------------------------------------------------
+// Macros
+//------------------------------------------------------------------------------
+// 0 is the mvnc success code, so any non-zero result counts as an error.
+#define ASSERT_NO_ERROR(call) ASSERT_EQ(call, 0)
+#define ASSERT_ERROR(call) ASSERT_TRUE(call)
+
+
+//------------------------------------------------------------------------------
+// Defines
+//------------------------------------------------------------------------------
+// USB name markers for the two Myriad platforms.
+#define MYRIAD_X_NAME_STR "ma2480"
+#define MYRIAD_2_NAME_STR "ma2450"
+
+// The PCIe device name marker differs between Windows and other systems.
+#if (defined(_WIN32) || defined(_WIN64))
+#define PCIE_NAME_STR "mxlink"
+#else
+#define PCIE_NAME_STR "mxlk"
+#endif
+
+const int MAX_DEVICES = 32;
+const int MAX_DEV_NAME = 20;
+
+// Windows defines MAX_PATH in windows.h; provide a fallback elsewhere.
+#ifndef MAX_PATH
+const int MAX_PATH = 255;
+#endif
+
+//------------------------------------------------------------------------------
+// Usb initialization
+//------------------------------------------------------------------------------
+// Without this initialization find device on windows could not work
+#if (defined(_WIN32) || defined(_WIN64) )
+extern "C" void initialize_usb_boot();
+#else
+#define initialize_usb_boot()
+#endif
+
+
+//------------------------------------------------------------------------------
+// Helpers - counters
+//------------------------------------------------------------------------------
+/**
+ * @brief Get amount of all currently connected Myriad devices
+ * @param[in] deviceProtocol Count only protocol specific devices
+ * @param[in] devicePlatform Count only platform specific devices
+ * @param[in] state Count only devices in the given XLink state
+ */
+int getAmountOfDevices(const ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL,
+ const ncDevicePlatform_t devicePlatform = NC_ANY_PLATFORM,
+ const XLinkDeviceState_t state = X_LINK_ANY_STATE);
+
+long getAmountOfMyriadXDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfMyriad2Devices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfBootedDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfNotBootedDevices(ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL);
+
+long getAmountOfPCIeDevices();
+
+long getAmountOfUSBDevices();
+
+//------------------------------------------------------------------------------
+// Helpers - get devices
+//------------------------------------------------------------------------------
+/**
+ * @brief Get list of all currently connected Myriad devices
+ */
+std::vector<std::string> getDevicesList(
+ const ncDeviceProtocol_t deviceProtocol = NC_ANY_PROTOCOL,
+ const ncDevicePlatform_t devicePlatform = NC_ANY_PLATFORM,
+ const XLinkDeviceState_t state = X_LINK_ANY_STATE);
+
+//------------------------------------------------------------------------------
+// Helpers - comparators
+//------------------------------------------------------------------------------
+bool isMyriadXUSBDevice(const std::string &deviceName);
+
+bool isMyriad2USBDevice(const std::string &deviceName);
+
+bool isMyriadPCIeDevice(const std::string& deviceName);
+
+/**
+ * @warning The booted USB device will also be counted here.
+ */
+bool isMyriadUSBDevice(const std::string& deviceName);
+
+bool isMyriadBootedUSBDevice(const std::string &deviceName);
+
+/**
+ * @brief Check that device matches the specified protocol
+ */
+bool isSameProtocolDevice(const std::string &deviceName,
+ const ncDeviceProtocol_t expectedProtocol);
+
+/**
+* @brief Check that device matches the specified platform for USB
+*/
+bool isSamePlatformUSBDevice(const std::string &deviceName,
+ const ncDevicePlatform_t expectedPlatform);
+
+//------------------------------------------------------------------------------
+// Helpers - file loader
+//------------------------------------------------------------------------------
+/**
+ * @brief Read blob
+ * @param fileName Path to blob from bin directory
+ * @return True if blob is read without problem
+ */
+bool readBINFile(const std::string& fileName, std::vector<char>& buf);
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvnc.h"
+#include "mvnc_no_boot_test_cases.h"
+
+//------------------------------------------------------------------------------
+// MvncNoBootOpenDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open any device and close it.
+* @note The fixture pre-boots a USB device in SetUp(), as NO_BOOT builds
+*       require an already-booted device before ncDeviceOpen.
+*/
+TEST_F(MvncNoBootOpenDevice, OpenAndClose) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+}
+
+/**
+* @brief Try to open device twice. DeviceHandle shouldn't be overwritten
+*/
+TEST_F(MvncNoBootOpenDevice, OpenTwiceSameHandler) {
+ ncDeviceHandle_t *handle = nullptr;
+ ncDeviceDescr_t descr = {};
+ descr.protocol = NC_USB;
+ descr.platform = NC_ANY_PLATFORM;
+
+ char firstDeviceName[MAX_DEV_NAME];
+ char secondDeviceName[MAX_DEV_NAME];
+ unsigned int firstNameLength = MAX_DEV_NAME;
+ unsigned int secondNameLength = MAX_DEV_NAME;
+
+ // Open the device and remember its reported name.
+ ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+ firstDeviceName, &firstNameLength));
+
+ // Open again with the same handle pointer and read the name once more.
+ ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+ secondDeviceName, &secondNameLength));
+
+ ASSERT_NO_ERROR(ncDeviceClose(&handle));
+ // Both opens must refer to the same physical device.
+ ASSERT_STREQ(firstDeviceName, secondDeviceName);
+}
+
+
+/**
+ * @brief Open device twice, one run after another; checks that the link to
+ *        the device is closed correctly between runs
+ * @note Mostly important for PCIe and connect-to-booted modes, where
+ *       XLinkReset behaves differently
+ */
+TEST_F(MvncNoBootOpenDevice, OpenDeviceWithOneXLinkInitializion) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+
+ // Second open
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+
+}
+
+//------------------------------------------------------------------------------
+// MvncNoBootCloseDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Closing a null device handle must succeed (be a no-op)
+*/
+TEST_F(MvncNoBootCloseDevice, EmptyDeviceHandler) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+}
+
+/**
+* @brief Device, which was booted before open, shouldn't reboot after
+* ncDeviceClose call — the booted-device count must stay unchanged
+*/
+TEST_F(MvncNoBootCloseDevice, AlreadyBootedDeviceWillNotReboot) {
+ bootOneDevice();
+
+ ASSERT_EQ(getAmountOfBootedDevices(), 1);
+
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+
+ // The device must still be booted after close (no reboot).
+ ASSERT_EQ(getAmountOfBootedDevices(), 1);
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include "mvnc.h"
+#include "ncPrivateTypes.h"
+#include "mvnc_stress_test_cases.h"
+
+//------------------------------------------------------------------------------
+// MvncStressTests Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open and close device for 1001 times
+*/
+TEST_P(MvncStressTests, OpenClose1001) {
+ const int iterations = 1001;
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = _deviceProtocol;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ for (int i = 0; i < iterations; ++i) {
+ printf("Iteration %d of %d\n", i, iterations);
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ // Reset the handle so the next iteration performs a fresh open.
+ deviceHandle = nullptr;
+ }
+}
+
+/**
+* @brief Allocate and deallocate graph on device for 1001 times
+*/
+TEST_P(MvncStressTests, AllocateDeallocateGraph1001) {
+ const int iterations = 1001;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = _deviceProtocol;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ // Load graph
+ const std::string blobPath = "bvlc_googlenet_fp16.blob";
+ std::vector<char> _blob;
+
+ if (!readBINFile(blobPath, _blob)) GTEST_SKIP_("Blob not found\n");
+
+ // Open device
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+
+ for (int i = 0; i < iterations; ++i) {
+ printf("Iteration %d of %d\n", i, iterations);
+
+ // Create graph handlers
+ ncGraphHandle_t* graphHandle = nullptr;
+ std::string graphName = "graph";
+
+ ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &graphHandle));
+ ASSERT_TRUE(graphHandle != nullptr);
+
+ // Allocate graph; the blob doubles as its own header for size purposes.
+ ASSERT_NO_ERROR(ncGraphAllocate(deviceHandle, graphHandle,
+ _blob.data(), _blob.size(), // Blob
+ _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2)) ); // Header
+
+ // Destroy graph
+ ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle));
+ }
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+}
+
+
+/**
+* @brief Run the full cycle of inference 101 times.
+* It includes opening device, allocating graph and fifos, inference,
+ * destroying graph and fifos, closing device
+*/
+TEST_P(MvncStressTests, FullCycleOfWork101Times) {
+ const int iterations = 101;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = _deviceProtocol;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ const std::string blobPath = "bvlc_googlenet_fp16.blob";
+ std::vector<char> blob;
+ if (!readBINFile(blobPath, blob)) GTEST_SKIP_("Blob not found\n");
+
+ for (int i = 0; i < iterations; i++) {
+ ncDeviceHandle_t *deviceHandle = nullptr;
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath));
+
+ ncGraphHandle_t* graphHandle = nullptr;
+ std::string graphName = "graph";
+ ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &graphHandle));
+ ASSERT_TRUE(graphHandle != nullptr);
+
+ ASSERT_NO_ERROR(ncGraphAllocate(deviceHandle, graphHandle,
+ blob.data(), blob.size(), // Blob
+ blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2) ));
+
+
+ // Query input/output counts and tensor descriptors from the graph.
+ unsigned int dataLength = sizeof(int);
+
+ int numInputs = 0;
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_INPUT_COUNT, &numInputs, &dataLength));
+
+ int numOutputs = 0;
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_OUTPUT_COUNT, &numOutputs, &dataLength));
+
+ dataLength = sizeof(ncTensorDescriptor_t);
+
+ ncTensorDescriptor_t inputDesc = {};
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_INPUT_TENSOR_DESCRIPTORS, &inputDesc,
+ &dataLength));
+
+
+ ncTensorDescriptor_t outputDesc = {};
+ ASSERT_NO_ERROR(ncGraphGetOption(graphHandle, NC_RO_GRAPH_OUTPUT_TENSOR_DESCRIPTORS, &outputDesc,
+ &dataLength));
+
+ unsigned int fifo_elements = 4;
+
+ ncFifoHandle_t *inputFifoHandle = nullptr;
+ ASSERT_NO_ERROR(ncFifoCreate("input", NC_FIFO_HOST_WO, &inputFifoHandle));
+
+ ASSERT_NO_ERROR(ncFifoAllocate(inputFifoHandle, deviceHandle, &inputDesc, fifo_elements));
+
+ ncFifoHandle_t *outputFifoHandle = nullptr;
+ ASSERT_NO_ERROR(ncFifoCreate("output", NC_FIFO_HOST_RO, &outputFifoHandle));
+
+ ASSERT_NO_ERROR(ncFifoAllocate(outputFifoHandle, deviceHandle, &outputDesc, fifo_elements));
+
+ // std::vector instead of raw new/delete: the original leaked both buffers
+ // whenever any of the ASSERT_* below returned early from the test body.
+ std::vector<uint8_t> input_data(inputDesc.totalSize);
+ std::vector<uint8_t> result_data(outputDesc.totalSize);
+ ASSERT_NO_ERROR(ncGraphQueueInferenceWithFifoElem(graphHandle,
+ inputFifoHandle, outputFifoHandle,
+ input_data.data(), &inputDesc.totalSize, nullptr));
+
+ void *userParam = nullptr;
+ ASSERT_NO_ERROR(ncFifoReadElem(outputFifoHandle, result_data.data(), &outputDesc.totalSize, &userParam));
+
+ ASSERT_NO_ERROR(ncFifoDestroy(&inputFifoHandle));
+ ASSERT_NO_ERROR(ncFifoDestroy(&outputFifoHandle));
+
+ ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle));
+
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle));
+ }
+
+}
+
+// Instantiate the stress tests once per supported protocol.
+// NOTE(review): INSTANTIATE_TEST_CASE_P is deprecated in newer GoogleTest in
+// favour of INSTANTIATE_TEST_SUITE_P; keep while the bundled gtest needs it.
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+ MvncStressTests,
+ ::testing::ValuesIn(myriadProtocols),
+ PrintToStringParamName());
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <thread>
+#include "mvnc.h"
+#include "ncPrivateTypes.h"
+#include "mvnc_common_test_cases.h"
+
+//------------------------------------------------------------------------------
+// MvncTestsCommon Tests
+// Platform independent tests
+//------------------------------------------------------------------------------
+// ncAvailableDevices() must agree with XLink's own count of unbooted devices
+// and must not write past the reported count.
+TEST_F(MvncTestsCommon, DoubleCheckOfAvailableDevicesCount) {
+ if (availableDevices_ == 0)
+ GTEST_SKIP();
+
+ const int min_name_size = 2;
+
+ struct ncDeviceDescr_t act_devices[NC_MAX_DEVICES] = {};
+ int act_devicesCount = 0;
+ int exp_devicesCount = getAmountOfNotBootedDevices();
+
+ ASSERT_NO_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, &act_devicesCount));
+
+ ASSERT_TRUE(act_devicesCount);
+ ASSERT_EQ(act_devicesCount, exp_devicesCount);
+
+ // Every reported device carries a plausible name...
+ for (int i = 0; i < act_devicesCount; ++i) {
+ ASSERT_GE(strlen(act_devices[i].name), min_name_size);
+ }
+
+ // ...and entries past the reported count stay zero-initialized.
+ for (int j = act_devicesCount; j < NC_MAX_DEVICES; ++j) {
+ ASSERT_EQ(strlen(act_devices[j].name), 0);
+ }
+}
+
+// Passing a NULL device array must be rejected with an error.
+// NOTE(review): "Shold" typo is in the test name (test identity) — left as-is.
+TEST_F(MvncTestsCommon, AvailableDevicesSholdReturnErrorIfArrayIsNULL) {
+ int act_devicesCount = 0;
+ ASSERT_ERROR(ncAvailableDevices(NULL, NC_MAX_DEVICES, &act_devicesCount));
+}
+
+// Passing a NULL count pointer must be rejected with an error.
+TEST_F(MvncTestsCommon, AvailableDevicesSholdReturnErrorIfCountPtrIsNULL) {
+ struct ncDeviceDescr_t act_devices[NC_MAX_DEVICES] = {};
+ ASSERT_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, NULL));
+}
+
+// When both transports are present, the enumeration must report at least
+// one USB and one PCIe device.
+TEST_F(MvncTestsCommon, CanGetPCIeAndUSB) {
+ if (!(getAmountOfUSBDevices() && getAmountOfPCIeDevices()))
+ GTEST_SKIP_("USB and PCIe not available");
+
+ struct ncDeviceDescr_t act_devices[NC_MAX_DEVICES] = {};
+ int act_devicesCount = 0;
+ ASSERT_NO_ERROR(ncAvailableDevices(act_devices, NC_MAX_DEVICES, &act_devicesCount));
+
+ bool usb_device_found = false;
+ bool pcie_device_found = false;
+
+ // Classify each reported name by its transport marker.
+ for (int i = 0; i < act_devicesCount; ++i) {
+ if (isMyriadUSBDevice(act_devices[i].name)) {
+ usb_device_found = true;
+ } else if (isMyriadPCIeDevice(act_devices[i].name)) {
+ pcie_device_found = true;
+ }
+ }
+
+ EXPECT_TRUE(usb_device_found);
+ EXPECT_TRUE(pcie_device_found);
+}
+
+// A negative connect timeout is invalid and must be rejected.
+TEST_F(MvncTestsCommon, ShouldFailToSetNegativeTimeout) {
+ ASSERT_ERROR(ncSetDeviceConnectTimeout(-1));
+}
+
+//------------------------------------------------------------------------------
+// MvncTestsCommon Tests
+// PCIe + USB Tests
+//------------------------------------------------------------------------------
+
+/**
+ * @brief Test that USB and PCIe works at the same time. USB first
+ */
+TEST_F(MvncTestsCommon, OpenUSBThenPCIEAndClose) {
+ if (getAmountOfPCIeDevices() == 0)
+ GTEST_SKIP() << "PCIe devices not found";
+ if (getAmountOfUSBDevices() == 0)
+ GTEST_SKIP() << "USB devices not found";
+
+ ncDeviceHandle_t *deviceHandle_USB = nullptr;
+ ncDeviceHandle_t *deviceHandle_PCIe = nullptr;
+ std::string actDeviceName;
+ ncDeviceDescr_t deviceDesc = {};
+ deviceDesc.protocol = NC_USB;
+ deviceDesc.platform = NC_ANY_PLATFORM;
+
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, watchdogInterval, firmwarePath));
+
+ // Verify the opened device really is a USB device.
+ actDeviceName = deviceHandle_USB->private_data->dev_addr;
+ ASSERT_TRUE(actDeviceName.size());
+ ASSERT_TRUE(isMyriadUSBDevice(actDeviceName));
+
+ // Open PCIe device
+ deviceDesc.protocol = NC_PCIE;
+ ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, watchdogInterval, firmwarePath));
+
+ actDeviceName = deviceHandle_PCIe->private_data->dev_addr;
+ ASSERT_TRUE(actDeviceName.size());
+ ASSERT_TRUE(isMyriadPCIeDevice(actDeviceName));
+
+ // Close all
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe));
+ ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB));
+}
+
+/**
+ * @brief Test that USB and PCIe works at the same time. PCIe first
+ */
+TEST_F(MvncTestsCommon, OpenPCIEThenUSBAndClose) {
+    if (getAmountOfPCIeDevices() == 0)
+        GTEST_SKIP() << "PCIe devices not found";
+    if (getAmountOfUSBDevices() == 0)
+        GTEST_SKIP() << "USB devices not found";
+
+    ncDeviceHandle_t *usbHandle = nullptr;
+    ncDeviceHandle_t *pcieHandle = nullptr;
+    ncDeviceDescr_t descr = {};
+    descr.protocol = NC_PCIE;
+    descr.platform = NC_ANY_PLATFORM;
+
+    // PCIe first: open and check the reported address really is PCIe.
+    ASSERT_NO_ERROR(ncDeviceOpen(&pcieHandle, descr,
+                                 watchdogInterval, firmwarePath));
+    std::string openedName = pcieHandle->private_data->dev_addr;
+    ASSERT_TRUE(openedName.size());
+    ASSERT_TRUE(isMyriadPCIeDevice(openedName));
+
+    // Then USB with the same descriptor, only the protocol switched.
+    descr.protocol = NC_USB;
+    ASSERT_NO_ERROR(ncDeviceOpen(&usbHandle, descr,
+                                 watchdogInterval, firmwarePath));
+    openedName = usbHandle->private_data->dev_addr;
+    ASSERT_TRUE(openedName.size());
+    ASSERT_TRUE(isMyriadUSBDevice(openedName));
+
+    // Both handles must close cleanly.
+    ASSERT_NO_ERROR(ncDeviceClose(&pcieHandle));
+    ASSERT_NO_ERROR(ncDeviceClose(&usbHandle));
+}
+
+//------------------------------------------------------------------------------
+// MvncOpenDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open any device and close it
+*/
+TEST_P(MvncOpenDevice, OpenAndClose) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t* handle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+
+    // A successful open must populate the handle and its private data.
+    ASSERT_TRUE(handle != nullptr);
+    ASSERT_TRUE(handle->private_data != nullptr);
+    ASSERT_TRUE(handle->private_data->dev_addr_booted != nullptr);
+
+    const std::string bootedName = handle->private_data->dev_addr_booted;
+    ASSERT_TRUE(bootedName.size() > 0);
+    // The booted address must match the protocol the test was run for.
+    ASSERT_TRUE(isSameProtocolDevice(bootedName, _deviceProtocol));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+}
+
+/**
+ * @brief Check that all field of deviceHandle would be initialized
+ */
+TEST_P(MvncOpenDevice, AllHandleFieldsInitialized) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t* handle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr,
+                                 watchdogInterval, firmwarePath));
+    ASSERT_TRUE(handle != nullptr);
+
+    // Every pointer inside the private data must have been filled in by open.
+    devicePrivate_t* priv = handle->private_data;
+    ASSERT_TRUE(priv != nullptr);
+    ASSERT_TRUE(priv->dev_addr != nullptr);
+    ASSERT_TRUE(priv->dev_addr_booted != nullptr);
+    ASSERT_TRUE(priv->xlink != nullptr);
+
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+}
+
+/**
+* @brief Try to open device twice. DeviceHandle shouldn't be overwritten
+* @details Expected behavior - ncDeviceOpen should warn that deviceHandle
+ * already has allocated device
+*/
+TEST_P(MvncOpenDevice, OpenTwiceSameHandler) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    char firstName[MAX_DEV_NAME];
+    unsigned int firstLength = MAX_DEV_NAME;
+    char secondName[MAX_DEV_NAME];
+    unsigned int secondLength = MAX_DEV_NAME;
+
+    // First open: record the device name behind the handle.
+    ncDeviceHandle_t *handle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+                                      firstName, &firstLength));
+
+    // Second open with the SAME handle: must keep pointing at the same device.
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(handle, NC_RO_DEVICE_NAME,
+                                      secondName, &secondLength));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+    // Should be the same device
+    ASSERT_STREQ(firstName, secondName);
+}
+
+/**
+ * @brief Try to open device twice with different handlers. Second open should return error
+ * @reason #-18548
+ */
+ // Fixme Test only for one device
+TEST_P(MvncOpenDevice, DISABLED_OpenSameDeviceTwiceDifferentHandlers) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t *firstHandle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceOpen(&firstHandle, descr,
+                                 watchdogInterval, firmwarePath));
+
+    // Till we don't have multiple device support, this function would try to open same device
+    ncDeviceHandle_t *secondHandle = nullptr;
+    ASSERT_ERROR(ncDeviceOpen(&secondHandle, descr,
+                              watchdogInterval, firmwarePath));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&firstHandle));
+}
+
+
+/**
+ * @brief Open device twice one run after another. It should check, that link to device closed correctly
+ * @note Mostly this test important for PCIe and connect to booted option, as in that cases XLinkReset have another behavior
+ */
+TEST_P(MvncOpenDevice, OpenTwiceWithOneXLinkInitializion) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = _deviceProtocol;
+    descr.platform = NC_ANY_PLATFORM;
+
+    ncDeviceHandle_t *handle = nullptr;
+
+    // First open/close cycle.
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr,
+                                 watchdogInterval, firmwarePath));
+    std::string openedName = handle->private_data->dev_addr;
+    ASSERT_TRUE(isSameProtocolDevice(openedName, _deviceProtocol));
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+
+    // Second cycle must succeed with the same (already initialized) XLink.
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr,
+                                 watchdogInterval, firmwarePath));
+    openedName = handle->private_data->dev_addr;
+    ASSERT_TRUE(isSameProtocolDevice(openedName, _deviceProtocol));
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+}
+
+//------------------------------------------------------------------------------
+// MvncLoggingTests Tests
+//------------------------------------------------------------------------------
+TEST_P(MvncLoggingTests, ShouldNotPrintErrorMessagesIfCanNotOpenDevice) {
+ if (availableDevices_ == 0)
+ GTEST_SKIP() << ncProtocolToStr(_deviceProtocol) << " devices not found";
+
+ setLogLevel(MVLOG_INFO);
+ ncDeviceHandle_t * deviceHandle = nullptr;
+
+ ASSERT_ERROR(ncDeviceOpen(&deviceHandle, _deviceDesc, watchdogInterval, firmwarePath));
+
+ std::string content(buff);
+ for (int i = MVLOG_WARN; i < MVLOG_LAST; i++) {
+ auto found = content.find(mvLogHeader[i]);
+ ASSERT_TRUE(found == std::string::npos);
+ }
+}
+
+//------------------------------------------------------------------------------
+// MvncGraphAllocations Tests
+//------------------------------------------------------------------------------
+/**
+ * @brief Allocate graph for one device
+ */
+TEST_P(MvncGraphAllocations, DISABLED_OneGraph) {
+    if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(1, _deviceHandle, _bootedDevices);
+
+    // Create the graph handle.
+    const std::string graphName = "graph";
+    ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[0]));
+    ASSERT_TRUE(_graphHandle[0] != nullptr);
+
+    // Allocate it on the single opened device; the blob buffer doubles as
+    // both graph payload and header (header length is Elf + blob header).
+    ASSERT_NO_ERROR(ncGraphAllocate(_deviceHandle[0], _graphHandle[0],
+                                    _blob.data(), _blob.size(),  // Blob
+                                    _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2)));  // Header
+}
+
+/**
+ * @brief Allocate graphs for 2 device (serial)
+ */
+TEST_P(MvncGraphAllocations, DISABLED_AllocateGraphsOn2DevicesSerial) {
+    if (!blobLoaded)
+        GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(2, _deviceHandle, _bootedDevices);
+
+    // Create one graph handle per booted device.
+    for (int index = 0; index < _bootedDevices; ++index) {
+        std::string graphName = "graph";
+        graphName += std::to_string(index);
+        ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[index]));
+        ASSERT_TRUE(_graphHandle[index] != nullptr);
+    }
+
+    // Allocate graphs in serial mode.
+    // FIX: use the i-th device/graph pair; the previous code always passed
+    // _deviceHandle[0]/_graphHandle[0], so the second device was never
+    // exercised and the "2 devices" scenario was not actually tested.
+    ncStatus_t rc[MAX_DEVICES];
+    for (int i = 0; i < _bootedDevices; ++i) {
+        rc[i] = ncGraphAllocate(_deviceHandle[i], _graphHandle[i],
+                                _blob.data(), _blob.size(),  // Blob
+                                _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2));  // Header
+    }
+
+    for (int i = 0; i < _bootedDevices; ++i) {
+        ASSERT_NO_ERROR(rc[i]);
+    }
+}
+
+/**
+* @brief Allocate graphs for 2 device (parallel)
+* @detail Open devices and then in parallel threads try to load graphs to it
+* The error easy appear, if USBLINK_TRANSFER_SIZE is (1024 * 1024 * 20)
+* @warning It's depend on USBLINK_TRANSFER_SIZE constant from UsbLinkPlatform.c file
+* @warning Need blob to use this tests
+*/
+TEST_P(MvncGraphAllocations, DISABLED_AllocateGraphsOn2DevicesParallel) {
+    if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(2, _deviceHandle, _bootedDevices);
+
+    // Create one graph handle per booted device.
+    for (int index = 0; index < _bootedDevices; ++index) {
+        std::string graphName = "graph";
+        graphName += std::to_string(index);
+        ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[index]));
+        ASSERT_TRUE(_graphHandle[index] != nullptr);
+    }
+
+    // Allocate graphs in parallel threads.
+    std::thread requests[MAX_DEVICES];
+    ncStatus_t rc[MAX_DEVICES];
+    for (int i = 0; i < _bootedDevices; ++i) {
+        requests[i] = std::thread([i, &rc, this]() {
+            // FIX: allocate on the i-th device/graph pair; the previous code
+            // made every thread target _deviceHandle[0]/_graphHandle[0], so
+            // parallel allocation across devices was never exercised.
+            rc[i] = ncGraphAllocate(_deviceHandle[i], _graphHandle[i],
+                                    _blob.data(), _blob.size(),  // Blob
+                                    _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2));  // Header
+        });
+    }
+
+    for (int i = 0; i < _bootedDevices; ++i) {
+        requests[i].join();
+        ASSERT_NO_ERROR(rc[i]);
+    }
+}
+
+//------------------------------------------------------------------------------
+// MvncCloseDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Correct closing if handle is empty
+*/
+TEST_F(MvncCloseDevice, EmptyDeviceHandler) {
+    // Closing a null handle must be a harmless no-op, not a crash.
+    ncDeviceHandle_t *emptyHandle = nullptr;
+    ASSERT_NO_ERROR(ncDeviceClose(&emptyHandle));
+}
+
+/**
+* @brief Correct closing if some handler fields is null
+*/
+TEST_F(MvncCloseDevice, EmptyFieldsOfDeviceHandle) {
+    // FIX: initialize to nullptr. Previously the pointer was left
+    // uninitialized, so if either calloc below failed it was read
+    // uninitialized by ncDeviceClose (undefined behavior).
+    ncDeviceHandle_t *deviceHandlePtr = nullptr;
+
+    auto dH = std::unique_ptr<ncDeviceHandle_t, decltype(std::free)*>(
+        (ncDeviceHandle_t*)calloc(1, sizeof(ncDeviceHandle_t)), std::free);
+
+    auto d = std::unique_ptr<_devicePrivate_t, decltype(std::free)*>(
+        (_devicePrivate_t*)calloc(1, sizeof(_devicePrivate_t)), std::free);
+
+    if (dH.get() && d.get()) {
+        // Build a handle whose address fields are deliberately null.
+        dH->private_data = d.get();
+        d->dev_addr = nullptr;
+        d->dev_addr_booted = nullptr;
+        d->device_mon_stream_id = INVALID_LINK_ID;
+        d->graph_monitor_stream_id = INVALID_LINK_ID;
+        d->wd_interval = watchdogInterval;
+        deviceHandlePtr = dH.get();
+    }
+
+    // Such a handle must be rejected as invalid, not dereferenced.
+    ASSERT_EQ(ncDeviceClose(&deviceHandlePtr), NC_INVALID_PARAMETERS);
+}
+
+//------------------------------------------------------------------------------
+// MvncInference Tests
+//------------------------------------------------------------------------------
+// MvncInference reuses the MvncGraphAllocations fixture (device handles,
+// graph handles, and the loaded blob) for end-to-end inference tests.
+using MvncInference = MvncGraphAllocations;
+
+// Full pipeline smoke test: allocate a graph, create input/output FIFOs,
+// queue one inference and read the result back.
+TEST_P(MvncInference, DISABLED_DoOneIterationOfInference) {
+    if (!blobLoaded) GTEST_SKIP_("Blob for test is not loaded\n");
+    openDevices(1, _deviceHandle, _bootedDevices);
+
+    std::string graphName = "graph";
+    ASSERT_NO_ERROR(ncGraphCreate(graphName.c_str(), &_graphHandle[0]));
+    // FIX: check the handle value itself. The previous
+    // `ASSERT_TRUE(&_graphHandle[0] != nullptr)` took the address of the
+    // array slot, which is never null, so the check could not fail.
+    ASSERT_TRUE(_graphHandle[0] != nullptr);
+
+    ASSERT_NO_ERROR(ncGraphAllocate(_deviceHandle[0], _graphHandle[0],
+                                    _blob.data(), _blob.size(),  // Blob
+                                    _blob.data(), sizeof(ElfN_Ehdr) + sizeof(blob_header_v2)));
+
+    // Query input/output counts, then the tensor descriptors.
+    unsigned int dataLength = sizeof(int);
+
+    int numInputs = 0;
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_INPUT_COUNT, &numInputs, &dataLength));
+
+    int numOutputs = 0;
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_OUTPUT_COUNT, &numOutputs, &dataLength));
+
+    dataLength = sizeof(ncTensorDescriptor_t);
+
+    ncTensorDescriptor_t inputDesc = {};
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_INPUT_TENSOR_DESCRIPTORS, &inputDesc,
+                                     &dataLength));
+
+    ncTensorDescriptor_t outputDesc = {};
+    ASSERT_NO_ERROR(ncGraphGetOption(_graphHandle[0], NC_RO_GRAPH_OUTPUT_TENSOR_DESCRIPTORS, &outputDesc,
+                                     &dataLength));
+
+    // One host-writable input FIFO and one host-readable output FIFO.
+    unsigned int fifo_elements = 4;
+
+    ncFifoHandle_t *inputFifoHandle = nullptr;
+    ASSERT_NO_ERROR(ncFifoCreate("input", NC_FIFO_HOST_WO, &inputFifoHandle));
+    ASSERT_NO_ERROR(ncFifoAllocate(inputFifoHandle, _deviceHandle[0], &inputDesc, fifo_elements));
+
+    ncFifoHandle_t *outputFifoHandle = nullptr;
+    ASSERT_NO_ERROR(ncFifoCreate("output", NC_FIFO_HOST_RO, &outputFifoHandle));
+    ASSERT_NO_ERROR(ncFifoAllocate(outputFifoHandle, _deviceHandle[0], &outputDesc, fifo_elements));
+
+    // Queue one (uninitialized) input element and read the inference result.
+    // NOTE(review): the buffers leak if an ASSERT fires before delete[];
+    // acceptable here because a failed ASSERT aborts the test anyway.
+    uint8_t *input_data = new uint8_t[inputDesc.totalSize];
+    uint8_t *result_data = new uint8_t[outputDesc.totalSize];
+    ASSERT_NO_ERROR(ncGraphQueueInferenceWithFifoElem(_graphHandle[0],
+                                                      inputFifoHandle, outputFifoHandle,
+                                                      input_data, &inputDesc.totalSize, nullptr));
+
+    void *userParam = nullptr;
+    ASSERT_NO_ERROR(ncFifoReadElem(outputFifoHandle, result_data, &outputDesc.totalSize, &userParam));
+
+    delete[] input_data;
+    delete[] result_data;
+
+    // Tear down FIFOs, graph, and the device.
+    ASSERT_NO_ERROR(ncFifoDestroy(&inputFifoHandle));
+    ASSERT_NO_ERROR(ncFifoDestroy(&outputFifoHandle));
+    ASSERT_NO_ERROR(ncGraphDestroy(&_graphHandle[0]));
+    ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[0]));
+}
+
+
+// Instantiate each parameterized suite once per supported Myriad protocol
+// (see myriadProtocols); PrintToStringParamName yields readable test names.
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncOpenDevice,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
+
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncLoggingTests,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
+
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncGraphAllocations,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
+
+INSTANTIATE_TEST_CASE_P(MvncTestsCommon,
+                        MvncInference,
+                        ::testing::ValuesIn(myriadProtocols),
+                        PrintToStringParamName());
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <thread>
+
+#include "mvnc.h"
+#include "mvnc_test_helper.h"
+#include "mvnc_usb_test_cases.h"
+#include "ncPrivateTypes.h"
+
+//------------------------------------------------------------------------------
+// MvncOpenUSBDevice Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open any device with custom firmware path as ncDeviceOpen argument
+*/
+
+// A zero connect timeout must make open fail fast; restoring a sane timeout
+// must make the same open succeed again.
+TEST_F(MvncOpenUSBDevice, ShouldOpenDeviceAfterChangeConnectTimeoutFromZero) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    ncDeviceDescr_t descr = {};
+    descr.protocol = NC_ANY_PROTOCOL;
+    descr.platform = NC_ANY_PLATFORM;
+    ncDeviceHandle_t *handle = nullptr;
+
+    // With a zero timeout the open must fail immediately.
+    ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(0));
+    ASSERT_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    std::this_thread::sleep_for(3_sec);
+    ASSERT_NO_ERROR(ncDeviceResetAll());
+
+    // With a generous timeout the open must succeed.
+    ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(30));
+    ASSERT_NO_ERROR(ncDeviceOpen(&handle, descr, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceClose(&handle));
+
+    ASSERT_NO_ERROR(ncDeviceResetAll());
+}
+
+
+// Opening with the fixture's custom firmware directory (firmwarePath) must
+// succeed; verifies the path argument of ncDeviceOpen is honored.
+TEST_F(MvncOpenUSBDevice, WithCustomFirmware) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    // Use custom firmware dir path as parameter for ncDeviceOpen
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+}
+
+/**
+* @brief Open all available devices and close them
+*/
+TEST_F(MvncOpenUSBDevice, AllAvailableDevices) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    ncDeviceHandle_t *handles[MAX_DEVICES] = {nullptr};
+
+    // Open every detected device, then close them all.
+    for (int i = 0; i < availableDevices_; ++i) {
+        ASSERT_NO_ERROR(ncDeviceOpen(&handles[i], deviceDesc_, watchdogInterval, firmwarePath));
+    }
+    for (int i = 0; i < availableDevices_; ++i) {
+        ASSERT_NO_ERROR(ncDeviceClose(&handles[i]));
+    }
+}
+
+/**
+* @brief Open all available devices in parallel threads and close them
+*/
+TEST_F(MvncOpenUSBDevice, AllAvailableMultiThreads) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    std::thread workers[MAX_DEVICES];
+    ncDeviceHandle_t *handles[MAX_DEVICES] = {nullptr};
+    ncStatus_t openStatus[MAX_DEVICES];
+
+    // Each thread opens one device concurrently.
+    for (int i = 0; i < availableDevices_; ++i) {
+        workers[i] = std::thread([i, &openStatus, &handles, this]() {
+            openStatus[i] = ncDeviceOpen(&handles[i], deviceDesc_, watchdogInterval, firmwarePath);
+        });
+    }
+
+    // Every open must have succeeded.
+    for (int i = 0; i < availableDevices_; ++i) {
+        workers[i].join();
+        ASSERT_NO_ERROR(openStatus[i]);
+    }
+
+    for (int i = 0; i < availableDevices_; ++i) {
+        ASSERT_NO_ERROR(ncDeviceClose(&handles[i]));
+    }
+}
+
+/**
+* @brief Open any device with invalid firmware path
+*/
+TEST_F(MvncOpenUSBDevice, WithInvalidFirmwarePath) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    // A firmware directory that does not exist must make the open fail
+    // and leave the handle untouched (null).
+    const char invalidPath[MAX_PATH] = "./InvalidPath/";
+    ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, invalidPath));
+    ASSERT_EQ(deviceHandle_, nullptr);
+}
+
+// Opening a device by its exact enumerated name must yield that same device.
+TEST_F(MvncOpenUSBDevice, OpenAvailableDeviceByName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    char dev_addr_open[NC_MAX_NAME_SIZE];
+    unsigned int data_length = NC_MAX_NAME_SIZE;
+
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    // FIX: strncpy does not null-terminate when the source fills the whole
+    // buffer; copy one byte less and terminate explicitly so the later
+    // strncmp cannot read past the array.
+    strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE - 1);
+    deviceDesc_.name[NC_MAX_NAME_SIZE - 1] = '\0';
+
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
+                                      dev_addr_open, &data_length));
+
+    // The opened device must report exactly the requested name.
+    ASSERT_TRUE(strncmp(dev_addr_open, deviceDesc_.name, NC_MAX_NAME_SIZE) == 0);
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+}
+
+// Requesting a device by a name that matches nothing must fail the open.
+TEST_F(MvncOpenUSBDevice, ErrorWhenWrongDeviceName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    char badName[] = "BadName";
+    strncpy(deviceDesc_.name, badName, NC_MAX_NAME_SIZE);
+
+    // Sanity check: real devices do exist, only the name is wrong.
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+}
+
+// Opening twice by name with the same handle must keep the same device.
+TEST_F(MvncOpenUSBDevice, OpenTwiceSameHandlerByName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    char dev_addr_first_open[MAX_DEV_NAME];
+    unsigned int data_length_first = MAX_DEV_NAME;
+
+    char dev_addr_second_open[MAX_DEV_NAME];
+    unsigned int data_length_second = MAX_DEV_NAME;
+
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    // FIX: strncpy does not null-terminate when the source fills the whole
+    // buffer; copy one byte less and terminate explicitly.
+    strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE - 1);
+    deviceDesc_.name[NC_MAX_NAME_SIZE - 1] = '\0';
+
+    // First open, get device name
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
+                                      dev_addr_first_open, &data_length_first));
+
+    // Second open, get device name
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME,
+                                      dev_addr_second_open, &data_length_second));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+    // Should be the same device
+    ASSERT_STREQ(dev_addr_first_open, dev_addr_second_open);
+}
+
+// Requesting a device by name together with a platform that contradicts the
+// name must be rejected.
+TEST_F(MvncOpenUSBDevice, CheckErrorWhenPlatformConflictWithName) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    auto availableDevices = getDevicesList();
+    ASSERT_TRUE(availableDevices.size());
+
+    // Pick the platform that is NOT the one the first device actually has.
+    ncDevicePlatform_t wrongPlatform = NC_ANY_PLATFORM;
+    if(isMyriadXUSBDevice(availableDevices[0])) {
+        wrongPlatform = NC_MYRIAD_2;
+    } else {
+        wrongPlatform = NC_MYRIAD_X;
+    }
+
+    // FIX: strncpy does not null-terminate when the source fills the whole
+    // buffer; copy one byte less and terminate explicitly.
+    strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE - 1);
+    deviceDesc_.name[NC_MAX_NAME_SIZE - 1] = '\0';
+    deviceDesc_.platform = wrongPlatform;
+
+    ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+}
+
+//------------------------------------------------------------------------------
+// MvncCloseUSBDevice Tests
+//------------------------------------------------------------------------------
+#if (!(defined(_WIN32) || defined(_WIN64)))
+// After ncDeviceClose the USB device must immediately reappear in the
+// unbooted state (i.e. the XLink reset completed before close returned).
+TEST_F(MvncCloseUSBDevice, USBDeviceWillBeAvailableRightAfterClosing) {
+    if (availableDevices_ == 0)
+        GTEST_SKIP();
+
+    ASSERT_NO_ERROR(ncDeviceOpen(
+        &deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+    ASSERT_TRUE(deviceHandle_);
+
+    // Remember the address of the device we just opened.
+    deviceDesc_t searchDescr = {
+        .protocol = X_LINK_USB_VSC,
+        .platform = X_LINK_ANY_PLATFORM
+    };
+    strcpy(deviceDesc_.name, deviceHandle_->private_data->dev_addr);
+
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+
+    // It must be discoverable again, unbooted, right away.
+    deviceDesc_t rediscovered = {};
+    XLinkError_t status = XLinkFindFirstSuitableDevice(
+        X_LINK_UNBOOTED, searchDescr, &rediscovered);
+    ASSERT_EQ(X_LINK_SUCCESS, status);
+}
+#endif
+
+//------------------------------------------------------------------------------
+// MvncDevicePlatform Tests
+//------------------------------------------------------------------------------
+/**
+* @brief Open specified device and close it
+*/
+TEST_P(MvncDevicePlatform, OpenAndClose) {
+    // Needs one device of each platform to make the platform check meaningful.
+    if (available_myriad2_ == 0 || available_myriadX_ == 0)
+        GTEST_SKIP();
+
+    ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath));
+
+    // The opened device must belong to the platform under test.
+    char openedName[MAX_DEV_NAME];
+    unsigned int nameLength = MAX_DEV_NAME;
+    ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME, openedName, &nameLength));
+    EXPECT_TRUE(isSamePlatformUSBDevice(openedName, devicePlatform_));
+
+    ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_));
+}
+
+// Instantiate the platform suite once per supported Myriad platform
+// (see myriadPlatforms); PrintToStringParamName yields readable test names.
+INSTANTIATE_TEST_CASE_P(MvncTestsPlatform,
+                        MvncDevicePlatform,
+                        ::testing::ValuesIn(myriadPlatforms),
+                        PrintToStringParamName());
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ncCommPrivate.h"
+#include "mvnc_test_helper.h"
+
+#include <gtest/gtest.h>
+#include <fstream>
+
+extern "C" {
+#include "XLinkStringUtils.h"
+}
+
+// Fixture for firmware-path resolution tests: each test creates a stub
+// .mvcmd file under tmpDir; TearDown deletes it again.
+class MvncUtilsTest : public ::testing::Test {
+public:
+    void TearDown() override {
+        // Remove the firmware stub created by the test body.
+        std::remove(mvcmdExpectedPath.c_str());
+    }
+
+protected:
+    // Full path of the .mvcmd file the test expects getFirmwarePath() to pick.
+    std::string mvcmdExpectedPath = "";
+    // FIXME: seems it is not going to work on Windows
+    const std::string tmpDir = "/tmp";
+};
+
+// When the universal usb-ma2x8x firmware is absent, the platform-specific
+// usb-ma248x firmware must be selected.
+TEST_F(MvncUtilsTest, CanGetSpecialFWIfUniversalIsNotPresent) {
+    mvcmdExpectedPath = tmpDir + "/usb-ma248x.mvcmd";
+
+    // Create an empty platform-specific firmware stub on disk.
+    std::ofstream mvcmd;
+    mvcmd.open(mvcmdExpectedPath, std::ios::out);
+    // FIX: close the stream explicitly so the file is flushed and not held
+    // open while getFirmwarePath() scans the directory (and before TearDown
+    // removes it).
+    mvcmd.close();
+
+    char mvcmdFilePath[MAX_PATH] = "";
+    mv_strcpy(mvcmdFilePath, MAX_PATH, tmpDir.c_str());
+
+    // A MyriadX USB device descriptor to resolve firmware for.
+    deviceDesc_t dummyDevDesc2480;
+    strcpy(dummyDevDesc2480.name, "0-ma2480");
+    dummyDevDesc2480.protocol = X_LINK_USB_VSC;
+    dummyDevDesc2480.platform = X_LINK_MYRIAD_X;
+
+    ASSERT_EQ(NC_OK, getFirmwarePath(mvcmdFilePath, MAX_PATH, dummyDevDesc2480));
+    ASSERT_STRCASEEQ(mvcmdExpectedPath.c_str(), mvcmdFilePath);
+}
+
+// When the universal usb-ma2x8x firmware exists, it must be preferred.
+TEST_F(MvncUtilsTest, CanGetUniversalFWIfItExists) {
+    mvcmdExpectedPath = tmpDir + "/usb-ma2x8x.mvcmd";
+
+    // Create an empty universal firmware stub on disk.
+    std::ofstream mvcmd;
+    mvcmd.open(mvcmdExpectedPath, std::ios::out);
+    // FIX: close the stream explicitly so the file is flushed and not held
+    // open while getFirmwarePath() scans the directory (and before TearDown
+    // removes it).
+    mvcmd.close();
+
+    char mvcmdFilePath[MAX_PATH] = "";
+    mv_strcpy(mvcmdFilePath, MAX_PATH, tmpDir.c_str());
+
+    // A MyriadX USB device descriptor to resolve firmware for.
+    deviceDesc_t dummyDevDesc2480;
+    strcpy(dummyDevDesc2480.name, "0-ma2480");
+    dummyDevDesc2480.protocol = X_LINK_USB_VSC;
+    dummyDevDesc2480.platform = X_LINK_MYRIAD_X;
+
+    ASSERT_EQ(NC_OK, getFirmwarePath(mvcmdFilePath, MAX_PATH, dummyDevDesc2480));
+    ASSERT_STRCASEEQ(mvcmdExpectedPath.c_str(), mvcmdFilePath);
+}
--- /dev/null
+# Build the XLink test suite.
+add_subdirectory(XLink)
--- /dev/null
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(TARGET_NAME "XLinkTests")
+
+# Require C++11 instead of merely preferring it: without
+# CMAKE_CXX_STANDARD_REQUIRED, CMake silently falls back to an older
+# standard when the compiler does not support C++11.
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+include(${XLINK_DIR}/XLink.cmake)
+
+# NOTE(review): globbing means newly added test sources are only picked up
+# after a re-configure; prefer an explicit source list if the suite grows.
+file(GLOB_RECURSE XLINK_TESTS_SOURCES "*.cpp")
+
+add_executable(${TARGET_NAME} ${XLINK_TESTS_SOURCES})
+
+target_include_directories(${TARGET_NAME}
+    PRIVATE
+        helpers
+        cases
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/include
+        ${IE_MAIN_SOURCE_DIR}/tests_new/gtest/googletest/
+        ${XLINK_INCLUDE}
+        ${XLINK_PLATFORM_INCLUDE})
+
+target_compile_definitions(${TARGET_NAME}
+    PRIVATE
+        __PC__)
+
+target_link_libraries(${TARGET_NAME}
+    PRIVATE
+        XLink gtest gtest_main)
+
+set_target_properties(${TARGET_NAME} PROPERTIES
+    POSITION_INDEPENDENT_CODE TRUE
+    COMPILE_PDB_NAME ${TARGET_NAME})
+
+# Firmware must be staged before the tests can boot devices.
+add_dependencies(${TARGET_NAME} vpu_copy_firmware)
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "XLink_common_cases.hpp"
+
+#include <thread>
+
+//------------------------------------------------------------------------------
+// XLinkNullPtrTests
+//------------------------------------------------------------------------------
+
+// XLinkInitialize must reject a null global-handler argument.
+TEST_F(XLinkNullPtrTests, XLinkInitialize) {
+    const XLinkError_t status = XLinkInitialize(nullptr);
+    ASSERT_EQ(status, X_LINK_ERROR);
+}
+
+// XLinkConnect must reject a null handler argument.
+TEST_F(XLinkNullPtrTests, XLinkConnect) {
+    const XLinkError_t status = XLinkConnect(nullptr);
+    ASSERT_EQ(status, X_LINK_ERROR);
+}
+
+// Stream open with a null name and close of stream id 0 must both fail.
+TEST_F(XLinkNullPtrTests, XLinkOpenAndCloseStream) {
+    ASSERT_EQ(X_LINK_ERROR, XLinkOpenStream(0, nullptr, 0));
+    ASSERT_EQ(X_LINK_ERROR, XLinkCloseStream(0));
+}
+
+// Both device-search entry points must fail on a null output pointer.
+TEST_F(XLinkNullPtrTests, XLinkFindDevice) {
+    ASSERT_EQ(X_LINK_ERROR,
+              XLinkFindFirstSuitableDevice(X_LINK_ANY_STATE, {}, nullptr));
+    ASSERT_EQ(X_LINK_ERROR,
+              XLinkFindAllSuitableDevices(X_LINK_ANY_STATE, {}, nullptr, -1, nullptr));
+}
+
+// XLinkWriteData must reject a null data buffer.
+TEST_F(XLinkNullPtrTests, XLinkWriteData) {
+    const XLinkError_t status = XLinkWriteData(0, nullptr, 0);
+    ASSERT_EQ(status, X_LINK_ERROR);
+}
+
+//------------------------------------------------------------------------------
+// XLinkBootTests
+//------------------------------------------------------------------------------
+
+// Repeatedly boot, connect, and close one device of the tested protocol.
+TEST_P(XLinkBootTests, StressTestBootToOpenAndCloseDevice) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    requirements.platform = X_LINK_ANY_PLATFORM;
+
+    // Pick any unbooted device matching the protocol under test.
+    deviceDesc_t unbootedDevice = {};
+    ASSERT_EQ(X_LINK_SUCCESS,
+              XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, requirements, &unbootedDevice));
+
+    std::string firmwarePath;
+    ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(unbootedDevice));
+
+    for (int i = 0; i < 10; ++i) {
+        printf("Boot device. Iteration: %d\n", i);
+        ASSERT_EQ(X_LINK_SUCCESS, XLinkBoot(&unbootedDevice, firmwarePath.c_str()));
+        // FIXME: need to find a way to avoid this sleep
+        std::this_thread::sleep_for(kBootTimeoutSec);
+
+        // The device must now be discoverable in the booted state.
+        deviceDesc_t bootedDevice = {};
+        ASSERT_EQ(X_LINK_SUCCESS,
+                  XLinkFindFirstSuitableDevice(X_LINK_BOOTED, requirements, &bootedDevice));
+
+        XLinkHandler_t handler = {0};
+        connectToDevice(bootedDevice, &handler);
+        closeDevice(&handler);
+    }
+}
+
+//------------------------------------------------------------------------------
+// XLinkConnectUSBTests
+//------------------------------------------------------------------------------
+
+// Boot any device of the tested protocol, connect to it, and close it.
+TEST_P(XLinkConnectTests, ConnectToDevice) {
+    if (getCountSpecificDevices(X_LINK_UNBOOTED, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    deviceDesc_t bootedDevice = {};
+    bootDevice(requirements, bootedDevice);
+
+    XLinkHandler_t handler = {0};
+    handler.protocol = bootedDevice.protocol;
+    handler.devicePath = bootedDevice.name;
+
+    ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(&handler));
+    std::this_thread::sleep_for(std::chrono::seconds(2));
+
+    closeDevice(&handler);
+}
+
+//------------------------------------------------------------------------------
+// XLinkFindFirstSuitableDeviceTests
+//------------------------------------------------------------------------------
+
+// Find-first must report a plausible name and a protocol/platform that match
+// the requested ones.
+TEST_P(XLinkFindFirstSuitableDevicePlatformTests, ReturnCorrectAvailableDeviceName) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol, _platform) == 0) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t deviceDesc = {};
+    deviceDesc_t in_deviceDesc = {};
+    in_deviceDesc.protocol = _protocol;
+    in_deviceDesc.platform = _platform;
+
+    ASSERT_EQ(X_LINK_SUCCESS,
+              XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &deviceDesc));
+    ASSERT_TRUE(strlen(deviceDesc.name) > 2);
+    ASSERT_EQ(deviceDesc.protocol, in_deviceDesc.protocol);
+
+    if(_platform == X_LINK_ANY_PLATFORM) {
+        // A concrete platform must be reported for the found device.
+        EXPECT_NE(deviceDesc.platform, X_LINK_ANY_PLATFORM);
+    } else {
+        EXPECT_EQ(deviceDesc.platform, _platform);
+    }
+
+    // FIX: the kUSBMyriad2/kUSBMyriadX platform suffixes are part of *USB*
+    // device names, so this check must run when the protocol IS USB. The
+    // previous `!=` applied it to non-USB (PCIe) names, which do not carry
+    // these suffixes. Mirrors ReturnCorrectBootedDeviceName, which checks
+    // the suffixes disappear from USB names after boot.
+    if(_protocol == X_LINK_USB_VSC) {
+        std::string deviceName(deviceDesc.name);
+        switch (_platform) {
+            case X_LINK_MYRIAD_2: {
+                EXPECT_TRUE(deviceName.find(kUSBMyriad2) != std::string::npos);
+                break;
+            }
+            case X_LINK_MYRIAD_X: {
+                EXPECT_TRUE(deviceName.find(kUSBMyriadX) != std::string::npos);
+                break;
+            }
+            default:
+                break;
+        }
+    }
+}
+
+// A device found by protocol must also be findable by its exact name.
+TEST_P(XLinkFindFirstSuitableDeviceTests, CanFindDeviceByName) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    // Discover any device of the protocol under test.
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    deviceDesc_t firstFound = {};
+    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindFirstSuitableDevice(
+        X_LINK_ANY_STATE, requirements, &firstFound));
+
+    // Search again, this time constrained to that exact name.
+    deviceDesc_t byNameRequirements = {};
+    byNameRequirements.protocol = _protocol;
+    strcpy(byNameRequirements.name, firstFound.name);
+
+    deviceDesc_t foundByName = {};
+    ASSERT_EQ(X_LINK_SUCCESS, XLinkFindFirstSuitableDevice(
+        X_LINK_ANY_STATE, byNameRequirements, &foundByName));
+
+    // Both searches must resolve to the same device.
+    ASSERT_TRUE(strcmp(firstFound.name, foundByName.name) == 0);
+}
+
+/**
+ * This is temporary test.
+ * For now it's not clear how to tests multiple device as for now we don't have bench like this
+ */
+TEST_P(XLinkFindFirstSuitableDeviceTests, OnSecondIndexDeviceWillBeNotFound) {
+    // Only meaningful when exactly one device of this protocol is attached.
+    auto availableDevices = getCountSpecificDevices(X_LINK_ANY_STATE, _protocol);
+    if (availableDevices != 1) {
+        GTEST_SKIP();
+    }
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+
+    // With one device present, index 1 must yield "not found".
+    const int index = 1;
+    deviceDesc_t found = {};
+    ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND, findDeviceOnIndex(
+        index, X_LINK_ANY_STATE, requirements, &found));
+}
+
+// After booting a device, the booted-state search must report the same
+// device, and booted USB names must no longer carry a platform suffix.
+TEST_P(XLinkFindFirstSuitableDeviceTests, ReturnCorrectBootedDeviceName) {
+    if (getCountSpecificDevices(X_LINK_ANY_STATE, _protocol) == 0) {
+        GTEST_SKIP();
+    }
+
+    // Boot any device of the protocol under test.
+    deviceDesc_t toBoot = {};
+    toBoot.protocol = _protocol;
+    deviceDesc_t bootedDevice = {};
+    bootDevice(toBoot, bootedDevice);
+
+    deviceDesc_t requirements = {};
+    requirements.protocol = _protocol;
+    requirements.platform = X_LINK_ANY_PLATFORM;
+
+    deviceDesc_t foundDeviceDesc = {};
+    EXPECT_EQ(X_LINK_SUCCESS,
+              XLinkFindFirstSuitableDevice(X_LINK_BOOTED, requirements, &foundDeviceDesc));
+
+    EXPECT_TRUE(strcmp(bootedDevice.name, foundDeviceDesc.name) == 0);
+    EXPECT_EQ(foundDeviceDesc.protocol, _protocol);
+
+    if(_protocol == X_LINK_USB_VSC) {
+        // Booted USB devices drop the ma2450/ma2480 platform suffix.
+        std::string foundDeviceName(foundDeviceDesc.name);
+        EXPECT_TRUE(foundDeviceName.find(kUSBMyriad2) == std::string::npos);
+        EXPECT_TRUE(foundDeviceName.find(kUSBMyriadX) == std::string::npos);
+    }
+
+    connectAndCloseDevice(bootedDevice);
+}
+
+//------------------------------------------------------------------------------
+// XLinkFindAllSuitableDevicesTests
+//------------------------------------------------------------------------------
+
+// With at least two unbooted devices present, XLinkFindAllSuitableDevices with
+// X_LINK_ANY_PROTOCOL must report every unbooted device, and the per-platform
+// counts (Myriad2 + MyriadX) must add up to the total.
+TEST_F(XLinkFindAllSuitableDevicesTests, CanFindMoreThenTwoDeviceAnyState_USB_PCIE) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED) < 2) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t in_deviceDesc = {};
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {{}};
+
+ in_deviceDesc.protocol = X_LINK_ANY_PROTOCOL;
+ unsigned int numOfFoundDevices = 0;
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindAllSuitableDevices(
+ X_LINK_ANY_STATE, in_deviceDesc, deviceDescArray,
+ XLINK_MAX_DEVICES, &numOfFoundDevices));
+
+ // NOTE(review): unsigned count compared with int helper result — benign in
+ // gtest's EXPECT/ASSERT comparison helpers, but may warn on some compilers.
+ ASSERT_EQ(numOfFoundDevices, getCountSpecificDevices(X_LINK_UNBOOTED));
+ ASSERT_EQ(numOfFoundDevices,
+ getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_ANY_PROTOCOL, X_LINK_MYRIAD_2) +
+ getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_ANY_PROTOCOL, X_LINK_MYRIAD_X));
+}
+
+// Boots one of at least two devices, then checks that an X_LINK_ANY_STATE
+// search sees both the booted and the remaining unbooted devices.
+TEST_F(XLinkFindAllSuitableDevicesTests, CanFindTwoDeviceDifferentState_USB_PCIE) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED) < 2) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t in_deviceDesc = {};
+ in_deviceDesc.protocol = X_LINK_ANY_PROTOCOL;
+
+ // Find & boot one device
+ deviceDesc_t firstDeviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &firstDeviceDesc));
+ bootDevice(firstDeviceDesc, bootedDeviceDesc);
+
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {{}};
+ unsigned int numOfFoundDevices = 0;
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindAllSuitableDevices(
+ X_LINK_ANY_STATE, in_deviceDesc, deviceDescArray,
+ XLINK_MAX_DEVICES, &numOfFoundDevices));
+
+ // NOTE(review): signed loop index against unsigned numOfFoundDevices; safe
+ // for realistic counts but may trigger -Wsign-compare.
+ // Presumably booted devices report platform == X_LINK_ANY_PLATFORM — that is
+ // the property this detection relies on; confirm against the driver.
+ bool foundBootedDevice = false;
+ for (int i = 0; i < numOfFoundDevices; ++i) {
+ if (deviceDescArray[i].platform == X_LINK_ANY_PLATFORM)
+ foundBootedDevice = true;
+ }
+
+ EXPECT_GE(numOfFoundDevices, 2);
+ EXPECT_TRUE(foundBootedDevice);
+ EXPECT_EQ(numOfFoundDevices, getCountSpecificDevices(X_LINK_UNBOOTED) +
+ getCountSpecificDevices(X_LINK_BOOTED));
+
+ connectAndCloseDevice(bootedDeviceDesc);
+}
+
+//------------------------------------------------------------------------------
+// XLinkResetRemoteTests
+//------------------------------------------------------------------------------
+
+// Boots and connects to a device, resets it via XLinkResetRemote, and then
+// verifies no booted device remains discoverable after the reset timeout.
+TEST_P(XLinkResetRemoteTests, CanResetRemoteDevice) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, _protocol) == 0) {
+ GTEST_SKIP();
+ }
+
+ XLinkHandler_t handler = {0};
+ deviceDesc_t deviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+
+ deviceDesc.protocol = _protocol;
+ bootDevice(deviceDesc, bootedDeviceDesc);
+ connectToDevice(bootedDeviceDesc, &handler);
+
+ // Reset device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler.linkId));
+ std::this_thread::sleep_for(kResetTimeoutSec);
+
+ // Make sure that device is really rebooted (no longer in the booted list)
+ deviceDesc_t foundDeviceDesc = {};
+ ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, deviceDesc, &foundDeviceDesc));
+}
+
+//------------------------------------------------------------------------------
+// XLinkResetAllTests
+//------------------------------------------------------------------------------
+// Disabled: boots a device and checks that XLinkResetAll leaves no device in
+// the booted state after the reset timeout.
+TEST_P(XLinkResetAllTests, DISABLED_ResetBootedDevice) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, _protocol) == 0) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t deviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+
+ deviceDesc.protocol = _protocol;
+ bootDevice(deviceDesc, bootedDeviceDesc);
+
+ // Try to reset device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetAll());
+ std::this_thread::sleep_for(kResetTimeoutSec);
+
+ // After a global reset, no booted device of any protocol should be found
+ deviceDesc.protocol = X_LINK_ANY_PROTOCOL;
+ deviceDesc_t afterResetBootedDescr = {};
+ ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, deviceDesc, &afterResetBootedDescr));
+}
+
+//------------------------------------------------------------------------------
+// XLinkOpenStreamTests
+//------------------------------------------------------------------------------
+
+// A stream can be opened with a modest buffer size and then closed cleanly.
+TEST_P(XLinkOpenStreamTests, CanOpenAndCloseStream) {
+ streamId_t stream = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream", 1024);
+ ASSERT_NE(INVALID_STREAM_ID, stream);
+ ASSERT_NE(INVALID_STREAM_ID_OUT_OF_MEMORY, stream);
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream));
+}
+
+// CannotOpenStreamMoreThanMemoryOnDevice
+// Requesting a 512 MB stream must fail with the out-of-memory stream id.
+TEST_P(XLinkOpenStreamTests, CannotOpenStreamMoreThanMemoryOnDevice) {
+ const int _512MB = 512 * 1024 * 1024;
+ streamId_t stream = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream", _512MB);
+ ASSERT_EQ(INVALID_STREAM_ID_OUT_OF_MEMORY, stream);
+}
+
+// FIXME: the test doesn't work
+// TODO: is it correct behavior, should we accept the same names
+TEST_P(XLinkOpenStreamTests, DISABLED_CannotOpenTwoStreamsWithTheSameName) {
+ const int _1KB = 1 * 1024;
+ const char streamName[] = "mySuperStream";
+ streamId_t stream0 = XLinkOpenStream(_handlerPtr.get()->linkId, streamName, _1KB);
+ ASSERT_NE(INVALID_STREAM_ID, stream0);
+
+ streamId_t stream1 = XLinkOpenStream(_handlerPtr.get()->linkId, streamName, _1KB);
+ ASSERT_EQ(INVALID_STREAM_ID, stream1);
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream0));
+}
+
+// FIXME: XLinkOpenStream doesn't allocate any memory on device
+TEST_P(XLinkOpenStreamTests, DISABLED_CannotOpenStreamsMoreThanMemoryOnDevice) {
+ const int _256MB = 256 * 1024 * 1024;
+ streamId_t stream0 = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream0", _256MB);
+ ASSERT_NE(INVALID_STREAM_ID, stream0);
+
+ streamId_t stream1 = XLinkOpenStream(_handlerPtr.get()->linkId, "mySuperStream1", _256MB);
+ ASSERT_EQ(INVALID_STREAM_ID, stream1);
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream0));
+ // NOTE(review): stream1 was just asserted to be INVALID_STREAM_ID, yet is
+ // closed here expecting success — revisit if this test is re-enabled.
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkCloseStream(stream1));
+}
+
+//------------------------------------------------------------------------------
+// Initialization of XLinkCommonTests
+//------------------------------------------------------------------------------
+
+// Parameterized-suite instantiations: each suite runs over the cross product of
+// protocols and platforms given below.
+// NOTE(review): most suites pass XLinkBootTests::getTestCaseName, which
+// resolves to the inherited XLinkDeviceTestsCommon::getTestCaseName, so the
+// reuse is harmless; XLinkOpenStreamTests intentionally uses its own override
+// (it prefixes DISABLED_ when no device is attached).
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkBootTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkConnectTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkFindFirstSuitableDevicePlatformTests,
+ Combine(Values(X_LINK_USB_VSC),
+ Values(X_LINK_MYRIAD_2, X_LINK_MYRIAD_X, X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommonPCIE,
+ XLinkFindFirstSuitableDevicePlatformTests,
+ Combine(Values(X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkFindFirstSuitableDeviceTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkResetAllTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkResetRemoteTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkBootTests::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(
+ XLinkCommon,
+ XLinkOpenStreamTests,
+ Combine(Values(X_LINK_USB_VSC, X_LINK_PCIE),
+ Values(X_LINK_ANY_PLATFORM)),
+ XLinkOpenStreamTests::getTestCaseName);
+
+
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "XLink_specific_cases.hpp"
+#include "usb_boot.h"
+#include <thread>
+
+//------------------------------------------------------------------------------
+// XLinkBootTests
+//------------------------------------------------------------------------------
+
+// Exercises the deprecated name-based API end-to-end: find a USB device by
+// name, boot it, rediscover the booted name, connect, and reset.
+TEST_F(XLinkBootUSBTests, CanBootConnectAndResetDevice_deprecated) {
+ if (getCountSpecificDevices(X_LINK_ANY_STATE, X_LINK_USB_VSC) == 0) {
+ GTEST_SKIP();
+ }
+
+ std::string firmwarePath;
+ char deviceName[XLINK_MAX_NAME_SIZE] = {0};
+ // Find device
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkGetDeviceName(0, deviceName, XLINK_MAX_NAME_SIZE));
+ ASSERT_NO_THROW(firmwarePath = getMyriadUSBFirmwarePath(deviceName));
+
+ // Boot it
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkBootRemote(deviceName, firmwarePath.c_str()));
+ // FIXME: need to find a way to avoid this sleep
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Find booted (the name changes after boot, so query again)
+ char bootedDeviceName[XLINK_MAX_NAME_SIZE] = {0};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkGetDeviceNameExtended(0, bootedDeviceName, XLINK_MAX_NAME_SIZE, DEFAULT_OPENPID));
+
+ // Connect to device
+ XLinkHandler_t handler = {};
+ handler.protocol = X_LINK_USB_VSC;
+ handler.devicePath = bootedDeviceName;
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(&handler));
+ std::this_thread::sleep_for(std::chrono::seconds(2));
+
+ // Reset device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler.linkId));
+ // FIXME: need to find a way to avoid this sleep
+ std::this_thread::sleep_for(kResetTimeoutSec);
+}
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cases/XLink_specific_cases.hpp"
+#include <thread>
+
+//------------------------------------------------------------------------------
+// XLinkFindFirstSuitableDeviceUSBTests
+//------------------------------------------------------------------------------
+
+// Boots a device and verifies it can be found again by its (booted) name with
+// an X_LINK_ANY_STATE search.
+// NOTE(review): deviceDesc is passed to bootDevice zero-initialized; this
+// presumably relies on enum value 0 selecting the USB protocol — confirm
+// against XLinkProtocol_t.
+TEST_F(XLinkFindFirstSuitableDeviceUSBTests, CanFindBootedDeviceByName) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_USB_VSC) == 0) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t deviceDesc = {};
+ deviceDesc_t bootedDeviceDesc = {};
+
+ bootDevice(deviceDesc, bootedDeviceDesc);
+
+ deviceDesc_t foundDeviceDescr = {};
+ EXPECT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_ANY_STATE, bootedDeviceDesc, &foundDeviceDescr));
+
+ EXPECT_TRUE(strcmp(bootedDeviceDesc.name, foundDeviceDescr.name) == 0);
+
+ connectAndCloseDevice(bootedDeviceDesc);
+}
+
+//------------------------------------------------------------------------------
+// XLinkBootUSBTests
+//------------------------------------------------------------------------------
+
+// Verifies that booting moves a device from the unbooted to the booted list:
+// the original (unbooted) name must no longer be discoverable as unbooted.
+TEST_F(XLinkBootUSBTests, DeviceNameChangedAfterBoot) {
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, X_LINK_USB_VSC) == 0) {
+ GTEST_SKIP();
+ }
+
+ deviceDesc_t unbootedDeviceDescr = {};
+ deviceDesc_t in_deviceDesc = {};
+ in_deviceDesc.protocol = X_LINK_USB_VSC;
+ in_deviceDesc.platform = X_LINK_ANY_PLATFORM;
+
+ // Get device name
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &unbootedDeviceDescr));
+ std::string firmwarePath;
+ ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(unbootedDeviceDescr));
+
+ // Boot device
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkBoot(&unbootedDeviceDescr, firmwarePath.c_str()));
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Booted device appear
+ deviceDesc_t bootedDeviceDesc = {};
+ EXPECT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, in_deviceDesc, &bootedDeviceDesc));
+
+ // The device is not in unbooted and booted list at the same time
+ deviceDesc_t foundDeviceDesc = {};
+ EXPECT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, unbootedDeviceDescr, &foundDeviceDesc));
+
+ connectAndCloseDevice(bootedDeviceDesc);
+}
+
+
+//------------------------------------------------------------------------------
+// XLinkFindPCIEDeviceTests
+//------------------------------------------------------------------------------
+
+// If a second PCIe device exists at index 1, it must be a distinct mxlink/mxlk
+// device, never a duplicate of the device found at index 0.
+TEST_F(XLinkPCIEDeviceTests, CannotFindSameDeviceTwice) {
+ if (getCountSpecificDevices(X_LINK_ANY_STATE, X_LINK_PCIE) == 0)
+ GTEST_SKIP();
+
+ deviceDesc_t deviceRequirements = {};
+ deviceRequirements.protocol = X_LINK_PCIE;
+ deviceRequirements.platform = X_LINK_ANY_PLATFORM;
+
+ deviceDesc_t deviceDescFirst = {};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_ANY_STATE, deviceRequirements, &deviceDescFirst));
+
+ // On index 1
+ deviceDesc_t deviceDescSecond = {};
+ XLinkError_t rc = findDeviceOnIndex(
+ 1, X_LINK_ANY_STATE, deviceRequirements, &deviceDescSecond);
+
+ // With a single card a NOT_FOUND at index 1 is the expected outcome
+ if (rc != X_LINK_DEVICE_NOT_FOUND) {
+ ASSERT_EQ(rc, X_LINK_SUCCESS);
+ ASSERT_TRUE(strstr(deviceDescFirst.name, PCIE_NAME_SUBSTR) != nullptr);
+ ASSERT_TRUE(strstr(deviceDescSecond.name, PCIE_NAME_SUBSTR) != nullptr);
+ ASSERT_TRUE(strcmp(deviceDescFirst.name, deviceDescSecond.name) != 0);
+ }
+}
+
+/**
+ * Real multi-device test requiring two PCIe cards (disabled by default).
+ * Boots the second card and expects the first to remain unbooted while the
+ * second shows up as booted.
+ */
+TEST_F(XLinkPCIEDeviceTests, DISABLED_CanFindFirstDeviceAfterBootSecond) {
+ if (getCountSpecificDevices(X_LINK_ANY_STATE, X_LINK_PCIE) == 0)
+ GTEST_SKIP();
+
+ // TODO Add check that there two devices
+ deviceDesc_t deviceRequirements = {};
+ deviceRequirements.protocol = X_LINK_PCIE;
+ deviceRequirements.platform = X_LINK_ANY_PLATFORM;
+
+ // Find first device
+ deviceDesc_t firstDeviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 0, X_LINK_ANY_STATE, deviceRequirements, &firstDeviceDesc));
+
+ // Find second device
+ deviceDesc_t secondDeviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 1, X_LINK_ANY_STATE, deviceRequirements, &secondDeviceDesc));
+
+ // Boot second device
+ std::string firmwarePath;
+ ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(deviceRequirements));
+
+ EXPECT_EQ(X_LINK_SUCCESS, XLinkBoot(&secondDeviceDesc, firmwarePath.c_str()));
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Check that first still in unbooted state
+ deviceDesc_t firstDeviceDescAfter = {};
+ firstDeviceDescAfter.protocol = X_LINK_PCIE;
+ firstDeviceDescAfter.platform = X_LINK_ANY_PLATFORM;
+
+
+ EXPECT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 0, X_LINK_UNBOOTED, firstDeviceDesc, &firstDeviceDescAfter));
+
+ // Check that second device now in booted state
+ deviceDesc_t secondDeviceDescAfter = {};
+ secondDeviceDescAfter.protocol = X_LINK_PCIE;
+ secondDeviceDescAfter.platform = X_LINK_ANY_PLATFORM;
+
+ EXPECT_EQ(X_LINK_SUCCESS, findDeviceOnIndex(
+ 0, X_LINK_BOOTED, secondDeviceDesc, &secondDeviceDescAfter));
+
+ // TODO Move it to separate function
+ // Close second device
+ XLinkHandler_t handler = {0};
+ handler.protocol = secondDeviceDesc.protocol;
+ handler.devicePath = secondDeviceDesc.name;
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(&handler));
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler.linkId));
+ std::this_thread::sleep_for(kResetTimeoutSec);
+}
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include "XLink_common_cases.hpp"
+
+#include <thread>
+
+// Shared global handler passed to XLinkInitialize; lives for the whole test run.
+static XLinkGlobalHandler_t globalHandler;
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkTests
+//------------------------------------------------------------------------------
+
+// One-time suite setup: initializes the XLink library and waits briefly for it
+// to come up before any test runs.
+void XLinkTests::SetUpTestCase() {
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkInitialize(&globalHandler));
+
+ // Deprecated field usage. Begin.
+ // NOTE(review): protocol is assigned after XLinkInitialize has already read
+ // the handler — if the deprecated field matters it presumably needs to be
+ // set before the call; confirm against the XLink API.
+ globalHandler.protocol = USB_VSC;
+ // Deprecated field usage. End.
+
+ // Waiting for initialization
+ std::this_thread::sleep_for(std::chrono::seconds(1));
+}
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkDeviceTestsCommon
+//------------------------------------------------------------------------------
+
+// Maps a protocol enum to the short tag used in generated test names.
+// Anything other than USB/PCIE collapses to "ANY".
+static std::string protocolToString(XLinkProtocol_t protocol) {
+ switch (protocol) {
+ case X_LINK_USB_VSC:
+ return std::string("USB");
+ case X_LINK_PCIE:
+ return std::string("PCIE");
+ default:
+ return std::string("ANY");
+ }
+}
+
+// Maps a platform enum to the short tag used in generated test names.
+// Anything other than Myriad2/MyriadX collapses to "ANY".
+static std::string platformToString(XLinkPlatform_t platform) {
+ switch (platform) {
+ case X_LINK_MYRIAD_2:
+ return std::string("Myriad2");
+ case X_LINK_MYRIAD_X:
+ return std::string("MyriadX");
+ default:
+ return std::string("ANY");
+ }
+}
+
+// Builds the parameterized test-case suffix, e.g. "protocol=USB_platform=ANY".
+std::string XLinkDeviceTestsCommon::getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param) {
+ XLinkProtocol_t protocol = get<0>(param.param);
+ XLinkPlatform_t platform = get<1>(param.param);
+
+ return "protocol=" + protocolToString(protocol) +
+ "_platform=" + platformToString(platform);
+}
+
+// Caches the current (protocol, platform) parameter pair for use by the tests.
+void XLinkDeviceTestsCommon::SetUp() {
+ _protocol = get<0>(XLinkDeviceTestsCommonParam::GetParam());
+ _platform = get<1>(XLinkDeviceTestsCommonParam::GetParam());
+}
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkOpenStreamUSBTests
+//------------------------------------------------------------------------------
+
+// Like the base name generator, but prefixes DISABLED_ when no unbooted device
+// of the requested protocol is attached, so the whole case is skipped by gtest.
+std::string XLinkOpenStreamTests::getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param) {
+ const auto name = XLinkDeviceTestsCommon::getTestCaseName(param);
+
+ XLinkProtocol_t protocol = get<0>(param.param);
+ if (getCountSpecificDevices(X_LINK_UNBOOTED, protocol) == 0) {
+ return "DISABLED_" + name;
+ }
+
+ return name;
+}
+
+// Allocates the handler up front; it is populated during SetUp's connect.
+XLinkOpenStreamTests::XLinkOpenStreamTests() : _handlerPtr(new XLinkHandler_t()) {
+}
+
+// Per-test setup: boot a device matching the (protocol, platform) parameters
+// and open a connection to it.
+void XLinkOpenStreamTests::SetUp() {
+ XLinkDeviceTestsCommon::SetUp();
+
+ _deviceDesc.protocol = _protocol;
+ _deviceDesc.platform = _platform;
+
+ XLinkTestsHelper::bootDevice(_deviceDesc, _bootedDesc);
+ XLinkTestsHelper::connectToDevice(_bootedDesc, _handlerPtr.get());
+}
+
+// Per-test teardown: reset the connected device and verify it disappeared.
+void XLinkOpenStreamTests::TearDown() {
+ XLinkTestsHelper::closeDevice(_handlerPtr.get());
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#pragma once
+
+#include "XLink.h"
+#include "XLink_tests_helpers.hpp"
+
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace ::testing;
+// Parameter pack shared by all device-parameterized suites: (protocol, platform).
+using XLinkDeviceTestsCommonParam = WithParamInterface<std::tuple<XLinkProtocol_t, XLinkPlatform_t>>;
+
+//------------------------------------------------------------------------------
+// class XLinkTests
+// Root fixture: initializes the XLink library once per suite and mixes in the
+// device/firmware helper methods.
+//------------------------------------------------------------------------------
+class XLinkTests : public ::testing::Test,
+ protected XLinkTestsHelper {
+public:
+ static void SetUpTestCase();
+};
+
+//------------------------------------------------------------------------------
+// class XLinkNullPtrTests
+//------------------------------------------------------------------------------
+class XLinkNullPtrTests: public XLinkTests {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindAllSuitableDevicesTests
+//------------------------------------------------------------------------------
+
+class XLinkFindAllSuitableDevicesTests : public XLinkTests {};
+
+
+//------------------------------------------------------------------------------
+// class XLinkCommonTests
+// Base for suites parameterized over (protocol, platform); caches the current
+// parameters in _protocol/_platform during SetUp.
+//------------------------------------------------------------------------------
+class XLinkDeviceTestsCommon : public XLinkTests,
+ public XLinkDeviceTestsCommonParam {
+public:
+ //Operations
+ static std::string getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param);
+
+ void SetUp() override;
+
+protected:
+ XLinkProtocol_t _protocol;
+ XLinkPlatform_t _platform;
+};
+
+//------------------------------------------------------------------------------
+// class XLinkBootUSBTests
+//------------------------------------------------------------------------------
+
+class XLinkBootTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkConnectTests
+//------------------------------------------------------------------------------
+class XLinkConnectTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindFirstSuitableDeviceTests
+//------------------------------------------------------------------------------
+
+class XLinkFindFirstSuitableDeviceTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindFirstSuitableBootedDeviceTests
+//------------------------------------------------------------------------------
+
+class XLinkFindFirstSuitableDevicePlatformTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkResetRemoteTests
+//------------------------------------------------------------------------------
+class XLinkResetRemoteTests : public XLinkDeviceTestsCommon {};
+
+//------------------------------------------------------------------------------
+// class XLinkResetAllTests
+//------------------------------------------------------------------------------
+class XLinkResetAllTests : public XLinkDeviceTestsCommon {};
+
+
+
+//------------------------------------------------------------------------------
+// class XLinkOpenStreamTests
+// Boots and connects to a device in SetUp, resets it in TearDown; overrides
+// getTestCaseName to disable the whole case when no device is attached.
+//------------------------------------------------------------------------------
+
+class XLinkOpenStreamTests : public XLinkDeviceTestsCommon {
+public:
+ //Operations
+ static std::string getTestCaseName(
+ const TestParamInfo<XLinkDeviceTestsCommonParam::ParamType>& param);
+
+protected:
+ XLinkOpenStreamTests();
+
+ void SetUp() override;
+ void TearDown() override;
+
+protected:
+ std::unique_ptr<XLinkHandler_t> _handlerPtr;
+ deviceDesc_t _deviceDesc = {};
+ deviceDesc_t _bootedDesc = {};
+};
--- /dev/null
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "XLink_common_cases.hpp"
+
+//------------------------------------------------------------------------------
+// class XLinkBootUSBTests
+// USB-specific, non-parameterized suites built on the common XLinkTests fixture.
+//------------------------------------------------------------------------------
+
+class XLinkBootUSBTests : public XLinkTests {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindFirstSuitableDeviceUSBTests
+//------------------------------------------------------------------------------
+
+class XLinkFindFirstSuitableDeviceUSBTests : public XLinkTests {};
+
+//------------------------------------------------------------------------------
+// class XLinkFindPCIEDeviceTests
+//------------------------------------------------------------------------------
+
+class XLinkPCIEDeviceTests: public XLinkTests {};
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "XLink_tests_helpers.hpp"
+#include <thread>
+
+//------------------------------------------------------------------------------
+// Implementation of methods of class XLinkTestsHelpersBoot
+//------------------------------------------------------------------------------
+
+// Finds an unbooted device matching in_deviceDesc, boots it with the matching
+// firmware, and returns the descriptor of the resulting booted device.
+// Fails the calling test (via ASSERT_*) on any step.
+void XLinkTestsHelper::bootDevice(const deviceDesc_t& in_deviceDesc, deviceDesc_t& out_bootedDeviceDesc) {
+ deviceDesc_t tmp_deviceDesc = {};
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_UNBOOTED, in_deviceDesc, &tmp_deviceDesc));
+
+ std::string firmwarePath;
+ ASSERT_NO_THROW(firmwarePath = getMyriadFirmwarePath(tmp_deviceDesc));
+ printf("Would boot (%s) device with firmware (%s) \n", tmp_deviceDesc.name, firmwarePath.c_str());
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkBoot(&tmp_deviceDesc, firmwarePath.c_str()));
+ // FIXME: need to find a way to avoid this sleep
+ std::this_thread::sleep_for(kBootTimeoutSec);
+
+ // Check, that device booted (name is cleared because it changes after boot)
+ tmp_deviceDesc.platform = X_LINK_ANY_PLATFORM;
+ memset(tmp_deviceDesc.name, 0, XLINK_MAX_NAME_SIZE);
+ ASSERT_EQ(X_LINK_SUCCESS,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, tmp_deviceDesc, &out_bootedDeviceDesc));
+}
+
+// Connects to an already-booted device, filling out_handler with the protocol
+// and device path. out_handler must be non-null; the test fails otherwise.
+// NOTE: out_handler->devicePath aliases in_bootedDeviceDesc.name, so the
+// descriptor must outlive the handler.
+void XLinkTestsHelper::connectToDevice(deviceDesc_t& in_bootedDeviceDesc, XLinkHandler_t* out_handler) {
+ if (!out_handler){
+ GTEST_FAIL();
+ }
+
+ memset(out_handler, 0, sizeof(XLinkHandler_t));
+ out_handler->protocol = in_bootedDeviceDesc.protocol;
+ out_handler->devicePath = in_bootedDeviceDesc.name;
+
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkConnect(out_handler));
+}
+
+// Resets the device behind an open handler and verifies it left the booted
+// list after the reset timeout.
+void XLinkTestsHelper::closeDevice(XLinkHandler_t* handler) {
+ ASSERT_EQ(X_LINK_SUCCESS, XLinkResetRemote(handler->linkId));
+ std::this_thread::sleep_for(kResetTimeoutSec);
+
+ // Make sure that device is closed
+ deviceDesc_t deviceDesc = {};
+ deviceDesc.protocol = handler->protocol;
+ deviceDesc.platform = X_LINK_ANY_PLATFORM;
+ // NOTE(review): unchecked strcpy — assumes devicePath is shorter than the
+ // name buffer (XLINK_MAX_NAME_SIZE); confirm for all callers.
+ strcpy(deviceDesc.name, handler->devicePath);
+
+ deviceDesc_t foundDeviceDesc = {};
+ ASSERT_EQ(X_LINK_DEVICE_NOT_FOUND,
+ XLinkFindFirstSuitableDevice(X_LINK_BOOTED, deviceDesc, &foundDeviceDesc));
+}
+
+// Convenience wrapper: connect to a booted device, then reset and verify close.
+void XLinkTestsHelper::connectAndCloseDevice(deviceDesc_t& in_bootedDeviceDesc) {
+ XLinkHandler_t handler = {0};
+
+ connectToDevice(in_bootedDeviceDesc, &handler);
+ closeDevice(&handler);
+}
+
+// Picks the USB firmware image by device name: names containing "ma2480" get
+// the ma2x8x image, everything else falls back to ma2450. Throws
+// std::invalid_argument when the name lacks the '-' of a device address.
+std::string XLinkTestsHelper::getMyriadUSBFirmwarePath(const std::string& deviceName) {
+ if (deviceName.find('-') == std::string::npos) {
+ throw std::invalid_argument("Invalid device address");
+ }
+
+ if (deviceName.find("ma2480") != std::string::npos) {
+ return FIRMWARE_SUBFOLDER + std::string("usb-ma2x8x.mvcmd");
+ }
+
+ return FIRMWARE_SUBFOLDER + std::string("usb-ma2450.mvcmd");
+}
+
+// Resolves the firmware path for a device descriptor. PCIe devices get a
+// platform-specific image (.elf on Windows, .mvcmd elsewhere); USB devices
+// defer to getMyriadUSBFirmwarePath. Throws std::invalid_argument for any
+// other protocol.
+std::string XLinkTestsHelper::getMyriadFirmwarePath(const deviceDesc_t& in_deviceDesc) {
+ if(in_deviceDesc.protocol != X_LINK_USB_VSC &&
+ in_deviceDesc.protocol != X_LINK_PCIE) {
+ throw std::invalid_argument("Device protocol must be specified");
+ }
+
+ if(in_deviceDesc.protocol == X_LINK_PCIE) {
+#if defined(_WIN32)
+ return FIRMWARE_SUBFOLDER + std::string("pcie-ma248x.elf");
+#else
+ return FIRMWARE_SUBFOLDER + std::string("pcie-ma248x.mvcmd");
+#endif
+ }
+
+ return getMyriadUSBFirmwarePath(in_deviceDesc.name);
+}
+
+// Enumerates all devices matching the requirements in the given state and
+// copies the descriptor at `index` into out_foundDevicesPtr.
+// Returns X_LINK_DEVICE_NOT_FOUND when fewer than index+1 devices exist, or
+// propagates the enumeration error.
+XLinkError_t XLinkTestsHelper::findDeviceOnIndex(
+ const int index,
+ const XLinkDeviceState_t deviceState,
+ const deviceDesc_t in_deviceRequirements,
+ deviceDesc_t *out_foundDevicesPtr) {
+
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkError_t rc = XLinkFindAllSuitableDevices(
+ deviceState, in_deviceRequirements, deviceDescArray, XLINK_MAX_DEVICES, &foundDevices);
+
+ if (rc != X_LINK_SUCCESS) {
+ return rc;
+ }
+
+ // NOTE(review): unsigned/int comparison — a negative index would be
+ // converted to a huge unsigned value; callers only pass 0/1 today.
+ if (foundDevices <= index) {
+ return X_LINK_DEVICE_NOT_FOUND;
+ }
+
+ out_foundDevicesPtr->platform = deviceDescArray[index].platform;
+ out_foundDevicesPtr->protocol = deviceDescArray[index].protocol;
+ // NOTE(review): strncpy does not null-terminate when the source fills the
+ // whole buffer — assumes names are always shorter than XLINK_MAX_NAME_SIZE;
+ // confirm against the driver's name generation.
+ strncpy(out_foundDevicesPtr->name, deviceDescArray[index].name, XLINK_MAX_NAME_SIZE);
+ return X_LINK_SUCCESS;
+}
+
+// Counts devices matching the given state/protocol/platform filter.
+// NOTE(review): the XLinkFindAllSuitableDevices return code is ignored — on
+// failure this silently reports 0 devices; also the unsigned count is
+// returned through an int.
+int XLinkTestsHelper::getCountSpecificDevices(const XLinkDeviceState_t state,
+ const XLinkProtocol_t deviceProtocol,
+ const XLinkPlatform_t devicePlatform) {
+ deviceDesc_t req_deviceDesc = {};
+ req_deviceDesc.protocol = deviceProtocol;
+ req_deviceDesc.platform = devicePlatform;
+
+ deviceDesc_t deviceDescArray[XLINK_MAX_DEVICES] = {};
+ unsigned int foundDevices = 0;
+ XLinkFindAllSuitableDevices(
+ state, req_deviceDesc, deviceDescArray, XLINK_MAX_DEVICES, &foundDevices);
+
+ return foundDevices;
+}
--- /dev/null
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "XLink.h"
+#include "XLinkPrivateDefines.h"
+
+#include <gtest/gtest.h>
+#include <chrono>
+#include <string>
+
+//------------------------------------------------------------------------------
+// Defines
+//------------------------------------------------------------------------------
+
+using std_seconds = std::chrono::seconds;
+
+// Platform-specific PCIe device-name substring and firmware folder layout.
+#if (defined(_WIN32) || defined(_WIN64))
+ static constexpr char PCIE_NAME_SUBSTR[] = "mxlink";
+ static constexpr char FIRMWARE_SUBFOLDER[] = "./";
+#else
+ static constexpr char PCIE_NAME_SUBSTR[] = "mxlk";
+ static constexpr char FIRMWARE_SUBFOLDER[] = "./lib/";
+#endif
+
+//------------------------------------------------------------------------------
+// Helpers
+//------------------------------------------------------------------------------
+
+// User-defined literal so timeouts read as e.g. 2_sec.
+constexpr std::chrono::seconds operator "" _sec(unsigned long long s)
+{
+    return std::chrono::seconds(s);
+}
+
+//------------------------------------------------------------------------------
+// class XLinkTestsHelper
+// Mix-in with device-management, firmware-lookup and enumeration helpers
+// shared by all XLink test fixtures.
+//------------------------------------------------------------------------------
+
+class XLinkTestsHelper {
+public:
+ // Empirical waits after boot/reset; see FIXMEs at the call sites.
+ const std_seconds kBootTimeoutSec = 2_sec;
+ const std_seconds kResetTimeoutSec = 5_sec;
+
+ // Platform markers embedded in unbooted USB device names.
+ const std::string kUSBMyriadX = "ma2480";
+ const std::string kUSBMyriad2 = "ma2450";
+
+ // Device management
+ void bootDevice(const deviceDesc_t& in_deviceDesc, deviceDesc_t& out_bootedDeviceDesc);
+
+ void connectToDevice(deviceDesc_t& in_bootedDeviceDesc, XLinkHandler_t* out_handler);
+ void closeDevice(XLinkHandler_t* handler);
+
+ void connectAndCloseDevice(deviceDesc_t& in_bootedDeviceDesc);
+
+ // Firmware
+ std::string getMyriadUSBFirmwarePath(const std::string& deviceName);
+ std::string getMyriadFirmwarePath(const deviceDesc_t& in_deviceDesc);
+
+ // Device searching
+ XLinkError_t findDeviceOnIndex(const int index,
+ const XLinkDeviceState_t deviceState,
+ const deviceDesc_t in_deviceRequirements,
+ deviceDesc_t *out_foundDevicesPtr);
+
+
+ static int getCountSpecificDevices(const XLinkDeviceState_t state = X_LINK_ANY_STATE,
+ const XLinkProtocol_t deviceProtocol = X_LINK_ANY_PROTOCOL,
+ const XLinkPlatform_t devicePlatform = X_LINK_ANY_PLATFORM);
+};
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_tool
 DESTINATION deployment_tools/tools
 COMPONENT python_tools)
# NOTE(review): dropped an added `install(FILES COMPONENT python_tools)` call —
# install(FILES) requires at least one file plus a DESTINATION (or TYPE), so
# the empty form fails at configure time. Re-add it with actual file arguments
# when files to install are known.
endif()
--- /dev/null
+# Cross Check Tool
+
+Cross Check Tool is a console application that enables comparing accuracy and performance metrics for two successive
+model inferences that are performed on two different supported Intel® devices or with different precisions.
+The Cross Check Tool can compare the metrics per layer or all over the model.
+
+## Running the Cross Check Tool
+
+Cross Check Tool is distributed as a Python module and there is no need to build it. To run the Cross Check Tool,
+execute the `cross_check_tool.py` file with necessary parameters. Please note that the Inference Engine assumes that weights
+are in the same folder as the `.xml` file.
+
+You can get the list of all available options using the `-h` option:
+
+```sh
+$ python3 cross_check_tool.py -h
+
+Cross Check Tool is a console application that enables comparing accuracy and
+provides performance metrics
+
+optional arguments:
+ -h, --help show this help message and exit
+
+Model specific arguments:
+ --input INPUT, -i INPUT
+ Path to an input image file or multi-input file to
+ infer. Generates input(s) from normal distribution if
+ empty
+ --batch BATCH, -b BATCH
+ Overrides batch size. Default is inherited from model
+ --model MODEL, -m MODEL
+ Path to an .xml file that represents the first IR of
+ the trained model to infer.
+ --reference_model REFERENCE_MODEL, -ref_m REFERENCE_MODEL
+ Path to an .xml file that represents the second IR in
+ different precision to compare the metrics.
+ --layers LAYERS, -layers LAYERS
+ Defines layers to check. Options: all, None - for
+ output layers check, list of comma-separated layer
+ names to check. Default value is None.
+ --mapping MAPPING, -map MAPPING
+ Model Optimizer provided mapping for --model/-m
+ --reference_mapping REFERENCE_MAPPING, -ref_map REFERENCE_MAPPING
+ Model Optimizer provided mapping for
+ --reference_model/-ref_model
+ --num_of_iterations NUM_OF_ITERATIONS, -ni NUM_OF_ITERATIONS
+ Number of iterations to collect all over the net
+ performance
+
+Plugin specific arguments:
+ --plugin_path PLUGIN_PATH, -pp PLUGIN_PATH
+ Path to a plugin folder.
+ --device DEVICE, -d DEVICE
+ The first target device to infer the model specified
+ with the -m or --model option. CPU, GPU, HDDL or
+ MYRIAD are acceptable.
+ --config CONFIG, -conf CONFIG
+ Path to config file for -d or -device device plugin
+ --reference_device REFERENCE_DEVICE, -ref_d REFERENCE_DEVICE
+ The second target device to infer the model and
+ compare the metrics. CPU, GPU, HDDL or MYRIAD are
+ acceptable.
+ --reference_config REFERENCE_CONFIG, -ref_conf REFERENCE_CONFIG
+ Path to config file for -ref_d or -reference_device
+ device plugin
+ -l L Required for MKLDNN (CPU)-targeted custom layers.
+ Comma separated paths to a shared libraries with the
+ kernels implementation.
+
+CCT mode arguments:
+ --dump Enables blobs statistics dumping
+ --load LOAD Path to a file to load blobs from
+
+```
+### Examples
+
+1. To check per-layer accuracy and performance of inference in FP32 precision on the CPU against the GPU, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP32_xml> \
+ -d GPU \
+ -ref_d CPU \
+ --layers all
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Cross check with one IR was enabled
+ [ INFO ] GPU:FP32 vs CPU:FP32
+ [ INFO ] The same IR on both devices: <path_to_IR>
+ [ INFO ] Statistics will be dumped for X layers: <layer_1_name>, <layer_2_name>, ... , <layer_X_name>
+ [ INFO ] Layer <layer_1_name> statistics
+ Max absolute difference : 1.15204E-03
+ Min absolute difference : 0.0
+ Max relative difference : 1.15204E+17
+ Min relative difference : 0.0
+ Min reference value : -1.69513E+03
+ Min absolute reference value : 2.71080E-06
+ Max reference value : 1.17132E+03
+ Max absolute reference value : 1.69513E+03
+ Min actual value : -1.69513E+03
+ Min absolute actual value : 8.66465E-05
+ Max actual value : 1.17132E+03
+ Max absolute actual value : 1.69513E+03
+ Device: -d GPU -ref_d CPU
+ Status: OPTIMIZED_OUT OPTIMIZED_OUT
+ Layer type: Convolution Convolution
+ Real time, microsec: 0 120
+ Number of NAN: 0 0
+ Number of INF: 0 0
+ Number of ZERO: 0 0
+ ...
+ <list_of_layer_statistics>
+ ...
+
+ [ INFO ] Overall max absolute difference = 0.00115203857421875
+ [ INFO ] Overall min absolute difference = 0.0
+ [ INFO ] Overall max relative difference = 1.1520386483093504e+17
+ [ INFO ] Overall min relative difference = 0.0
+ [ INFO ] Execution successful
+ ```
+
+2. To check the overall accuracy and performance of inference on the CPU in FP32 precision against the
+ Intel® Movidius™ Myriad™ device in FP16 precision, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP16_xml> \
+ -d MYRIAD \
+ -ref_m <path_to_FP32_xml> \
+ -ref_d CPU
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Cross check with two IRs was enabled
   [ INFO ] MYRIAD:FP16 vs CPU:FP32
+ [ INFO ] IR for MYRIAD : <path_to_FP16_xml>
+ [ INFO ] IR for CPU : <path_to_FP32_xml>
+ [ INFO ] Statistics will be dumped for 1 layer: <output_layer_name(s)>
+ [ INFO ] Layer <output_layer_name> statistics
+ Max absolute difference : 2.32944E-02
+ Min absolute difference : 3.63002E-13
+ Max relative difference : 6.41717E+10
+ Min relative difference : 1.0
+ Min reference value : 3.63002E-13
+ Min absolute reference value : 3.63002E-13
+ Max reference value : 7.38138E-01
+ Max absolute reference value : 7.38138E-01
+ Min actual value : 0.0
+ Min absolute actual value : 0.0
+ Max actual value : 7.14844E-01
+ Max absolute actual value : 7.14844E-01
+ Device: -d MYRIAD -ref_d CPU
+ Status: OPTIMIZED_OUT OPTIMIZED_OUT
+ Layer type: Reshape Reshape
+ Real time, microsec: 0 0
+ Number of NAN: 0 0
+ Number of INF: 0 0
+ Number of ZERO: 0 0
+ ----------------------------------------------------------------------
+ Overall performance, microseconds: 2.79943E+05 6.24670E+04
+ ----------------------------------------------------------------------
+ [ INFO ] Overall max absolute difference = 0.023294448852539062
+ [ INFO ] Overall min absolute difference = 3.630019191052519e-13
+ [ INFO ] Overall max relative difference = 64171696128.0
+ [ INFO ] Overall min relative difference = 1.0
+ [ INFO ] Execution successful
+ ```
+
+3. To dump layer statistics from a specific list of layers, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP16_xml> \
+ -d MYRIAD \
+ --dump \
+ --layers <comma_separated_list_of_layers>
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Dump mode was enabled
+ [ INFO ] <layer_1_name> layer processing
+ ...
+ [ INFO ] <layer_X_name> layer processing
+ [ INFO ] Dump file path: <path_where_dump_will_be_saved>
+ [ INFO ] Execution successful
+ ```
+
+ If you do not provide the `-i` key, the Cross Check Tool generates an input from normal distributed noise and saves
+ it in a multi-input file format with the filename `<path_to_xml>_input_layers_dump.txt` in the same folder as the Intermediate Representation (IR).
+
+4. To check the overall accuracy and performance of inference on the CPU in FP32 precision against dumped results, run:
+ ```sh
+ $python3 cross_check_tool.py -i <path_to_input_image_or_multi_input_file> \
+ -m <path_to_FP32_xml> \
+ -d CPU \
+ --load <path_to_dump> \
+ --layers all
+ ```
+
+ The output looks as follows:
+ ```sh
+ [ INFO ] Load mode was enabled
+ [ INFO ] IR for CPU : <path_to_FP32_xml>
+ [ INFO ] Loading blob from /localdisk/models/FP16/icv_squeezenet_v1.0.xml_GPU_dump.npz
+ [ INFO ] Statistics will be dumped for X layers: <layer_1_name>, <layer_2_name>, ... , <layer_X_name>
+ [ INFO ] Layer <layer_1_name> statistics
+ Max absolute difference : 0.0
+ Min absolute difference : 0.0
+ Max relative difference : 0.0
+ Min relative difference : 0.0
+ Min reference value : 0.0
+ Min absolute reference value : 0.0
+ Max reference value : 7.14844E-01
+ Max absolute reference value : 7.14844E-01
+ Min actual value : 0.0
+ Min absolute actual value : 0.0
+ Max actual value : 7.14844E-01
+ Max absolute actual value : 7.14844E-01
+ Device: -d CPU -load GPU
+ Status: OPTIMIZED_OUT OPTIMIZED_OUT
+ Layer type: Reshape Reshape
+ Real time, microsec: 0 0
+ Number of NAN: 0 0
+ Number of INF: 0 0
+ Number of ZERO: 609 699
+
+ ...
+ <list_of_layer_statistics>
+ ...
+
+ [ INFO ] Overall max absolute difference = 0.0
+ [ INFO ] Overall min absolute difference = 0.0
+ [ INFO ] Overall max relative difference = 0.0
+ [ INFO ] Overall min relative difference = 0.0
+ [ INFO ] Execution successful
+ ```
+
+### Multi-input and dump file format
+
+Multi-input and dump file is a numpy compressed `.npz` file with hierarchy:
+
+```sh
{
    'layer_name': {
        'blob': np.array([...]),
        'pc': {
            'device': 'device_name',
            'real_time': int_real_time_in_microseconds_from_plugin,
            'exec_type': 'exec_type_from_plugin',
            'layer_type': 'layer_type_from_plugin',
            'status': 'status_from_plugin'
        }
    },
    'another_layer_name': {
        'blob': np.array([...]),
        'pc': {
            'device': 'device_name',
            'real_time': int_real_time_in_microseconds_from_plugin,
            'exec_type': 'exec_type_from_plugin',
            'layer_type': 'layer_type_from_plugin',
            'status': 'status_from_plugin'
        }
    },
    ...
}
+```
+
+### Configuration file
+
There is an option to pass a configuration file to the plugin by providing
the `--config` and/or `--reference_config` keys.

A configuration file is a text file containing key-value pairs, one pair per line.

Structure of the configuration file:
+
+```sh
+KEY VALUE
+ANOTHER_KEY ANOTHER_VALUE,VALUE_1
+```
--- /dev/null
+"""
+Copyright (C) 2018-2020 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
--- /dev/null
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import datetime
+import logging as log
+import os
+import sys
+
+import numpy as np
+
+try:
+ from openvino import inference_engine as ie
+ from openvino.inference_engine import IENetwork, IECore
+except Exception as e:
+ exception_type = type(e).__name__
+ print("The following error happened while importing Python API module:\n[ {} ] {}".format(exception_type, e))
+ sys.exit(1)
+
+from utils import get_config_dictionary, get_layers_list, print_output_layers, input_processing, \
+ accuracy_metrics, validate_args, build_parser, set_logger, find_out_cct_mode, print_all_over_the_net_metrics, \
+ update_global_accuracy_matrics, blob_counters, performance_metrics, manage_user_outputs_with_mapping, \
+ dump_output_file, load_dump, error_handling, print_input_layers, set_verbosity
+
+
+###
+# PLUGIN
+###
+
+
# Fixed: the error description referenced '{plugin.device}', but this function has no
# 'plugin' keyword argument (only core/device/config), so desc.format(**kwargs) raised
# a KeyError inside the error handler, masking the original failure.
@error_handling('plugin of \'{device}\' device config \'{config}\' loading')
def set_plugin_config(core: IECore, device: str, config: str = None):
    """Apply the key-value pairs parsed from the `config` file path to `device` on the core."""
    core.set_config(get_config_dictionary(config_file=config), device_name=device)
+
+
@error_handling('\'{cpu_ext}\' cpu extensions loading')
def set_cpu_extensions(core: IECore, cpu_ext: str):
    """Register a CPU custom-layer extensions library (path `cpu_ext`) with the IE core."""
    core.add_extension(cpu_ext, "CPU")
+
+
def get_plugin(device: str, cpu_ext: str = None, config: str = None):
    """Create an IECore, apply the device config and optional CPU extensions, and return it.

    :param device: target device name (e.g. 'CPU', 'GPU'); extensions are only loaded for CPU
    :param cpu_ext: optional path to a CPU custom-layers extensions library
    :param config: optional path to a plugin configuration file
    """
    # Renamed the local from `ie` to `core`: the old name shadowed the module-level
    # `openvino.inference_engine as ie` import, which was confusing and blocked access
    # to the module inside this function. Dead commented-out logging was removed.
    core = IECore()
    set_plugin_config(core=core, device=device, config=config)
    if cpu_ext and 'CPU' in device:
        set_cpu_extensions(core=core, cpu_ext=cpu_ext)
    return core
+
+
+###
+# MODEL
+###
+
+
@error_handling('reading {model} IR model')
def get_net(model: str, core: IECore):
    """Read an IR into an IENetwork; weights (.bin) are assumed to sit next to the .xml."""
    weights = os.path.splitext(model)[0] + ".bin"
    return core.read_network(model=model, weights=weights)
+
+
# Fixed: the error description referenced '{plugin.device}', but this function's kwargs
# are core/net/device, so formatting the message raised KeyError on the error path.
@error_handling('loading network to plugin of {device} device')
def get_exec_net(core, net, device):
    """Load `net` onto `device` via `core` and return the resulting executable network."""
    return core.load_network(network=net, device_name=device)
+
+
@error_handling('output \'{output}\' addition for network from model \'{model}\'')
def get_net_copy_with_output(model: str, output: str, core: IECore):
    """Re-read the IR from disk; unless `output` is None/'None', mark it as an extra network output."""
    net_copy = get_net(model=model, core=core)
    if output is not None and output != 'None':
        net_copy.add_outputs(output)
    return net_copy
+
+
@error_handling('getting model layers info')
def get_model_info(net: IENetwork):
    """Return (layers, inputs, outputs, precision).

    Precision is taken from the first output blob of the network's first layer —
    presumably uniform across the model, as only one value is reported.
    """
    layers = net.layers
    first_layer = next(iter(layers.values()))
    precision = first_layer.out_data[0].precision
    return layers, net.inputs, net.outputs, precision
+
+
+###
+# INFER
+###
+
+
# Fixed: the error description referenced '{device}', which is not a keyword argument of
# this function (only executable_network/inputs), so formatting raised KeyError on error.
@error_handling('processing inference')
def get_infer_results(executable_network, inputs: dict):
    """Run one synchronous inference and return the {output_name: blob} result dict."""
    return executable_network.infer(inputs=inputs)
+
+
# Fixed: the error description referenced '{device}', which is not a keyword argument of
# this function, so the error handler itself raised KeyError when an exception occurred.
@error_handling('getting performance counts from executable network')
def get_perf_counts(executable_network):
    """Return per-layer performance counters from the first infer request."""
    return executable_network.requests[0].get_perf_counts()
+
+
@error_handling('getting inference results for outputs: \'{output}\'')
def infer(net: IENetwork, core: IECore, device: str, inputs: dict, output: list):
    """Load `net` on `device`, run inference on `inputs` and collect requested outputs.

    :param output: list of layer names to collect
    :return: {layer_name: [blob, perf_counters_dict]} for every requested layer that is
             present in the inference results; missing layers are skipped with a warning.
    """
    executable_network = get_exec_net(core=core, net=net, device=device)
    infer_dict = get_infer_results(executable_network=executable_network, inputs=inputs)
    perf_counts = get_perf_counts(executable_network=executable_network)
    no_i = 'no_info'
    no_info_pc = {'cpu_time': no_i, 'exec_time': no_i, 'layer_type': no_i, 'real_time': no_i, 'status': no_i}
    result = {}
    for out in output:
        if out not in infer_dict:
            log.warning("There is no '{}' layer in Inference Engine outputs results".format(out))
            continue
        # Fixed: previously `pc = pc[out]` clobbered the full counters dict, breaking
        # lookups for subsequent outputs. A copy of the placeholder is used so it is
        # never mutated across iterations.
        pc = perf_counts.get(out, dict(no_info_pc))
        pc['device'] = device
        # Fixed: previously `result = {out: ...}` overwrote the dict every iteration,
        # so only the last requested output survived.
        result[out] = [infer_dict[out], pc]
    return result
+
+
# Fixed: the error description referenced '{output}', which is not a keyword argument of
# this function, so formatting the message raised KeyError on the error path.
@error_handling('checking overall accuracy on \'{device}\' and \'{ref_device}\' devices')
def overall_accuracy_check(model: str, ref_model: str, out_layers: list, ref_out_layers: list, inputs: dict,
                           ref_inputs: dict, core: IECore, device: str, ref_core: IECore, ref_device: str, layers: str,
                           num_of_iterations: int):
    """Measure end-to-end inference wall time on both devices over `num_of_iterations` runs.

    Timing is collected only in the default mode (`layers` is None/'None', i.e. output
    layers only); otherwise two empty lists are returned.
    :return: (global_times, ref_global_times) — lists of datetime.timedelta per iteration
    """
    global_times, ref_global_times = [], []
    if layers in ['None', None]:
        net_copy = get_net_copy_with_output(model=model, output=layers, core=core)
        ref_net_copy = get_net_copy_with_output(model=ref_model, output=layers, core=ref_core)
        for i in range(num_of_iterations):
            t1 = datetime.datetime.now()
            infer(net=net_copy, core=core, device=device, inputs=inputs, output=out_layers)
            t2 = datetime.datetime.now()
            infer(net=ref_net_copy, core=ref_core, device=ref_device, inputs=ref_inputs, output=ref_out_layers)
            t3 = datetime.datetime.now()
            # Wall-clock deltas; includes network-load overhead inside infer().
            global_times.append(t2 - t1)
            ref_global_times.append(t3 - t2)
    return global_times, ref_global_times
+
+
def one_ir_mode(args):
    # Cross-check a single IR executed on two devices (-d vs -ref_d): for each
    # requested layer, infer on both devices and compare accuracy and performance.
    core = get_plugin(args.device, args.l, args.config)
    net = get_net(model=args.model, core=core)
    net_layers, net_inputs, net_outputs, precision = get_model_info(net)
    log.info('{}:{} vs {}:{}'.format(args.device, precision, args.reference_device, precision))
    log.info('The same IR on both devices: {}'.format(args.model))
    out_layers = get_layers_list(net_layers, net_inputs, net_outputs, args.layers)
    print_input_layers(net_inputs)
    print_output_layers(out_layers)
    ref_core = get_plugin(args.reference_device, args.l, args.reference_config)
    global_accuracy = []
    inputs = input_processing(model_path=args.model, net_inputs=net_inputs, input_file=args.input)
    # Whole-network timing over --num_of_iterations runs (only collected when no
    # specific layer list was requested — see overall_accuracy_check).
    global_times, ref_global_times = overall_accuracy_check(model=args.model, ref_model=args.model,
                                                            out_layers=out_layers, ref_out_layers=out_layers,
                                                            inputs=inputs, ref_inputs=inputs, core=core,
                                                            device=args.device, ref_core=ref_core,
                                                            ref_device=args.reference_device, layers=args.layers,
                                                            num_of_iterations=args.num_of_iterations)
    for out_layer in out_layers:
        log.info('Layer {} statistics'.format(out_layer))
        # Re-read the IR with this layer marked as an output so its blob can be fetched.
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        ref_results = infer(net=net_copy, core=ref_core, device=args.reference_device, inputs=inputs, output=[out_layer])
        if out_layer not in ref_results:
            continue
        ref_out_blob, ref_pc = ref_results[out_layer]
        # Per-layer accuracy/performance comparison; accumulate into the global summary.
        a_m = accuracy_metrics(out_blob=out_blob, ref_out_blob=ref_out_blob)
        performance_metrics(pc=pc, ref_pc=ref_pc)
        blob_counters(out_blob=out_blob, ref_out_blob=ref_out_blob)
        global_accuracy = update_global_accuracy_matrics(global_accuracy=global_accuracy, current_accuracy=a_m)
    print_all_over_the_net_metrics(global_times=global_times, ref_global_times=ref_global_times,
                                   global_accuracy=global_accuracy)
+
+
def two_ir_mode(args):
    """Per-layer cross-check of two IRs (--model vs --reference_model) on their devices."""
    core = get_plugin(args.device, args.l, args.config)
    ref_core = get_plugin(args.reference_device, args.l, args.reference_config)
    net = get_net(model=args.model, core=core)
    net_layers, net_inputs, net_outputs, precision = get_model_info(net)
    ref_net = get_net(model=args.reference_model, core=ref_core)
    ref_net_layers, ref_net_inputs, ref_net_outputs, ref_precision = get_model_info(ref_net)
    log.info('{}:{} vs {}:{}'.format(args.device, precision, args.reference_device, ref_precision))
    log.info('IR for {} : {}'.format(args.device, args.model))
    log.info('IR for {} : {}'.format(args.reference_device, args.reference_model))
    out_layers = get_layers_list(net_layers, net_inputs, net_outputs, args.layers)
    ref_out_layers = get_layers_list(ref_net_layers, ref_net_inputs, ref_net_outputs, args.layers)
    print_input_layers(net_inputs)
    print_output_layers(out_layers)
    # Map layer names between the two IRs when MO mapping files are provided.
    layers_map = manage_user_outputs_with_mapping(mapping=args.mapping, reference_mapping=args.reference_mapping,
                                                  user_layers=out_layers)
    inputs = input_processing(model_path=args.model, net_inputs=net_inputs, input_file=args.input,
                              layers_map=layers_map)
    ref_inputs = input_processing(model_path=args.reference_model, net_inputs=ref_net_inputs, input_file=args.input,
                                  layers_map=layers_map)
    global_accuracy = []
    # Fixed: this call previously passed non-existent `plugin=`/`ref_plugin=` keyword
    # arguments and omitted device/ref_device, raising a TypeError before any comparison
    # could run. The keywords now match overall_accuracy_check's signature.
    global_times, ref_global_times = overall_accuracy_check(model=args.model, ref_model=args.reference_model,
                                                            out_layers=out_layers, ref_out_layers=ref_out_layers,
                                                            inputs=inputs, ref_inputs=ref_inputs, core=core,
                                                            device=args.device, ref_core=ref_core,
                                                            ref_device=args.reference_device, layers=args.layers,
                                                            num_of_iterations=args.num_of_iterations)
    for out_layer in layers_map:
        ref_out_layer = layers_map[out_layer]
        if out_layer == ref_out_layer:
            log.info('Layer {} statistics'.format(out_layer))
        else:
            log.info('Statistics \'{}\' vs \'{}\''.format(out_layer, ref_out_layer))
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        ref_net_copy = get_net_copy_with_output(model=args.reference_model, output=ref_out_layer, core=ref_core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        ref_results = infer(net=ref_net_copy, core=ref_core, device=args.reference_device, inputs=ref_inputs,
                            output=[ref_out_layer])
        # Fixed: the membership check now precedes the unpacking — previously
        # ref_results[ref_out_layer] was read first, raising KeyError instead of skipping.
        if ref_out_layer not in ref_results:
            continue
        ref_out_blob, ref_pc = ref_results[ref_out_layer]
        a_m = accuracy_metrics(out_blob=out_blob, ref_out_blob=ref_out_blob)
        performance_metrics(pc=pc, ref_pc=ref_pc)
        blob_counters(out_blob=out_blob, ref_out_blob=ref_out_blob)
        global_accuracy = update_global_accuracy_matrics(global_accuracy=global_accuracy, current_accuracy=a_m)
    print_all_over_the_net_metrics(global_times=global_times, ref_global_times=ref_global_times,
                                   global_accuracy=global_accuracy)
+
+
def dump_mode(args):
    # Run the model on one device and dump each requested layer's blob and performance
    # counters into '<model_path>_<device>_dump.npz' for later comparison via --load.
    core = get_plugin(args.device, args.l, args.config)
    net = get_net(model=args.model, core=core)
    out_layers = get_layers_list(net.layers, net.inputs, net.outputs, args.layers)
    inputs = input_processing(args.model, net.inputs, args.input)
    dump_dict = {}
    for out_layer in out_layers:
        log.info('Layer {} processing'.format(out_layer))
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        # Stored as a 0-d object array so np.savez preserves the {'blob', 'pc'} dict per layer.
        dump_dict[out_layer] = np.array({'blob': out_blob, 'pc': pc})
    dump_output_file(args.model + '_' + args.device + '_dump.npz', dump_dict)
+
+
def load_mode(args):
    # Compare live inference on one device against blobs previously saved by --dump.
    core = get_plugin(args.device, args.l, args.config)
    log.info('IR for {} : {}'.format(args.device, args.model))
    log.info('Loading blob from {}'.format(args.load))
    net = get_net(model=args.model, core=core)
    net_layers, net_inputs, net_outputs, precision = get_model_info(net)
    out_layers = get_layers_list(net_layers, net_inputs, net_outputs, args.layers)
    print_input_layers(net_inputs)
    print_output_layers(out_layers)
    # Optional MO mapping translates layer names between this IR and the dumped one.
    layers_map = manage_user_outputs_with_mapping(mapping=args.mapping, reference_mapping=args.reference_mapping,
                                                  user_layers=out_layers)
    inputs = input_processing(args.model, net_inputs, args.input, layers_map)
    global_accuracy = []
    loaded = load_dump(args.load)
    for out_layer in layers_map:
        ref_out_layer = layers_map[out_layer]
        if out_layer == ref_out_layer:
            log.info('Layer {} statistics'.format(out_layer))
        else:
            log.info('Statistics \'{}\' vs \'{}\''.format(out_layer, ref_out_layer))
        net_copy = get_net_copy_with_output(model=args.model, output=out_layer, core=core)
        results = infer(net=net_copy, core=core, device=args.device, inputs=inputs, output=[out_layer])
        if out_layer not in results:
            continue
        out_blob, pc = results[out_layer]
        # Skip layers that were not present in the dump file.
        if ref_out_layer not in loaded:
            continue
        ref_out_blob = loaded[ref_out_layer]['blob']
        a_m = accuracy_metrics(out_blob=out_blob, ref_out_blob=ref_out_blob)
        # Older dumps may lack performance counters; report them only when present.
        if 'pc' in loaded[ref_out_layer]:
            ref_pc = loaded[ref_out_layer]['pc']
            performance_metrics(pc=pc, ref_pc=ref_pc)
        blob_counters(out_blob=out_blob, ref_out_blob=ref_out_blob)
        global_accuracy = update_global_accuracy_matrics(global_accuracy=global_accuracy, current_accuracy=a_m)
    print_all_over_the_net_metrics(global_accuracy=global_accuracy)
+
+
def main(args):
    """Entry point: report the IE version, set verbosity, and run the selected CCT mode."""
    log.info('Inference Engine:\n API version ............ {}'.format(ie.__version__), extra={'no_lvl': True})
    set_verbosity(args.verbosity)
    mode = find_out_cct_mode(args)
    # Mode ids come from find_out_cct_mode: 1=one IR, 2=two IRs, 3=dump, 4=load.
    dispatch = {
        1: ('Cross check with one IR was enabled', one_ir_mode),
        2: ('Cross check with two IRs was enabled', two_ir_mode),
        3: ('Dump mode was enabled', dump_mode),
        4: ('Load mode was enabled', load_mode),
    }
    if mode in dispatch:
        message, runner = dispatch[mode]
        log.info(message)
        runner(args)
    log.info("Execution successful")
+
+
if __name__ == '__main__':
    # Configure the root logger before parsing so all messages are formatted consistently.
    set_logger(log.DEBUG)
    main(validate_args(build_parser().parse_args()))
--- /dev/null
+numpy
+opencv-python
\ No newline at end of file
--- /dev/null
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import argparse
+import logging as log
+import os
+import sys
+import traceback
+import xml
+
+try:
+ import cv2
+except Exception as e:
+ log.error("Can not import OpenCV Python package.\nPlease install required python packages by running:\n"
+ "pip3 install -r requirements.txt\n\n Original error message: {}".format(e))
+ sys.exit(1)
+
+try:
+ import numpy as np
+except Exception as e:
+ log.error("Can not import numpy python package.\nPlease install required python packages by running:\n"
+ "pip3 install -r requirements.txt\n\n Original error message: {}".format(e))
+ sys.exit(1)
+
+verbosity = False
+
+
def set_verbosity(flag: bool):
    """Set the module-level verbosity flag (read by error_handling to decide whether to print tracebacks)."""
    global verbosity
    verbosity = flag
+
+
+###
+# USER INTERACTION
+###
+
+
class LvlFormatter(log.Formatter):
    """Log formatter that picks a format string per record level.

    Records carrying a truthy 'no_lvl' extra are emitted as the bare message;
    when the formatter was constructed with lvl == 'DEBUG', every record uses
    the detailed debug format.
    """

    usual = '[ %(levelname)s ] %(msg)s'
    format_dict = {
        'no_lvl': '%(msg)s',
        log.DEBUG: '[ %(asctime)s ] [ %(levelname)s ] [ %(module)s:%(lineno)d ] %(msg)s',
        log.INFO: usual, log.WARNING: usual, log.ERROR: usual, log.CRITICAL: usual
    }

    def __init__(self, lvl, fmt=None):
        log.Formatter.__init__(self, fmt)
        self.lvl = lvl

    def format(self, record: log.LogRecord):
        # Global DEBUG verbosity forces the detailed format for every record.
        chosen = self.format_dict[log.DEBUG] if self.lvl == 'DEBUG' else self.format_dict[record.levelno]
        if record.__dict__.get('no_lvl'):
            chosen = self.format_dict['no_lvl']
        self._style._fmt = chosen
        return log.Formatter.format(self, record)
+
+
def set_logger(lvl: str):
    """Attach a stdout handler using LvlFormatter to the root logger at level `lvl`."""
    root = log.getLogger()
    root.setLevel(lvl)
    stdout_handler = log.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(LvlFormatter(lvl))
    root.addHandler(stdout_handler)
+
+
def error_handling(desc: str):
    """Decorator factory: on exception, log `desc` (formatted with the call's kwargs) and exit(1).

    When the module-level `verbosity` flag is set, the traceback is printed as well.
    :param desc: description template for an error, formatted with the wrapped call's **kwargs
    :return: decorator
    """

    def decorator(func):
        def try_except_func(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                log.error("The following error happened while {}:\n[ {} ] {}".format(
                    desc.format(**kwargs), type(e).__name__, e))
                global verbosity
                if verbosity:
                    traceback.print_tb(tb=e.__traceback__, file=sys.stdout)
                sys.exit(1)

        return try_except_func

    return decorator
+
+
class ExistingFileAction(argparse.Action):
    """argparse action that accepts a path only if it is an existing file; exits otherwise.

    (Note: despite the original docstring, no home-directory expansion or
    relative-to-absolute conversion is performed — the value is stored as given.)
    """

    def __call__(self, parser, namespace, values, option_string=None):
        if values is None:
            return
        if not os.path.isfile(values):
            log.error("File was not found: {}".format(values))
            sys.exit(1)
        setattr(namespace, self.dest, values)
+
+
class ExistingDirAction(argparse.Action):
    """argparse action that accepts a path only if it is an existing directory; exits otherwise.

    (Note: despite the original docstring, no home-directory expansion or
    relative-to-absolute conversion is performed — the value is stored as given.)
    """

    def __call__(self, parser, namespace, values, option_string=None):
        if values is None:
            return
        if not os.path.isdir(values):
            log.error("Directory was not found: {}".format(values))
            sys.exit(1)
        setattr(namespace, self.dest, values)
+
+
def build_parser():
    # Construct the CCT argument parser: a long usage banner describing the four
    # run modes, plus three argument groups (model / plugin / CCT mode).
    parser = argparse.ArgumentParser(
        prog='Cross Check Tool',
        description='Cross Check Tool is a console application that enables comparing accuracy and provides performance'
                    ' metrics',
        usage='\n' + '-' * 62 +
              '\nFor cross precision check provide two IRs \n'
              '(mapping files may be needed) run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--reference_model path/to/reference_model/*.xml \\'
              '\n--reference_device reference_device_for_model \n'
              + '-' * 62 +
              '\nFor cross device check with one precision provide one IR run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--reference_device reference_device_for_model \n'
              + '-' * 62 +
              '\nFor dumping blob and performance counters run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--dump\n'
              + '-' * 62 +
              '\nFor check inference against dumped results run:'
              '\npython3 cross_check_tool.py \\'
              '\n--input path/to/file/describing/input \\'
              '\n--model path/to/model/*.xml \\'
              '\n--device device_for_model \\'
              '\n--load path/to/dump/file/* \n'
              + '-' * 62 +
              '\nFor all layers check provide:\n'
              '--layers=\'all\' \n'
              'For specific number of layers check provide:\n'
              '--layers=\'layer_name,another_layer_name,...,last_layer_name\'\n'
              + '-' * 62 +
              '\nIf --input is empty CCT generates input(s) from normal\n'
              'distribution and dumps this input to a file\n'
              + '-' * 62
    )

    # --- Model-related options (paths validated by ExistingFileAction) ---
    model = parser.add_argument_group('Model specific arguments')
    model.add_argument('--input', '-i', type=str, action=ExistingFileAction,
                       help='Path to an input image file or multi-input file to infer. Generates input(s) from normal '
                            'distribution if empty')
    # model.add_argument('--batch', '-b', type=int, help='Overrides batch size. Default is inherited from model')
    model.add_argument('--model', '-m', type=str, action=ExistingFileAction,
                       help='Path to an .xml file that represents the first IR of the trained model to infer.')
    model.add_argument('--reference_model', '-ref_m', type=str, action=ExistingFileAction,
                       help='Path to an .xml file that represents the second IR to compare the metrics. '
                            'Uses --model if empty')
    model.add_argument('--layers', '-layers', type=str, default=None,
                       help='Defines layers to check. Options: all, None - for output layers check, list of '
                            'comma-separated layer names to check. Default value is None.')
    model.add_argument('--mapping', '-map', type=str, action=ExistingFileAction,
                       help='Model Optimizer provided mapping for --model/-m')
    model.add_argument('--reference_mapping', '-ref_map', type=str, action=ExistingFileAction,
                       help='Model Optimizer provided mapping for --reference_model/-ref_model')

    # --- Plugin/device options; --device is the only required argument ---
    plugin = parser.add_argument_group('Plugin specific arguments')
    plugin.add_argument('--plugin_path', '-pp', type=str, action=ExistingDirAction, help='Path to a plugin folder.')
    plugin.add_argument('--device', '-d', type=str, required=True,
                        help='The first target device to infer the model specified with the -m or --model option. '
                             'CPU, GPU, HDDL or MYRIAD are acceptable.')
    plugin.add_argument('--config', '-conf', type=str, action=ExistingFileAction,
                        help='Path to config file for -d or -device device plugin')
    plugin.add_argument('--reference_device', '-ref_d', type=str,
                        help='The second target device to infer the model and compare the metrics. '
                             'CPU, GPU, HDDL or MYRIAD are acceptable.')
    plugin.add_argument('--reference_config', '-ref_conf', type=str, action=ExistingFileAction,
                        help='Path to config file for -ref_d or -reference_device device plugin')
    plugin.add_argument('-l', type=str, action=ExistingFileAction,
                        help='Required for MKLDNN (CPU)-targeted custom layers. Comma separated paths to a shared'
                             ' libraries with the kernels implementation.')

    # --- Mode selection: --dump and --load are mutually exclusive (checked in validate_args) ---
    modes = parser.add_argument_group('CCT mode arguments')
    # TODO eps? nobody uses it
    modes.add_argument('--dump', help='Enables blobs statistics dumping', action='store_true', default=False)
    modes.add_argument('--load', type=str, action=ExistingFileAction, help='Path to a file to load blobs from')
    # NOTE(review): added to the `model` group after the `modes` group was built —
    # it still renders under 'Model specific arguments' in --help.
    model.add_argument('--num_of_iterations', '-ni', type=int, default=50,
                       help='Number of iterations to collect all over the net performance')
    parser.add_argument('-v', '--verbosity', action='store_true', default=False,
                        help='Increase output verbosity')
    return parser
+
+
@error_handling('validating arguments passed to cross_check_tool.py')
def validate_args(args):
    # Normalize and sanity-check the parsed CLI arguments; raises on combinations
    # that no CCT mode can serve. Returns the (possibly mutated) args namespace.
    # input check
    if args.input is None:
        log.info('No input was provided by --input/-i. Generate input from noise')
    # model check: at least one IR path is required; fall back to the reference model
    if args.model is None and args.reference_model is None:
        raise Exception(
            "Parameters --model/-m and --reference_model/-ref_m are empty. At least one of them is required")
    elif args.model is None and args.reference_model:
        args.model = args.reference_model
    # Identical paths mean a single-IR check, so drop the duplicate reference.
    if args.model == args.reference_model:
        args.reference_model = None
    if args.model != args.reference_model and args.reference_model is not None and args.mapping is None and \
            args.reference_mapping is None:
        log.warning('Check over two different IRs was enabled. In case if layer names in this two IRs differ, '
                    'please provide mapping files with --mapping/-map and --reference_mapping/-ref_map')
    # device check
    if args.device is None and args.reference_device is None:
        raise Exception("Parameters -device/-d and -reference_device/-ref_d are not set. Can not proceed."
                        "\nFor more details use -h option")
    if args.reference_device is None and args.reference_model is None and not args.dump and args.load is None:
        raise Exception("Please provide --reference_model/-ref_m to compare executions on different devices."
                        "\nAnother option is to provide --dump key to dump all execution info on one device."
                        "\nOr provide --load key to compare execution on device with dumped info"
                        "\nFor more details use -h option")
    if args.device is None:
        args.device = args.reference_device
        args.reference_device = None
    # dump and load check
    if args.dump and args.load is not None:
        raise Exception("Cross Check Tool does not support both loading and dumping modes to be enabled. "
                        "Choose one of them and proceed")
    # NOTE(review): the next two conditions rely on `and` binding tighter than `or`,
    # i.e. they read as (model and ref_model and dump) or (device and ref_device and dump).
    if args.model is not None and args.reference_model is not None and args.dump or \
            args.device is not None and args.reference_device is not None and args.dump:
        raise Exception("Cross Check Tool does support dumping mode to be enabled only for one model on one device"
                        "\nFor more details use -h option")
    if args.model is not None and args.reference_model is not None and args.load is not None or \
            args.device is not None and args.reference_device is not None and args.load is not None:
        raise Exception("Cross Check Tool does support loading mode to be enabled for one model on one device against a"
                        " dumped file\nFor more details use -h option")
    return args
+
+
def find_out_cct_mode(args):
    """Resolve which CCT mode the (already validated) arguments select.

    Returns:
        1 -- one IR mode
        2 -- two IRs mode
        3 -- dump mode
        4 -- load mode
    Raises Exception when no mode matches.
    """
    have_model = args.model is not None
    have_device = args.device is not None
    no_reference = args.reference_model is None and args.reference_device is None
    # dump mode
    if args.dump and have_model and have_device and no_reference:
        return 3
    # load mode
    if args.load is not None and have_model and have_device and args.reference_device is None:
        return 4
    # two IR mode
    if have_model and args.reference_model is not None:
        return 2
    # one IR mode
    if have_model and args.reference_model is None:
        return 1
    raise Exception('Unknown Cross Check Tool CLI configuration.\nFor more details use -h option')
+
+
def print_input_layers(inputs: list):
    """Log the detected network input names (singular/plural aware)."""
    noun = 'inputs' if len(inputs) > 1 else 'input'
    log.info('{} {} detected: {}'.format(len(inputs), noun, ', '.join(inputs)))
+
+
def print_output_layers(outputs: list):
    """Log which layers statistics will be dumped for (singular/plural aware)."""
    noun = 'layers' if len(outputs) > 1 else 'layer'
    log.info('Statistics will be dumped for {} {}: {}'.format(len(outputs), noun, ', '.join(outputs)))
+
+
+###
+# PLUGIN
+###
+
+
+@error_handling('parsing config file for plugin: \'{config_file}\'')
+def get_config_dictionary(config_file):
+    """
+    Build a plugin configuration dict, always enabling performance counters.
+
+    Each non-empty line of the config file is expected to be '<KEY> <VALUE>':
+    the first whitespace-separated token is the key, the remainder (stripped)
+    is the value. Returns the defaults when no config file is given.
+    """
+    config = {'PERF_COUNT': 'YES'}
+    if not config_file:
+        return config
+    with open(config_file) as f:
+        # BUGFIX(review): original code split each line by the config file *path*
+        # (config_line.split(config_file)), which never matches the line content,
+        # so the whole line became the key and the value was always empty.
+        # It also processed only the first line of the file.
+        for config_line in f:
+            if not config_line.strip():
+                continue
+            key = config_line.split()[0]
+            value = config_line[len(key):].strip()
+            config[key] = value
+    return config
+
+
+###
+# INPUTS
+###
+
+
+def read_multi_input_file(input_file: str, net_inputs: dict):
+    """
+    Load input blobs for every network input from a multi-input .npz file.
+
+    Each network input must be present in the file as an entry whose item(0)
+    dict contains a 'blob' key; the blob is reshaped to the network input shape.
+    Raises Exception if an input is missing, has no 'blob', or cannot be reshaped.
+    """
+    npz = np.load(input_file, allow_pickle=True)
+    files = npz.files
+    dump = {}
+    for net_input in net_inputs:
+        if net_input not in files:
+            raise Exception('Can not find input data for input {} in multi-input file {}.\n'
+                            'Input data was provided for layers: {}\n'
+                            'Network inputs: {}'.format(net_input, input_file, ', '.join(files),
+                                                        ', '.join(net_inputs.keys())))
+        if 'blob' in npz[net_input].item(0):
+            just_blob = npz[net_input].item(0)['blob']
+            network_shape = net_inputs[net_input].shape
+            log.info('Layer {} shape = {}, input blob from multi-input file shape = {}'
+                     ''.format(net_input, network_shape, just_blob.shape))
+            try:
+                reshaped_blob = np.reshape(just_blob, network_shape)
+            except ValueError:
+                # np.reshape raises ValueError on incompatible shapes; the
+                # original bare 'except' also swallowed KeyboardInterrupt/SystemExit
+                raise Exception('Can not reshape input blob from multi-input file for layer {} to shape {}'
+                                ''.format(net_input, network_shape))
+            dump[net_input] = reshaped_blob
+        else:
+            raise Exception(
+                'Can not find \'blob\' parameter for input {} in input file {}'.format(net_input, input_file))
+    return dump
+
+
+@error_handling('reading --input/-i by OpenCV python module. OpenCV version: {}. '
+                'It may happen due to wrong input image format'.format(cv2.__version__))
+def read_image_file(input_file: str, net_inputs: dict):
+    """
+    Read a single image file and prepare it as the input blob of a single-input network.
+
+    Multi-input topologies are rejected here; they must provide a multi-input file.
+    """
+    inputs = dict()
+    if len(net_inputs) == 1:
+        image = cv2.imread(input_file)
+        if image is None:
+            raise Exception('Can not read input image ' + input_file)
+        only_layer_name = list(net_inputs.keys())[0]
+        shape = net_inputs[only_layer_name].shape
+        # assumes the input layer has a 4D NCHW shape -- TODO confirm for other layouts
+        if len(shape) != 4:
+            raise Exception('Can not interpret input shape as image')
+        n, c, h, w = shape
+        image = cv2.resize(image, (w, h))
+        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
+        image = image.reshape((n, c, h, w))
+        inputs[only_layer_name] = image
+    else:
+        raise Exception('Multi-input topology detected. Please provide multi-input file to --input key')
+    return inputs
+
+
+def input_processing(model_path: str, net_inputs: dict, input_file: str, layers_map: dict = None):
+    """
+    Produce input blobs for the network.
+
+    If no input file is given, random data clipped to [0, 1] is generated and
+    also dumped next to the model so the run can be reproduced. Otherwise the
+    file is first interpreted as a multi-input .npz; on any failure it falls
+    back to reading the file as a single image.
+    """
+    inputs = dict()
+    if input_file is None:
+        for net_input in net_inputs:
+            inputs[net_input] = np.clip(np.random.normal(0.5, 0.1, size=net_inputs[net_input].shape), 0, 1)
+        dump_output_file(model_path + '_random_input_dump.npz', {inp: {'blob': inputs[inp]} for inp in inputs})
+        return inputs
+    try:
+        inputs = read_multi_input_file(input_file=input_file, net_inputs=net_inputs)
+    except Exception:
+        # Deliberate best-effort fallback: anything that is not a valid
+        # multi-input file is retried as an image. The original bare 'except'
+        # also caught KeyboardInterrupt/SystemExit; narrowed to Exception.
+        inputs = read_image_file(input_file=input_file, net_inputs=net_inputs)
+    return inputs
+
+
+def accuracy_metrics(out_blob, ref_out_blob):
+    """
+    Compute, log and return accuracy metrics between the actual and reference blobs.
+
+    Returns a dict {metric_name: value}. Raises Exception when blob sizes differ.
+    """
+    if out_blob.size != ref_out_blob.size:
+        raise Exception('Different number of elements in blobs {} and {}. Can not compare'
+                        ''.format(out_blob.size, ref_out_blob.size))
+    abs_diff = np.absolute(out_blob - ref_out_blob)
+    # NOTE(review): 'relative' difference here is taken w.r.t. the minimum
+    # absolute difference (floored at 1e-20), not w.r.t. the reference values --
+    # confirm this is the intended definition
+    rel_diff = np.divide(abs_diff, np.min(abs_diff) if np.min(abs_diff) != 0 else 1e-20)
+
+    metrics = [
+        ('Max absolute difference', np.max(abs_diff)),
+        ('Min absolute difference', np.min(abs_diff)),
+        ('Max relative difference', np.max(rel_diff)),
+        ('Min relative difference', np.min(rel_diff)),
+        ('Min reference value', np.min(ref_out_blob)),
+        ('Min absolute reference value', np.min(np.abs(ref_out_blob))),
+        ('Max reference value', np.max(ref_out_blob)),
+        ('Max absolute reference value', np.max(np.abs(ref_out_blob))),
+        ('Min actual value', np.min(out_blob)),
+        ('Min absolute actual value', np.min(np.abs(out_blob))),
+        ('Max actual value', np.max(out_blob)),
+        ('Max absolute actual value', np.max(np.abs(out_blob)))
+    ]
+
+    for key, value in metrics:
+        # scientific notation only for values whose string form is long
+        if len(str(value)) > 5:
+            log.info('{:>35} : {:.5E}'.format(key, value), extra={'no_lvl': True})
+        else:
+            log.info('{:>35} : {}'.format(key, value), extra={'no_lvl': True})
+    return {metric: value for metric, value in metrics}
+
+
+def performance_metrics(pc, ref_pc):
+    """Log side-by-side performance counters for the actual and reference runs."""
+    rows = (
+        ('Device', '-d ' + pc['device'], '-ref_d ' + ref_pc['device']),
+        ('Status', pc['status'], ref_pc['status']),
+        ('Layer type', pc['layer_type'], ref_pc['layer_type']),
+        ('Real time, microsec', pc['real_time'], ref_pc['real_time']),
+    )
+    for metric, actual, reference in rows:
+        log.info('{:>35}: {:>16} {:>16}'.format(metric, actual, reference), extra={'no_lvl': True})
+
+
+def blob_counters(out_blob, ref_out_blob):
+    """Log NaN / Inf / zero element counts for the actual and reference blobs."""
+    def count_zeros(blob):
+        # number of exactly-zero elements
+        return blob.size - np.count_nonzero(blob)
+
+    counters = (
+        ('Number of NAN', np.sum(np.isnan(out_blob)), np.sum(np.isnan(ref_out_blob))),
+        ('Number of INF', np.sum(np.isinf(out_blob)), np.sum(np.isinf(ref_out_blob))),
+        ('Number of ZERO', count_zeros(out_blob), count_zeros(ref_out_blob)),
+    )
+    for metric, actual, reference in counters:
+        log.info('{:>35}: {:>16} {:>16}'.format(metric, actual, reference), extra={'no_lvl': True})
+
+
+def update_global_accuracy_matrics(global_accuracy: list, current_accuracy: dict):
+    """
+    Fold one layer's accuracy metrics into the running whole-network metrics.
+
+    (Function name kept as-is for caller compatibility; 'matrics' is historical.)
+    """
+    metrics = [
+        ('Max absolute difference', max),
+        ('Min absolute difference', min),
+        ('Max relative difference', max),
+        ('Min relative difference', min)]
+    for metric, combine in metrics:
+        existing = [entry for entry in global_accuracy if entry[0] == metric]
+        if len(existing) == 1:
+            _, previous_value = existing[0]
+            global_accuracy.remove(existing[0])
+            global_accuracy.append((metric, combine(previous_value, current_accuracy[metric])))
+        else:
+            global_accuracy.append((metric, current_accuracy[metric]))
+    return global_accuracy
+
+
+def print_all_over_the_net_metrics(global_accuracy: list, global_times: list = None,
+                                   ref_global_times: list = None):
+    """
+    Log the overall (whole-network) metrics accumulated across all layers.
+
+    When both timing lists are non-empty, the middle element of each list of
+    timedeltas is reported as overall performance (the median, assuming the
+    lists are sorted -- TODO confirm at call sites).
+    """
+    if global_times is not None and ref_global_times is not None and len(global_times) and len(ref_global_times):
+        log.info('-' * 70, extra={'no_lvl': True})
+        log.info('{:>35}: {:>16,.5E} {:>16,.5E}'.format(
+            'Overall performance, microseconds', global_times[len(global_times) // 2].microseconds,
+            ref_global_times[len(ref_global_times) // 2].microseconds), extra={'no_lvl': True})
+        log.info('-' * 70, extra={'no_lvl': True})
+    for metric, value in global_accuracy:
+        log.info('{} {} = {}'.format('Overall', metric.lower(), value))
+
+
+###
+# MAPPING
+###
+
+
+def read_mapping(file_name: str):
+    """
+    Parse a framework-to-IR mapping XML file.
+
+    Returns {framework_name: (ir_layer_name, ir_layer_id)} where framework_name
+    is '<name>:<out_port_id>'. Entries lacking either a <framework> or an <IR>
+    child element are skipped.
+    """
+    mapping_dict = {}
+    xml_tree = xml.etree.ElementTree.parse(file_name)
+    xml_root = xml_tree.getroot()
+    for child in xml_root:
+        fw_info = child.find('.//framework')
+        ir_info = child.find('.//IR')
+        if fw_info is None or ir_info is None:
+            continue
+        framework_name = fw_info.attrib['name'] + ':' + fw_info.attrib['out_port_id']
+        # ir_info is guaranteed non-None after the guard above, so the original
+        # 'if ir_info is not None else None' conditionals were dead code
+        mapping_dict[framework_name] = (ir_info.attrib['name'], int(ir_info.attrib['id']))
+    return mapping_dict
+
+
+def map_layers(mapping_file: str = None, ref_mapping_file: str = None):
+    """
+    Intersect two mapping files into a {layer: layer} dict of names present in both.
+
+    NOTE(review): implicitly returns None when either file is not provided --
+    callers are expected to guard against that.
+    """
+    if mapping_file is not None and ref_mapping_file is not None:
+        mapping = read_mapping(mapping_file)
+        ref_mapping = read_mapping(ref_mapping_file)
+        # keys common to both mappings map to themselves
+        mapping = {layer: ref_layer for layer in mapping for ref_layer in ref_mapping if layer == ref_layer}
+        return mapping
+
+
+def manage_user_outputs_with_mapping(mapping, reference_mapping, user_layers):
+    """
+    Build the {layer: reference_layer} map restricted to user-requested layers.
+
+    Falls back to an identity map when no mapping files are given; warns about
+    requested layers that cannot be mapped or found.
+    """
+    if mapping is not None and reference_mapping is not None:
+        layers_map = map_layers(mapping, reference_mapping)
+    else:
+        layers_map = {layer: layer for layer in user_layers}
+    for layer in user_layers:
+        if layer not in layers_map:
+            if mapping is not None and reference_mapping is not None:
+                log.warning(
+                    'Can not map layer {} from --model/-m to any layer from --reference_model/-ref_m'.format(layer))
+            else:
+                log.warning('Can not find layer {} in --reference_model/-ref_m model'.format(layer))
+    # BUGFIX(review): iterate over a snapshot of the keys -- deleting from a dict
+    # while iterating it directly raises 'dictionary changed size during
+    # iteration' in Python 3
+    for layer in list(layers_map):
+        if layer not in user_layers:
+            del layers_map[layer]
+    return layers_map
+
+
+def get_layers_list(all_layers: dict, inputs: dict, outputs: list, layers: str):
+    """
+    Resolve the user's layer selection string to the layers to check.
+
+    'all'      -> every non-Const layer of the model (returned as a dict);
+    'a,b,...'  -> the named layers, validated to exist and not be inputs;
+    None       -> default to the network outputs.
+    """
+    if layers is not None and layers != 'None':
+        if layers == 'all':
+            return {name: layer for name, layer in all_layers.items() if layer.type not in ['Const']}
+        else:
+            user_layers = [layer.strip() for layer in layers.split(',')]
+            layers_to_check = []
+            for user_layer in user_layers:
+                if user_layer not in all_layers:
+                    raise Exception("Layer {} doesn't exist in the model".format(user_layer))
+                if user_layer in inputs:
+                    raise Exception("Layer {} is input layer. Can not proceed".format(user_layer))
+                layers_to_check.append(user_layer)
+            return layers_to_check
+    else:
+        return outputs
+
+
+###
+# FILES
+###
+
+def dump_output_file(output_file, dump_dict):
+    """Save the dump dict to a compressed .npz file and log its path."""
+    np.savez_compressed(output_file, **dump_dict)
+    log.info('Dump file path: {}'.format(output_file))
+
+
+def load_dump(file_to_load: str):
+    """Load a previously dumped .npz file back into a {name: item_dict} mapping."""
+    npz = np.load(file_to_load, allow_pickle=True)
+    dump = {file: npz[file].item(0) for file in npz}
+    return dump
--- /dev/null
+benchmark_tool/benchmark_app.py
+benchmark_tool/requirements.txt
+benchmark_tool/README.md
+cross_check_tool/__init__.py
+cross_check_tool/utils.py
+cross_check_tool/requirements.txt
+cross_check_tool/README.md
+cross_check_tool/cross_check_tool.py
\ No newline at end of file
extensions/back/blob_normalizer.py
extensions/back/compress_quantized_weights.py
extensions/back/ConvolutionNormalizer.py
+extensions/back/CorrectName.py
extensions/back/CropToStridedSlice.py
extensions/back/CutMemory.py
extensions/back/disable_unsupported_ND_operations.py
extensions/front/flatten_to_reshape.py
extensions/front/freeze_placeholder_value.py
extensions/front/GeLUMerger_Erf.py
+extensions/front/GeLUMerger_Tanh.py
extensions/front/global_pooling_to_reduce.py
extensions/front/image_scaler.py
extensions/front/input_cut.py
extensions/front/onnx/shape_ext.py
extensions/front/onnx/slice_ext.py
extensions/front/onnx/softmax_ext.py
+extensions/front/onnx/softmaxONNX_to_softmax.py
extensions/front/onnx/split_ext.py
extensions/front/onnx/squeeze_ext.py
extensions/front/onnx/top_k_ext.py
extensions/middle/ScaleInput.py
extensions/middle/SharedWeightsDuplication.py
extensions/middle/SliceConverter.py
+extensions/middle/SliceLikeToStridedSlice.py
extensions/middle/space_to_depth.py
extensions/middle/sparse_reshape.py
extensions/middle/ssd_anchors_to_const.py
extensions/middle/UpsampleToResample.py
extensions/middle/UselessMerge.py
extensions/middle/UselessSplitEraser.py
-extensions/middle/UselessStridedSlice.py
extensions/middle/wights_permute_normalizer.py
extensions/ops/__init__.py
extensions/ops/accum.py
extensions/ops/embedding_bag.py
extensions/ops/Enter.py
extensions/ops/Exit.py
-extensions/ops/exp.py
extensions/ops/fakequantize.py
extensions/ops/gather.py
extensions/ops/GatherNd.py
extensions/ops/instance_normalization.py
extensions/ops/interp.py
extensions/ops/interpolate.py
-extensions/ops/Log.py
extensions/ops/LSTM.py
extensions/ops/lstm_cell.py
extensions/ops/lstm_sequence.py
extensions/ops/shufflechannel.py
extensions/ops/simplernms.py
extensions/ops/size.py
+extensions/ops/slice_like.py
extensions/ops/space_to_depth.py
extensions/ops/sparse_fill_empty_rows.py
extensions/ops/sparse_reshape.py
install_prerequisites/install_prerequisites_onnx.sh
install_prerequisites/install_prerequisites_tf.bat
install_prerequisites/install_prerequisites_tf.sh
+install_prerequisites/protobuf-3.6.1-py3.5-win-amd64.egg
+install_prerequisites/protobuf-3.6.1-py3.6-win-amd64.egg
+install_prerequisites/protobuf-3.6.1-py3.7-win-amd64.egg
mo.py
mo/__init__.py
mo/back/__init__.py
mo/pipeline/common.py
mo/pipeline/unified.py
mo/utils/__init__.py
+mo/utils/broadcasting.py
mo/utils/class_registration.py
mo/utils/cli_parser.py
mo/utils/custom_replacement_config.py
mo/utils/dsu.py
mo/utils/error.py
mo/utils/find_inputs.py
+mo/utils/get_ov_update_message.py
mo/utils/graph.py
mo/utils/guess_framework.py
mo/utils/import_extensions.py
mo/utils/ir_reader/extenders/GRUCell_extender.py
mo/utils/ir_reader/extenders/interpolate_extender.py
mo/utils/ir_reader/extenders/LSTMCell_extender.py
+mo/utils/ir_reader/extenders/non_max_suppression_extender.py
mo/utils/ir_reader/extenders/non_zero_extender.py
mo/utils/ir_reader/extenders/pad_extender.py
mo/utils/ir_reader/extenders/parameter_extender.py
mo/utils/ir_reader/extenders/priorbox_extender.py
mo/utils/ir_reader/extenders/reorg_yolo_extender.py
mo/utils/ir_reader/extenders/RNNCell_extender.py
+mo/utils/ir_reader/extenders/shape_of_extender.py
mo/utils/ir_reader/extenders/strided_slice_extender.py
mo/utils/ir_reader/extenders/tensoriterator_extender.py
mo/utils/ir_reader/extenders/topk_extender.py
--- /dev/null
+"""
+ Copyright (C) 2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Graph
+from mo.back.replacement import BackReplacementPattern
+
+
+class RestoreOriginalFrameworkName(BackReplacementPattern):
+    """
+    This transformation corrects names of layers to their framework names.
+    To perform this correction, framework layer name should be in the attribute 'framework_node_name'.
+    In some cases, renaming is necessary only if some condition is fulfilled. Such condition should be a some
+    function in the attribute 'rename_condition'.
+
+    For example, in the transformation SoftmaxONNXFrontReplacer such condition is
+    lambda n: len(n.graph.get_op_nodes(name=node_name)) == 0
+    """
+
+    enabled = True
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for node in graph.get_op_nodes():
+            if not node.has_valid('framework_node_name'):
+                continue
+
+            if node.has_valid('rename_condition'):
+                # conditional rename: evaluate the predicate on the node, then
+                # drop the predicate so it does not leak into the IR attributes
+                need_renaming = node['rename_condition'](node)
+                del node['rename_condition']
+                if need_renaming:
+                    node.name = node['framework_node_name']
+            else:
+                # no condition attached -- rename unconditionally
+                node.name = node['framework_node_name']
+
+            # the helper attribute is always removed, whether or not a rename happened
+            del node['framework_node_name']
from extensions.back.ElementwiseOpsToEltwiseOps import SimpleEltwiseToEltwiseOp
from extensions.back.insert_compatibility_l2normalization import CompatibilityL2NormalizationPattern
from extensions.ops.elementwise import Mul
+from extensions.ops.normalize_l2 import NormalizeL2Op
from mo.back.replacement import BackReplacementPattern
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, rename_node
-from mo.ops.const import Const
class NormalizeToNormalizeL2(BackReplacementPattern):
mul = Mul(graph, {'name': output_name}).create_node()
rename_node(mul, output_name)
- node.out_port(0).get_connection().set_source(mul.out_port(0))
- node.out_port(0).connect(mul.in_port(0))
- node.in_port(1).get_connection().get_source().connect(mul.in_port(1))
- node.in_port(1).disconnect()
-
- node['type'] = 'NormalizeL2'
- node['eps_mode'] = 'add'
- node['force_precision_in_ports'] = {1: 'int64'}
+ if not node.across_spatial:
+ axes = int64_array([1])
+ else:
+ axes = int64_array(np.arange(start=1, stop=node.in_port(0).data.get_shape().size))
- axes_val = np.array([1]) if not node.across_spatial else \
- np.arange(start=1, stop=node.in_port(0).data.get_shape().size)
- axes = Const(graph, {'value': axes_val}).create_node()
- node.in_port(1).connect(axes.out_port(0))
+ normalizel2 = create_op_with_const_inputs(graph, NormalizeL2Op, {1: axes}, {'eps_mode': 'add', 'eps': node.eps})
- del node['across_spatial']
- del node['channel_shared']
+ node.out_port(0).get_connection().set_source(mul.out_port(0))
+ node.in_port(1).get_connection().get_source().connect(mul.in_port(1))
+ normalizel2.out_port(0).connect(mul.in_port(0))
+ node.in_port(0).get_connection().set_destination(normalizel2.in_port(0))
class ChangeCastOutputType(FrontReplacementSubgraph):
"""
- Change the Cast to int64 to int32 since not all plugins support int64 data type.
Change the Cast to fp64 to fp32 since not all plugins support fp64 data type.
Change the Cast to fp32 to fp16 when generating IR for fp16.
"""
def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
node = match['cast']
- if node.dst_type == np.int64:
- log.warning('Change data type from {} to {} for node {}'.format(node.dst_type, np.int32, node.name))
- node.dst_type = np.int32
-
if node.dst_type == np.float64:
log.warning('Change data type from {} to {} for node {}'.format(node.dst_type, np.float32, node.name))
node.dst_type = np.float32
"""
import logging as log
+
from math import sqrt, fabs
from extensions.ops.gelu import GeLUOP
('div', dict(op='Div')),
('erf', dict(op='Erf')),
('add', dict(op='Add')),
+ ('mul_param', dict(op='Const')),
+ ('div_param', dict(op='Const')),
+ ('add_param', dict(op='Const')),
+
],
edges=[
('mul', 'mul0'),
('div', 'erf'),
('erf', 'add'),
- ('add', 'mul0')
+ ('add', 'mul0'),
+ ('mul_param', 'mul'),
+ ('div_param', 'div'),
+ ('add_param', 'add'),
])
def replace_sub_graph(self, graph: Graph, match: dict):
# Gaussian Error Linear Unit
# f(x) = 0.5 * x * (1 + erf(x / sqrt(2))
- add = match['add']
- mul = match['mul']
div = match['div']
inp_port = div.in_port(0).get_source()
inp = inp_port.node
log.debug('Found potential Erf-based GeLU pattern after {} with name {}'.format(inp.op, inp.name))
# take the values of the mul, add and div
- div_param = None
- mul_param = None
- add_param = None
- if div.in_port(0).get_source().node.soft_get('type') == 'Const':
- div_param = div.in_port(0).get_source().node.value
- elif div.in_port(1).get_source().node.soft_get('type') == 'Const':
- div_param = div.in_port(1).get_source().node.value
-
- if mul.in_port(0).get_source().node.soft_get('type') == 'Const':
- mul_param = mul.in_port(0).get_source().node.value
- elif mul.in_port(1).get_source().node.soft_get('type') == 'Const':
- mul_param = mul.in_port(1).get_source().node.value
-
- if add.in_port(0).get_source().node.soft_get('type') == 'Const':
- add_param = add.in_port(0).get_source().node.value
- elif add.in_port(1).get_source().node.soft_get('type') == 'Const':
- add_param = add.in_port(1).get_source().node.value
+ div_param = match['div_param']
+ add_param = match['add_param']
+ mul_param = match['mul_param']
- if mul_param is None or div_param is None or add_param is None:
- return
+ if add_param.value.size == 1 and mul_param.value.size == 1 and div_param.value.size == 1:
+ mul_param = match['mul_param'].value.item()
+ add_param = match['add_param'].value.item()
+ div_param = match['div_param'].value.item()
- sqrt2 = sqrt(2.0)
- # check that the values match the approximation
- if fabs(div_param - sqrt2) < 1e-06 and mul_param == 0.5 and add_param == 1.0:
- log.debug('Confirmed Erf-based GELU pattern after {} with name {}'.format(inp.op, inp.name))
- gelu = GeLUOP(graph, dict(name=inp.name + '/GELU_')).create_node()
- inp_port.connect(gelu.in_port(0))
- match['mul0'].out_port(0).get_connection().set_source(gelu.out_port(0))
+ sqrt2 = sqrt(2.0)
+ # check that the values match the approximation
+ if fabs(div_param - sqrt2) < 1e-06 and mul_param == 0.5 and add_param == 1.0:
+ log.debug('Confirmed Erf-based GELU pattern after {} with name {}'.format(inp.op, inp.name))
+ gelu = GeLUOP(graph, dict(name=inp.name + '/GELU_')).create_node()
+ inp_port.connect(gelu.in_port(0))
+ match['mul0'].out_port(0).get_connection().set_source(gelu.out_port(0))
--- /dev/null
+"""
+ Copyright (C) 2017-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+from math import sqrt, fabs, pi
+
+from extensions.ops.gelu import GeLUOP
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph
+
+
+class GeLUMergerTanh(FrontReplacementSubgraph):
+    """Fuses the TanH-based GELU approximation subgraph into a single GeLU op."""
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['cmd_params'].generate_experimental_IR_V10]
+
+    def pattern(self):
+        log.info('Enabled GeLU Merger for approximation with TanH')
+        return dict(
+            nodes=[
+                ('pow', dict(op='Pow')),
+                ('mul', dict(op='Mul')),
+                ('mul0', dict(op='Mul')),
+                ('mul1', dict(op='Mul')),
+                ('mul2', dict(op='Mul')),
+                ('tanh', dict(op='Tanh')),
+                ('add', dict(op='Add')),
+                ('add0', dict(op='Add')),
+                ('mul_param', dict(op='Const')),
+                ('mul0_param', dict(op='Const')),
+                ('mul1_param', dict(op='Const')),
+            ],
+            edges=[
+                ('pow', 'mul'),
+                ('mul', 'add'),
+                ('add', 'mul0'),
+                ('mul0', 'tanh'),
+                ('tanh', 'add0'),
+                ('add0', 'mul1'),
+                ('mul1', 'mul2'),
+                ('mul_param', 'mul'),
+                ('mul0_param', 'mul0'),
+                ('mul1_param', 'mul1'),
+            ])
+
+    def replace_sub_graph(self, graph: Graph, match: dict):
+        # Gaussian Error Linear Unit, TanH based approximation:
+        # f(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))
+        inp_port = match['pow'].in_port(0).get_source()
+        inp = inp_port.node
+        log.debug('Found potential TanH-based GeLU pattern after {} with name {}'.format(inp.op, inp.name))
+
+        # take the values of the mul ops
+        mul_param = match['mul_param']
+        mul0_param = match['mul0_param']
+        mul1_param = match['mul1_param']
+        # only scalar constants can correspond to the approximation coefficients
+        if mul0_param.value.size == 1 and mul_param.value.size == 1 and mul1_param.value.size == 1:
+            mul_param = match['mul_param'].value.item()
+            mul0_param = match['mul0_param'].value.item()
+            mul1_param = match['mul1_param'].value.item()
+            sqrt2pi = sqrt(2.0/pi)
+            # check that the values match the approximation
+            if fabs(mul0_param - sqrt2pi) < 1e-06 and fabs(mul_param - 0.044715) < 1e-06 and mul1_param == 0.5:
+                log.debug('Confirmed TanH-based GELU pattern after {} with name {}'.format(inp.op, inp.name))
+                gelu = GeLUOP(graph, dict(name=inp.name + '/GELU_')).create_node()
+                inp_port.connect(gelu.in_port(0))
+                match['mul2'].out_port(0).get_connection().set_source(gelu.out_port(0))
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+from math import sqrt
+
+from extensions.front.GeLUMerger_Erf import GeLUMergerErf
+from extensions.front.GeLUMerger_Tanh import GeLUMergerTanh
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes_erf = {
+ 'inp': {'kind': 'op', 'op': 'AnyOp'},
+ 'mul': {'kind': 'op', 'op': 'Mul'},
+ 'mul0': {'kind': 'op', 'op': 'Mul'},
+ 'div': {'kind': 'op', 'op': 'Div'},
+ 'erf': {'kind': 'op', 'op': 'Erf'},
+ 'add': {'kind': 'op', 'op': 'Add'},
+ 'mul_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'div_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'add_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'out': {'kind': 'op', 'op': 'AnyOp'},
+}
+
+nodes_attributes_tanh = {
+ 'inp': {'kind': 'op', 'op': 'AnyOp'},
+ 'pow': {'kind': 'op', 'op': 'Pow'},
+ 'mul': {'kind': 'op', 'op': 'Mul'},
+ 'mul0': {'kind': 'op', 'op': 'Mul'},
+ 'mul1': {'kind': 'op', 'op': 'Mul'},
+ 'mul2': {'kind': 'op', 'op': 'Mul'},
+ 'tanh': {'kind': 'op', 'op': 'Tanh'},
+ 'add': {'kind': 'op', 'op': 'Add'},
+ 'add0': {'kind': 'op', 'op': 'Add'},
+ 'mul_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'mul0_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'mul1_param': {'kind': 'op', 'type': 'Const', 'op': 'Const'},
+ 'out': {'kind': 'op', 'op': 'AnyOp'},
+}
+
+nodes_attributes_ref = {
+ 'inp': {'kind': 'op', 'op': 'AnyOp'},
+ 'gelu': {'kind': 'op', 'op': 'Gelu'},
+ 'out': {'kind': 'op', 'op': 'AnyOp'},
+}
+
+class TestGeLUMergerReplacement(unittest.TestCase):
+    """Checks that the Erf- and TanH-based GELU subgraphs are fused into a single Gelu op."""
+
+    def test_GeLUMergerErf_test_1(self):
+        """Erf approximation: 0.5 * x * (1 + erf(x / sqrt(2))) collapses to Gelu."""
+        graph = build_graph(nodes_attributes_erf,
+                            [('inp', 'mul0', {'out': 0}),
+                             ('inp', 'div', {'out': 0}),
+                             ('mul', 'mul0'),
+                             ('div', 'erf'),
+                             ('erf', 'add'),
+                             ('add', 'mul0'),
+                             ('mul_param', 'mul'),
+                             ('div_param', 'div'),
+                             ('add_param', 'add'),
+                             ('mul0', 'out'),
+                             ],
+                            {'mul_param': {'shape': np.array([1]), 'value': np.array(0.5)},
+                             'add_param': {'shape': np.array([1]), 'value': np.array(1.0)},
+                             'div_param': {'shape': np.array([1]), 'value': np.array(sqrt(2.0))}
+                             },
+                            nodes_with_edges_only=True)
+        graph_ref = build_graph(nodes_attributes_ref,
+                                [('inp', 'gelu'),
+                                 ('gelu', 'out')],
+                                {}, nodes_with_edges_only=True)
+        graph.stage = 'front'
+
+        replacer = GeLUMergerErf()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'out', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_GeLUMergerTanh_test_2(self):
+        """TanH approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))) collapses to Gelu."""
+        graph = build_graph(nodes_attributes_tanh,
+                            [('inp', 'mul2', {'out': 0}),
+                             ('inp', 'add', {'out': 0}),
+                             ('inp', 'pow', {'out': 0}),
+                             ('pow', 'mul'),
+                             ('mul', 'add'),
+                             ('add', 'mul0'),
+                             ('mul0', 'tanh'),
+                             ('tanh', 'add0'),
+                             ('add0', 'mul1'),
+                             ('mul1', 'mul2'),
+                             ('mul_param', 'mul'),
+                             ('mul0_param', 'mul0'),
+                             ('mul1_param', 'mul1'),
+                             ('mul2', 'out'),
+                             ],
+                            {'mul0_param': {'shape': np.array([1]), 'value': np.array(sqrt(2.0/3.1415926))},
+                             'mul1_param': {'shape': np.array([1]), 'value': np.array(0.5)},
+                             'mul_param': {'shape': np.array([1]), 'value': np.array(0.044715)}
+                             },
+                            nodes_with_edges_only=True)
+        graph_ref = build_graph(nodes_attributes_ref,
+                                [('inp', 'gelu'),
+                                 ('gelu', 'out')],
+                                {}, nodes_with_edges_only=True)
+        graph.stage = 'front'
+
+        replacer = GeLUMergerTanh()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'out', check_op_attrs=True)
+        self.assertTrue(flag, resp)
\ No newline at end of file
"""
import numpy as np
-from extensions.ops.Log import LogOp
+from extensions.ops.activation_ops import Log
from extensions.ops.elementwise import Add
from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Graph, Node
+from mo.graph.graph import Graph, Node, rename_nodes
from mo.ops.const import Const
enabled = True
def replace_op(self, graph: Graph, node: Node):
+ node_name = node.soft_get('name', node.id)
const_dtype = np.float32
if node.has_valid('data_type'):
const_dtype = node.data_type
const = Const(graph, {'value': np.array([1], dtype=const_dtype)}).create_node()
add = Add(graph, {'name': node.name + '/Add_'}).create_node()
- log = LogOp(graph, {'name': node.name + '/Log_'}).create_node()
+ log = Log(graph, {'name': node.name + '/Log_'}).create_node()
# Connect nodes: input -> Add -> Log
const.out_port(0).connect(add.in_port(0))
node.in_port(0).get_connection().set_destination(add.in_port(1))
add.out_port(0).connect(log.in_port(0))
+ rename_nodes([(node, node_name + '/delete'), (log, node_name)])
# The "explicit" version of the return value is: [(out_node.id, 0)])
return [log.id]
-
See the License for the specific language governing permissions and
limitations under the License.
"""
-from extensions.ops.Log import LogOp
from extensions.ops.ReduceOps import ReduceMax, ReduceSum
+from extensions.ops.activation_ops import Exp, Log
from extensions.ops.elementwise import Sub
-from extensions.ops.exp import ExpOp
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, Node, rename_nodes
-from mo.ops.const import Const
class LogSoftmaxFrontReplacer(FrontReplacementOp):
reduce_max_node.out_port(0).connect(first_sub_node.in_port(1))
# Creating of Exp -> ReduceSum -> Log block
- exp_node = ExpOp(graph, {'name': node_name + '/Exp_'}).create_node()
+ exp_node = Exp(graph, {'name': node_name + '/Exp_'}).create_node()
reduce_sum_node = create_op_with_const_inputs(graph,
ReduceSum,
{1: int64_array([node.axis])},
op_attrs={'name': node_name + '/ReduceSum_', 'keep_dims': True})
- log_node = LogOp(graph, {'name': node_name + '/Log_'}).create_node()
+ log_node = Log(graph, {'name': node_name + '/Log_'}).create_node()
first_sub_node.out_port(0).connect(exp_node.in_port(0))
exp_node.out_port(0).connect(reduce_sum_node.in_port(0))
limitations under the License.
"""
+from extensions.ops.slice_like import SliceLike
from mo.front.extractor import FrontExtractorOp
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
-from mo.ops.crop import Crop
class SliceLikeFrontExtractor(FrontExtractorOp):
@classmethod
def extract(cls, node):
attrs = get_mxnet_layer_attrs(node.symbol_dict)
- axes = attrs.tuple("axes", int, [])
- offset = [0 for i in range(0, axes[-1])]
+ axes = list(attrs.tuple("axes", int, []))
node_attrs = {
- 'axis': 1,
- 'offset': offset,
- 'dim': offset,
'axes': axes
}
# update the attributes of the node
- Crop.update_node_stat(node, node_attrs)
+ SliceLike.update_node_stat(node, node_attrs)
return cls.enabled
from extensions.front.mxnet.eltwise_scalar_replacers import MulScalarFrontReplacer
from extensions.front.mxnet.ssd_detection_output_replacer import SsdPatternDetectionOutputReplacer
from extensions.front.split_normalizer import AttributedSplitToSplit
+from extensions.ops.slice_like import SliceLike
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph, Node
from mo.middle.pattern_match import find_pattern_matches
nodes=[
('power', dict(op='Mul')),
('anchor', dict(op='Const')),
- ('slice_like', dict(op='Crop')),
+ ('slice_like', dict(op='slice_like')),
('reshape1', dict(op='Reshape')),
('reshape2', dict(op='Reshape')),
('reshape3', dict(op='Reshape'))
variants = np.array([variants_dict['mul_scalar1x'], variants_dict['mul_scalar1y'],
variants_dict['mul_scalar2x'], variants_dict['mul_scalar2y']] * int(const.value.size / 4)).reshape(const.value.shape)
- priorbox_variants = Const(graph, dict(value=variants, symbol_dict={'name': const.id + '/priorbox_variants'})).create_node()
- variants_slice_like = Crop(graph, dict(axis=slice_like.axis, offset=slice_like.offset, dim=slice_like.dim, axes=slice_like.axes,
- symbol_dict={'name': slice_like.id + '/variants_slice_like'})) \
- .create_node()
+ priorbox_variants = Const(graph, dict(value=variants, name=const.id + '/priorbox_variants')).create_node()
+ variants_slice_like = SliceLike(graph, dict(axes=slice_like.axes,
+ name=slice_like.id + '/variants_slice_like')).create_node()
variants_slice_like.in_port(0).connect(priorbox_variants.out_port(0))
variants_slice_like.in_port(1).connect(crop_shape.out_port(0))
nodes=[
('power', dict(op='Mul')),
('anchor', dict(op='Const')),
- ('slice_like', dict(op='Crop')),
+ ('slice_like', dict(op='slice_like')),
('reshape1', dict(op='Reshape')),
('reshape2', dict(op='Reshape')),
('reshape3', dict(op='Reshape')),
@classmethod
def extract(cls, node):
- Broadcast.update_node_stat(node)
+ Broadcast.update_node_stat(node, {'mode': 'bidirectional'})
return cls.enabled
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from extensions.ops.non_max_suppression import NonMaxSuppression
from mo.front.extractor import FrontExtractorOp
encoding_map = {0: 'corner', 1: 'center'}
center_point_box = onnx_attr(node, 'center_point_box', 'i', default=0)
NonMaxSuppression.update_node_stat(node, {'sort_result_descending': 0,
+ 'output_type': np.int64,
'box_encoding': encoding_map[center_point_box]})
return cls.enabled
node.soft_get('name', node.id), pads))
# Try to convert slightly incorrect models with insufficient pad parameters
- assert pads.size * 2 == kernel_shape.size
+ assert pads.size == kernel_shape.size
pads = np.concatenate([pads, pads])
log.warning('Extended pads to {}'.format(pads))
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from mo.front.extractor import FrontExtractorOp
from mo.ops.shape import Shape
@classmethod
def extract(cls, node):
- Shape.update_node_stat(node)
+ Shape.update_node_stat(node, {'output_type': np.int64})
return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Graph, Node, rename_nodes
+from mo.ops.flatten import FlattenONNX
+from mo.ops.reshape import Reshape
+from mo.ops.shape import Shape
+from mo.ops.softmax import Softmax
+
+
+class SoftmaxONNXFrontReplacer(FrontReplacementOp):
+ """
+ Replace SoftmaxONNX operation with FlattenONNX -> Softmax -> Reshape subgraph
+ """
+ op = "SoftMaxONNX"
+ enabled = True
+
+ def run_before(self):
+ from extensions.front.onnx.flattenONNX_to_reshape import FlattenONNXToReshape
+ return [FlattenONNXToReshape]
+
+ def replace_op(self, graph: Graph, node: Node):
+ node_name = node.soft_get('name', node.id)
+ assert node.has_valid('axis'), 'The node "{}" does not have mandatory attribute "axis"'.format(node_name)
+
+ flatten_node = FlattenONNX(graph, {'name': node_name + '/FlattenONNX_', 'axis': node.axis}).create_node()
+ shape_node = Shape(graph, {'name': node_name + '/ShapeOf_'}).create_node()
+ softmax_node = Softmax(graph, {'name': node_name + '/Softmax_',
+ 'axis': 1,
+ 'framework_node_name': node_name,
+ 'rename_condition': lambda n: len(n.graph.get_op_nodes(name=node_name)) == 0
+ }).create_node()
+ reshape_node = Reshape(graph, {}).create_node()
+
+ rename_nodes([(node, node_name + '/delete'), (reshape_node, node_name)])
+
+ flatten_node.out_port(0).connect(softmax_node.in_port(0))
+ softmax_node.out_port(0).connect(reshape_node.in_port(0))
+ shape_node.out_port(0).connect(reshape_node.in_port(1))
+
+ source = node.in_port(0).get_source()
+
+ flatten_node.in_port(0).connect(source)
+ shape_node.in_port(0).connect(source)
+
+ return [reshape_node.id]
from mo.front.extractor import FrontExtractorOp
from mo.front.onnx.extractors.utils import onnx_attr
-from mo.ops.softmax import LogSoftmaxONNX, Softmax
+from mo.ops.softmax import LogSoftmaxONNX, SoftmaxONNX
class SoftmaxExtractor(FrontExtractorOp):
@classmethod
def extract(cls, node):
axis = onnx_attr(node, 'axis', 'i', default=1)
- Softmax.update_node_stat(node, {'axis': axis})
+ SoftmaxONNX.update_node_stat(node, {'axis': axis})
return cls.enabled
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_node_with_second_input
-from mo.graph.graph import Graph
+from mo.graph.graph import Graph, rename_nodes
from mo.ops.shape import Shape
from mo.ops.squeeze import Squeeze
node = match['op']
name = node.soft_get('name', node.id)
- shape_of = Shape(graph, {'name': name + '/shape_of'}).create_node()
- rank_1d = Shape(graph, {'name': name + '/rank_of'}).create_node()
- rank_0d = create_op_node_with_second_input(graph, Squeeze, int64_array([0]), {'name': name + '/0d_rank_of'},
- rank_1d)
+ assert node.has_valid('output_type'), \
+ 'Rank node should have `output_type` attribute, but it`s not for node {}'.format(name)
+
+ shape_of = Shape(graph, {'name': name + '/shape_of', 'output_type': node.output_type}).create_node()
+ rank_1d = Shape(graph, {'name': name + '/rank_of', 'output_type': node.output_type}).create_node()
+ rank_0d = create_op_node_with_second_input(
+ graph, Squeeze, int64_array(0), {'name': name + '/0d_rank_of'}, rank_1d)
shape_of.out_port(0).connect(rank_1d.in_port(0))
node.out_port(0).get_connection().set_source(rank_0d.out_port(0))
node.in_port(0).get_connection().set_destination(shape_of.in_port(0))
+
+ rename_nodes([(node, name + '/ToBeDeleted'), (rank_0d, name)])
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+from generator import generator, generate
+
+from extensions.front.rank_decomposer import RankDecomposer
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph, regular_op_with_empty_data, result, connect, \
+ valued_const_with_data
+
+nodes = lambda output_type: {
+ **regular_op_with_empty_data('input', {'type': 'Parameter'}),
+ **regular_op_with_empty_data('rank', {'op': 'Rank', 'type': None, 'output_type': output_type, 'name': 'my_rank'}),
+ **result(),
+
+ **regular_op_with_empty_data('shape', {'type': 'ShapeOf', 'output_type': output_type}),
+ **regular_op_with_empty_data('rank_1D', {'type': 'ShapeOf', 'output_type': output_type}),
+ **valued_const_with_data('zero', int64_array(0)),
+ **regular_op_with_empty_data('rank_0D', {'type': 'Squeeze'}),
+}
+
+
+@generator
+class RankDecomposerTest(unittest.TestCase):
+
+ @generate(np.int32, np.int64)
+ def test_rank_decomposer(self, output_type):
+ graph = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'rank'),
+ *connect('rank', 'output'),
+ ], nodes_with_edges_only=True)
+ RankDecomposer().find_and_replace_pattern(graph)
+
+ graph_ref = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'shape'),
+ *connect('shape', 'rank_1D'),
+ *connect('rank_1D', '0:rank_0D'),
+ *connect('zero', '1:rank_0D'),
+ *connect('rank_0D', 'output'),
+ ], nodes_with_edges_only=True)
+
+ (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+ self.assertTrue(flag, resp)
+ self.assertEqual(graph.get_op_nodes(type='Squeeze')[0]['name'], 'my_rank',
+ 'Name is not inherited from original node for RankDecomposer')
+ print(output_type)
+
+ def test_rank_decomposer_assertion(self):
+ graph = build_graph(nodes_attrs=nodes(None), edges=[
+ *connect('input', 'rank'),
+ *connect('rank', 'output'),
+ ], nodes_with_edges_only=True)
+ self.assertRaises(AssertionError, RankDecomposer().find_and_replace_pattern, graph)
from extensions.ops.ReduceOps import ReduceProd
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Graph, Node
-from mo.ops.const import Const
+from mo.front.tf.graph_utils import create_op_node_with_second_input
+from mo.graph.graph import Graph, rename_nodes
from mo.ops.shape import Shape
op = "Size"
enabled = True
- def replace_op(self, graph: Graph, node: Node):
- shape = Shape(graph, {'name': node.name + '/Shape/'}).create_node()
- reduce_prod = ReduceProd(graph, {'name': shape.name + 'ReduceProd/', 'keep_dims': False}).create_node()
- reduce_axis = Const(graph, {'value': int64_array([0])}).create_node()
+ def replace_sub_graph(self, graph: Graph, match: dict):
+ node = match['op']
+ name = node.soft_get('name', node.id)
+ assert node.has_valid('output_type'), \
+ 'Size node should have `output_type` attribute, but it`s not for node {}'.format(name)
- # Connect nodes
+ shape = Shape(graph, {'name': name + '/Shape/', 'output_type': node.output_type}).create_node()
node.in_port(0).get_connection().set_destination(shape.in_port(0))
- reduce_prod.in_port(0).get_connection().set_source(shape.out_port(0))
- reduce_prod.in_port(1).get_connection().set_source(reduce_axis.out_port(0))
+ reduce_prod = create_op_node_with_second_input(
+ graph, ReduceProd, int64_array([0]), {'name': shape.name + 'ReduceProd/', 'keep_dims': False}, shape)
+ node.out_port(0).get_connection().set_source(reduce_prod.out_port(0))
- # The "explicit" version of the return value is: [(out_node.id, 0)])
- return [reduce_prod.id]
+ rename_nodes([(node, name + '/ToBeDeleted'), (reduce_prod, name)])
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
+
from extensions.ops.argmax import ArgMaxOp
from mo.front.extractor import FrontExtractorOp
+from mo.front.tf.extractors.utils import tf_dtype_extractor
class ArgMaxFrontExtractor(FrontExtractorOp):
@classmethod
def extract(cls, node):
ArgMaxOp.update_node_stat(node, {'out_max_val': 0, 'top_k': 1, 'axis': None,
- 'dim_attrs': ['axis'], 'keepdims': 0, 'remove_values_output': True})
+ 'dim_attrs': ['axis'], 'keepdims': 0, 'remove_values_output': True,
+ 'output_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int64),
+ })
return cls.enabled
"""
import logging as log
+import numpy as np
+
from extensions.ops.non_max_suppression import NonMaxSuppression
from mo.front.extractor import FrontExtractorOp
@classmethod
def extract(cls, node):
- attrs = {'sort_result_descending': 1, 'center_point_box': 0}
+ attrs = {'sort_result_descending': 1, 'center_point_box': 0, 'output_type': np.int32}
NonMaxSuppression.update_node_stat(node, attrs)
return cls.enabled
if not pad_to_max_output_size:
log.warning('The attribute "pad_to_max_output_size" of node {} is equal to False which is not supported.'
'Forcing it to be equal to True'.format(node.soft_get('name')))
- attrs = {'sort_result_descending': 1, 'box_encoding': 'corner'}
+ attrs = {'sort_result_descending': 1, 'box_encoding': 'corner', 'output_type': np.int32}
NonMaxSuppression.update_node_stat(node, attrs)
return cls.enabled
if not pad_to_max_output_size:
log.warning('The attribute "pad_to_max_output_size" of node {} is equal to False which is not supported.'
'Forcing it to be equal to True'.format(node.soft_get('name')))
- attrs = {'sort_result_descending': 1, 'box_encoding': 'corner'}
+ attrs = {'sort_result_descending': 1, 'box_encoding': 'corner', 'output_type': np.int32}
NonMaxSuppression.update_node_stat(node, attrs)
return cls.enabled
@classmethod
def extract(cls, node: Node):
- Rank.update_node_stat(node)
+ Rank.update_node_stat(node, {'output_type': np.int32})
return cls.enabled
@classmethod
def extract(cls, node: Node):
- Shape.update_node_stat(node, {'data_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int32)})
+ Shape.update_node_stat(node, {'output_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int32)})
return cls.enabled
@classmethod
def extract(cls, node):
- Size.update_node_stat(node)
+ Size.update_node_stat(node, {'output_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int32)})
return cls.enabled
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+from generator import generator, generate
+
+from extensions.front.tf.SizeReplacer import SizeFrontReplacer
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph, regular_op_with_empty_data, result, connect, \
+ valued_const_with_data
+
+nodes = lambda output_type: {
+ **regular_op_with_empty_data('input', {'type': 'Parameter'}),
+ **regular_op_with_empty_data('size', {'op': 'Size', 'type': None, 'output_type': output_type, 'name': 'my_size'}),
+ **result(),
+
+ **regular_op_with_empty_data('shape', {'type': 'ShapeOf', 'output_type': output_type}),
+ **valued_const_with_data('zero', int64_array([0])),
+ **regular_op_with_empty_data('reduce', {'type': 'ReduceProd', 'keep_dims': False}),
+}
+
+
+@generator
+class SizeReplacerTest(unittest.TestCase):
+
+ @generate(np.int32, np.int64)
+ def test_size_replacer(self, output_type):
+ graph = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'size'),
+ *connect('size', 'output'),
+ ], nodes_with_edges_only=True)
+ SizeFrontReplacer().find_and_replace_pattern(graph)
+
+ graph_ref = build_graph(nodes_attrs=nodes(output_type), edges=[
+ *connect('input', 'shape'),
+ *connect('shape', '0:reduce'),
+ *connect('zero', '1:reduce'),
+ *connect('reduce', 'output'),
+ ], nodes_with_edges_only=True)
+
+ (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+ self.assertTrue(flag, resp)
+ self.assertEqual(graph.get_op_nodes(type='ReduceProd')[0]['name'], 'my_size',
+ 'Name is not inherited from original node for SizeReplacer')
+ print(output_type)
+
+ def test_size_replacer_assertion(self):
+ graph = build_graph(nodes_attrs=nodes(None), edges=[
+ *connect('input', 'size'),
+ *connect('size', 'output'),
+ ], nodes_with_edges_only=True)
+ self.assertRaises(AssertionError, SizeFrontReplacer().find_and_replace_pattern, graph)
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from extensions.ops.topk import TopK
from mo.front.extractor import FrontExtractorOp
@classmethod
def extract(cls, node):
sort = 'value' if node.pb.attr['sorted'] else 'none'
- TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort, 'k' : node.pb.attr['k'].i})
+ TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort, 'k': node.pb.attr['k'].i,
+ 'index_element_type': np.int32})
return cls.enabled
@classmethod
def extract(cls, node):
sort = 'value' if node.pb.attr['sorted'] else 'none'
- TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort})
+ TopK.update_node_stat(node, {'mode': 'max', 'axis': -1, 'sort': sort, 'index_element_type': np.int32})
return cls.enabled
import numpy as np
+from extensions.middle.SliceLikeToStridedSlice import SliceLikeToStridedSlice
from mo.graph.graph import Graph
from mo.middle.replacement import MiddleReplacementPattern
from mo.ops.const import Const
from extensions.middle.pass_separator import MiddleStart
return [MiddleStart]
+ def run_before(self):
+ return [SliceLikeToStridedSlice]
+
def pattern(self):
return dict(
nodes=[
('const', dict(op='Const')),
('const_data', dict(kind='data')),
- ('slice_like', dict(op='Crop')),
+ ('slice_like', dict(op='slice_like')),
('slice_like_out', dict(kind='data')),
('reshape', dict(op='Reshape')),
],
def replace_pattern(self, graph: Graph, match: dict):
node = match['argmax']
+ node_name = node.soft_get('name', node.id)
connected_ports = [port for port in node.in_ports().values() if not port.disconnected()]
if len(connected_ports) == 2:
else:
axis = node.axis
- assert axis is not None, 'The "axis" should be defined for node "{}"'.format(node.soft_get('name'))
+ assert axis is not None, 'The "axis" should be defined for node "{}"'.format(node_name)
+ assert node.has_and_set('output_type'), 'The data type is not set for node "{}"'.format(node_name)
topk_node = TopK(graph, {'axis': axis, 'mode': 'max', 'sort': 'index',
- 'remove_values_output': node.has_and_set('remove_values_output')}).create_node()
+ 'remove_values_output': node.has_and_set('remove_values_output'),
+ 'index_element_type': node.output_type}).create_node()
node.in_port(0).get_connection().set_destination(topk_node.in_port(0))
if node.has_and_set('out_max_val'): # in this mode the ArgMax produces tuples (max_ind, max_value)
concat_node = Concat(graph, {'axis': 1, 'name': node.name + '/Concat'}).create_node()
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+from typing import Dict
+
+from mo.front.tf.graph_utils import create_op_with_const_inputs
+from mo.graph.graph import Graph, Node, rename_nodes
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.shape import Shape
+from mo.ops.strided_slice import StridedSlice
+from mo.utils.shape import get_shape_values_by_range_idxs, new_shape_node_from_shape_nodes, \
+ get_shape_and_rank_nodes_by_port
+
+
+class SliceLikeToStridedSlice(MiddleReplacementPattern):
+ """
+ Replace mxnet slice_like operation with StridedSlice in reshapable way.
+ The begin parameter for StridedSlice is always a zero vector.
+ The end parameter depends on the slice_like inputs and axes.
+
+ 1. If slice_like inputs has the same ranks, we can use second input shape (shape_like) as the end parameter for
+ StridedSlice. Axes parameter will form end_mask, that allows to use slice only on the desired axes.
+ Example:
+ input_shape = [1, 64, 128, 256], shape_like = [1, 2, 3, 4], axes = [2, 3].
+ In that case end = shape_like = [1, 2, 3, 4], but end_mask = [0, 0, 1, 1], so output_shape = [1, 64, 3, 4]
+
+ 2. Axes parameter has the last dimension of the first input shape (in that case shape_like >= input_shape).
+ Here we can use only a part of shape_like as the end parameter.
+ Example:
+ input_shape = [1, 64, 128, 256], shape_like = [1, 2, 3, 4, 5], axes = [2, 3].
+ end = shape_like[:4] = [1, 2, 3, 4], end_mask = [0, 0, 1, 1], output_shape = [1, 64, 3, 4]
+
+ 3. Usual case, where we form end parameter by concatenate parts of shape_like and input_shape.
+ Examples:
+ input_shape = [1, 64, 128, 256, 512], shape_like = [1, 2, 3, 4], axes = [2, 3].
+ end = shape_like[:4] + input_shape[4:] = [1, 2, 3, 4, 512],
+ end_mask = [0, 0, 1, 1, 0], output_shape = [1, 64, 3, 4, 512]
+
+ input_shape = [1, 64, 128, 256], shape_like = [1, 2, 3, 4, 5], axes = [0, 2].
+ end = shape_like[:3] + input_shape[3:] = [1, 2, 3, 256],
+ end_mask = [1, 0, 1, 0], output_shape = [1, 64, 3, 256]
+ """
+
+ enabled = True
+ graph_condition = [lambda graph: graph.graph['fw'] == 'mxnet']
+
+ @staticmethod
+ def pattern():
+ return dict(
+ nodes=[
+ ('op', dict(kind='op', op='slice_like'))
+ ],
+ edges=[]
+ )
+
+ @staticmethod
+ def replace_pattern(graph: Graph, match: Dict[str, Node]):
+ node = match['op']
+ name = node.soft_get('name', node.id)
+ input_shape = node.in_port(0).data.get_shape()
+ second_input_shape = node.in_port(1).data.get_shape()
+
+ begin_mask = np.zeros(len(input_shape), dtype=np.int64)
+ end_mask = np.zeros(len(input_shape), dtype=np.int64)
+
+ for i in node.axes:
+ end_mask[i] = np.int64(1)
+
+ new_axis_mask = np.zeros(len(input_shape), dtype=np.int64)
+ shrink_axis_mask = np.zeros(len(input_shape), dtype=np.int64)
+ ellipsis_mask = np.zeros(len(input_shape), dtype=np.int64)
+
+ ss = create_op_with_const_inputs(graph, StridedSlice,
+ port_value_dict={1: np.zeros(len(input_shape), dtype=np.int64)},
+ op_attrs={'name': 'StridedSlice', 'begin_mask': begin_mask,
+ 'end_mask': end_mask, 'new_axis_mask': new_axis_mask,
+ 'shrink_axis_mask': shrink_axis_mask,
+ 'ellipsis_mask': ellipsis_mask})
+ if input_shape.size == second_input_shape.size:
+ end = Shape(graph, dict(name=name + '/End')).create_node()
+ end.in_port(0).connect(node.in_port(1).get_source())
+ ss.in_port(2).connect(end.out_port(0))
+ else:
+ shape_like, rank_like = get_shape_and_rank_nodes_by_port(node.in_port(1).get_source())
+ end_first_part = get_shape_values_by_range_idxs(shape_like, rank_like, 0, node.axes[-1], include_end=True)
+ if input_shape.size - 1 == node.axes[-1]:
+ ss.in_port(2).connect(end_first_part.out_port(0))
+ else:
+ shape, rank = get_shape_and_rank_nodes_by_port(node.in_port(0).get_source())
+ end_second_part = get_shape_values_by_range_idxs(shape, rank, node.axes[-1], -1, include_begin=False,
+ include_end=True)
+ end = new_shape_node_from_shape_nodes([end_first_part, end_second_part])
+ ss.in_port(2).connect(end.out_port(0))
+
+ node.in_port(0).get_connection().set_destination(ss.in_port(0))
+ node.in_port(1).disconnect()
+ node.out_port(0).get_connection().set_source(ss.out_port(0))
+
+ rename_nodes([(node, name + '/ShouldBeDeleted'), (ss, name)])
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+from extensions.middle.SliceLikeToStridedSlice import SliceLikeToStridedSlice
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes = {
+ 'input': {'kind': 'op', 'op': 'Const'},
+ 'input_data': {'kind': 'data'},
+
+ 'shape_like_input': {'kind': 'op', 'op': 'Const'},
+ 'shape_like_input_data': {'kind': 'data'},
+
+ 'slice_like': {'kind': 'op', 'op': 'slice_like'},
+ 'slice_like_data': {'kind': 'data', 'shape': None, 'value': None},
+
+ 'result': {'kind': 'op', 'op': 'Result'},
+
+ 'shape': {'kind': 'op', 'op': 'ShapeOf'},
+ 'shape_data': {'kind': 'data'},
+ 'rank_1_d': {'kind': 'op', 'op': 'ShapeOf'},
+ 'rank_1_d_data': {'kind': 'data'},
+ 'rank': {'kind': 'op', 'op': 'Squeeze'},
+ 'rank_data': {'kind': 'data'},
+ 'rank_const': {'kind': 'op', 'op': 'Const'},
+ 'rank_const_data': {'kind': 'data'},
+
+ 'shape_like': {'kind': 'op', 'op': 'ShapeOf'},
+ 'shape_like_data': {'kind': 'data'},
+ 'rank_like_1_d': {'kind': 'op', 'op': 'ShapeOf'},
+ 'rank_like_1_d_data': {'kind': 'data'},
+ 'rank_like': {'kind': 'op', 'op': 'Squeeze'},
+ 'rank_like_const': {'kind': 'op', 'op': 'Const'},
+ 'rank_like_const_data': {'kind': 'data'},
+
+ 'begin': {'kind': 'op', 'op': 'Const'},
+ 'begin_data': {'kind': 'data'},
+ 'ss': {'kind': 'op', 'op': 'StridedSlice'},
+
+ 'start_idx_like': {'kind': 'op', 'op': 'Const'},
+ 'start_idx_like_data': {'kind': 'data'},
+ 'end_idx_like': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_like_data': {'kind': 'data'},
+ 'end_idx_like_const': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_like_const_data': {'kind': 'data'},
+ 'end_idx_like_add': {'kind': 'op', 'op': 'Add'},
+ 'end_idx_like_add_data': {'kind': 'data'},
+ 'delta_like': {'kind': 'op', 'op': 'Const'},
+ 'delta_like_data': {'kind': 'data'},
+ 'range_like': {'kind': 'op', 'op': 'Range'},
+ 'range_like_data': {'kind': 'data'},
+ 'gather_like': {'kind': 'op', 'op': 't_gather'},
+ 'gather_like_data': {'kind': 'data'},
+ 'gather_like_axis': {'kind': 'op', 'op': 'Const'},
+ 'gather_like_axis_data': {'kind': 'data'},
+ 'concat': {'kind': 'op', 'op': 'Concat'},
+ 'concat_data': {'kind': 'data'},
+
+ 'start_idx': {'kind': 'op', 'op': 'Const'},
+ 'start_idx_data': {'kind': 'data'},
+ 'start_idx_const': {'kind': 'op', 'op': 'Const'},
+ 'start_idx_const_data': {'kind': 'data'},
+ 'start_idx_add': {'kind': 'op', 'op': 'Add'},
+ 'start_idx_add_data': {'kind': 'data'},
+ 'end_idx': {'kind': 'op', 'op': 'Add'},
+ 'end_idx_data': {'kind': 'data'},
+ 'end_idx_axis': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_axis_data': {'kind': 'data'},
+ 'end_idx_const': {'kind': 'op', 'op': 'Const'},
+ 'end_idx_const_data': {'kind': 'data'},
+ 'end_idx_add': {'kind': 'op', 'op': 'Add'},
+ 'end_idx_add_data': {'kind': 'data'},
+ 'delta': {'kind': 'op', 'op': 'Const'},
+ 'delta_data': {'kind': 'data'},
+ 'range': {'kind': 'op', 'op': 'Range'},
+ 'range_data': {'kind': 'data'},
+ 't_gather': {'kind': 'op', 'op': 't_gather'},
+ 'gather_data': {'kind': 'data'},
+ 'gather_axis': {'kind': 'op', 'op': 'Const'},
+ 'gather_axis_data': {'kind': 'data'}
+
+}
+
+edges = [
+ ('input', 'input_data'),
+ ('input_data', 'slice_like', {'in': 0}),
+ ('shape_like_input', 'shape_like_input_data'),
+ ('shape_like_input_data', 'slice_like', {'in': 1}),
+ ('slice_like', 'slice_like_data'),
+ ('slice_like_data', 'result')
+]
+
+same_input_shapes_dims_edges = [
+ ('input', 'input_data'),
+ ('input_data', 'ss', {'in': 0}),
+ ('ss', 'slice_like_data'),
+ ('slice_like_data', 'result'),
+ ('shape_like_input', 'shape_like_input_data'),
+ ('shape_like_input_data', 'shape_like'),
+ ('shape_like', 'shape_like_data'),
+ ('shape_like_data', 'ss', {'in': 2}),
+ ('begin', 'begin_data'),
+ ('begin_data', 'ss', {'in': 1})
+]
+
+shape_like_sub_graph_edges = [
+ ('input', 'input_data'),
+ ('input_data', 'ss', {'in': 0}),
+ ('ss', 'slice_like_data'),
+ ('slice_like_data', 'result'),
+ ('begin', 'begin_data'),
+ ('begin_data', 'ss', {'in': 1}),
+ ('shape_like_input', 'shape_like_input_data'),
+ ('shape_like_input_data', 'shape_like'),
+ ('shape_like', 'shape_like_data'),
+ ('shape_like_data', 'rank_like_1_d'),
+ ('rank_like_1_d', 'rank_like_1_d_data'),
+ ('rank_like_1_d_data', 'rank_like', {'in': 0}),
+ ('rank_like_const', 'rank_like_const_data'),
+ ('rank_like_const_data', 'rank_like', {'in': 1}),
+ ('end_idx_like', 'end_idx_like_data'),
+ ('end_idx_like_const', 'end_idx_like_const_data'),
+ ('end_idx_like_data', 'end_idx_like_add', {'in': 0}),
+ ('end_idx_like_const_data', 'end_idx_like_add', {'in': 1}),
+ ('end_idx_like_add', 'end_idx_like_add_data'),
+ ('end_idx_like_add_data', 'range_like', {'in': 1}),
+ ('start_idx_like', 'start_idx_like_data'),
+ ('start_idx_like_data', 'range_like', {'in': 0}),
+ ('delta_like', 'delta_like_data'),
+ ('delta_like_data', 'range_like', {'in': 2}),
+ ('range_like', 'range_like_data'),
+ ('range_like_data', 'gather_like', {'in': 1}),
+ ('shape_like_data', 'gather_like', {'in': 0}),
+ ('gather_like_axis', 'gather_like_axis_data'),
+ ('gather_like_axis_data', 'gather_like', {'in': 2}),
+ ('gather_like', 'gather_like_data')
+]
+
+last_axis_index = shape_like_sub_graph_edges + [('gather_like_data', 'ss', {'in': 2})]
+
+input_sub_graph_edges = [
+ ('input_data', 'shape'),
+ ('shape', 'shape_data'),
+ ('shape_data', 'rank_1_d'),
+ ('rank_1_d', 'rank_1_d_data'),
+ ('rank_1_d_data', 'rank', {'in': 0}),
+ ('rank_const', 'rank_const_data'),
+ ('rank_const_data', 'rank', {'in': 1}),
+ ('rank', 'rank_data'),
+ ('rank_data', 'end_idx', {'in': 0}),
+ ('end_idx_axis', 'end_idx_axis_data'),
+ ('end_idx_axis_data', 'end_idx', {'in': 1}),
+ ('end_idx', 'end_idx_data'),
+ ('end_idx_data', 'end_idx_add', {'in': 0}),
+ ('end_idx_const', 'end_idx_const_data'),
+ ('end_idx_const_data', 'end_idx_add', {'in': 1}),
+ ('start_idx', 'start_idx_data'),
+ ('start_idx_data', 'start_idx_add', {'in': 0}),
+ ('start_idx_const', 'start_idx_const_data'),
+ ('start_idx_const_data', 'start_idx_add', {'in': 1}),
+ ('end_idx_add', 'end_idx_add_data'),
+ ('start_idx_add', 'start_idx_add_data'),
+ ('delta', 'delta_data'),
+ ('start_idx_add_data', 'range', {'in': 0}),
+ ('end_idx_add_data', 'range', {'in': 1}),
+ ('delta_data', 'range', {'in': 2}),
+ ('range', 'range_data'),
+ ('range_data', 't_gather', {'in': 1}),
+ ('shape_data', 't_gather', {'in': 0}),
+ ('gather_axis', 'gather_axis_data'),
+ ('gather_axis_data', 't_gather', {'in': 2}),
+ ('t_gather', 'gather_data'),
+ ('gather_data', 'concat', {'in': 1}),
+ ('concat', 'concat_data'),
+ ('concat_data', 'ss', {'in': 2}),
+ ('gather_like_data', 'concat', {'in': 0})
+]
+
+input_part_shape_edges = shape_like_sub_graph_edges + input_sub_graph_edges
+
+
+class SliceLikeToStridedSliceTest(unittest.TestCase):
+
+ def test_1(self):
+ graph = build_graph(
+ nodes_attributes,
+ edges,
+ update_attributes={
+ 'input_data': {'shape': int64_array([1, 224, 224, 3])},
+ 'shape_like_input_data': {'shape': int64_array([2, 2, 2, 2])},
+ 'slice_like': {'axes': int64_array([2, 3])}
+ },
+ nodes_with_edges_only=True
+ )
+ SliceLikeToStridedSlice().find_and_replace_pattern(graph)
+ ref_graph = build_graph(
+ nodes_attributes,
+ same_input_shapes_dims_edges,
+ nodes_with_edges_only=True
+ )
+
+ flag, resp = compare_graphs(graph, ref_graph, 'result')
+ self.assertTrue(flag, resp)
+
+ def test_2(self):
+ graph = build_graph(
+ nodes_attributes,
+ edges,
+ update_attributes={
+ 'input_data': {'shape': int64_array([1, 224, 224, 3])},
+ 'shape_like_input_data': {'shape': int64_array([2, 2, 2, 2, 2])},
+ 'slice_like': {'axes': int64_array([2, 3])}
+ },
+ nodes_with_edges_only=True
+ )
+ SliceLikeToStridedSlice().find_and_replace_pattern(graph)
+ ref_graph = build_graph(
+ nodes_attributes,
+ last_axis_index,
+ nodes_with_edges_only=True
+ )
+
+ flag, resp = compare_graphs(graph, ref_graph, 'result')
+ self.assertTrue(flag, resp)
+
+ def test_3(self):
+ graph = build_graph(
+ nodes_attributes,
+ edges,
+ update_attributes={
+ 'input_data': {'shape': int64_array([1, 224, 224, 3])},
+ 'shape_like_input_data': {'shape': int64_array([2, 2, 2, 2, 2])},
+ 'slice_like': {'axes': int64_array([1, 2])}
+ },
+ nodes_with_edges_only=True
+ )
+ SliceLikeToStridedSlice().find_and_replace_pattern(graph)
+ ref_graph = build_graph(
+ nodes_attributes,
+ input_part_shape_edges,
+ nodes_with_edges_only=True
+ )
+ flag, resp = compare_graphs(graph, ref_graph, 'result')
+ self.assertTrue(flag, resp)
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import logging as log
-
-import numpy as np
-
-from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice
-from extensions.middle.SliceConverter import ConvertSlice
-from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Graph
-from mo.middle.passes.eliminate import remove_op_node_with_data_node
-from mo.middle.replacement import MiddleReplacementPattern
-
-
-class UselessStridedSliceEraser(MiddleReplacementPattern):
- enabled = True
- force_shape_inference = True
-
- def run_before(self):
- return [ConvertGroupedStridedSlice]
-
- def run_after(self):
- return [ConvertSlice]
-
- @staticmethod
- def pattern():
- return dict(
- nodes=[('strided_slice', dict(kind='op', op='StridedSlice'))],
- edges=[]
- )
-
- @staticmethod
- def replace_pattern(graph: Graph, match: dict):
- node_ss = match['strided_slice']
- # slices = [elem for elem in node_ss.slices if elem is not None]
- # node_ss.slices = np.array(slices)
-
- if node_ss.out_port(0).data.get_value() is not None:
- # StridedSlices(SS) in shape-calculating sub-graphs that should not be deleted that easily
- # Example:
- # In RetinaNetFilteredDetectionsReplacement we have SS that slices first batch
- # We delete such SS for batch 1, but it should be performed while reshaping the model
- return
-
- output_data_node = node_ss.out_node(0)
- input_data_node = node_ss.in_node(0)
-
- out_shape = output_data_node.shape
-
- if not np.all(node_ss.shrink_axis_mask == 0):
- out_shape = list(out_shape)
- for i in range(len(node_ss.shrink_axis_mask)):
- if node_ss.shrink_axis_mask[i] == 1:
- out_shape.insert(i, 1)
- out_shape = int64_array(out_shape)
-
- if not np.all(node_ss.new_axis_mask == 0):
- out_shape = list(out_shape)
- for i in reversed(range(len(node_ss.new_axis_mask))):
- if node_ss.new_axis_mask[i] == 1:
- out_shape.pop(i)
- out_shape = int64_array(out_shape)
-
- if np.array_equal(input_data_node.shape, out_shape) and \
- all(elem.step == 1 for elem in match['strided_slice'].slices):
- if not np.all(node_ss.shrink_axis_mask == 0):
- ConvertGroupedStridedSlice.add_squeeze_for_shrink(graph, node_ss)
- if not np.all(node_ss.new_axis_mask == 0):
- ConvertGroupedStridedSlice.add_unsqueeze_for_new(graph, node_ss)
-
- log.info("Useless StridedSlice op '{}' has been detected".format(match['strided_slice'].id))
- # remove inputs to Strided Slice so it has just one input with data so we can use 'remove_op_node' function
- graph.remove_edge(match['strided_slice'].in_node(1).id, match['strided_slice'].id)
- graph.remove_edge(match['strided_slice'].in_node(2).id, match['strided_slice'].id)
- if len(match['strided_slice'].in_nodes()) > 3:
- graph.remove_edge(match['strided_slice'].in_node(3).id, match['strided_slice'].id)
-
- remove_op_node_with_data_node(graph, match['strided_slice'])
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-import unittest
-
-from extensions.middle.UselessStridedSlice import UselessStridedSliceEraser
-from mo.front.common.partial_infer.utils import int64_array
-from mo.middle.passes.eliminate import shape_inference
-from mo.utils.ir_engine.compare_graphs import compare_graphs
-from mo.utils.unittest.graph import build_graph
-
-nodes_attributes = {
- # input data
- 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
- 'placeholder_data': {'value': None, 'shape': int64_array([4, 1, 6]), 'kind': 'data', 'data_type': None},
- #
- 'strided_slice': {'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice',
- 'shrink_axis_mask': int64_array([0, 0, 0]), 'new_axis_mask': int64_array([0, 0, 0]),
- 'slices': [slice(0, 4, 1), slice(0, 1, 1), slice(0, 6, 1)]},
- 'strided_slice_data': {'value': None, 'shape': int64_array([4, 1, 6]), 'kind': 'data'},
- 'strided_slice_input_1_data': {'value': None, 'shape': int64_array([3]), 'kind': 'data'},
- 'strided_slice_input_2_data': {'value': None, 'shape': int64_array([3]), 'kind': 'data'},
- 'strided_slice_input_3_data': {'value': None, 'shape': int64_array([3]), 'kind': 'data'},
- #
- 'strided_slice_2': {'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice',
- 'shrink_axis_mask': int64_array([0, 0, 0]), 'new_axis_mask': int64_array([0, 0, 0]),
- 'slices': [slice(0, 4, 1), slice(0, 1, 1), slice(0, 6, 1)]},
- 'strided_slice_2_data': {'value': None, 'shape': int64_array([4, 1, 6]), 'kind': 'data'},
- # Output operation
- 'output_op': {'kind': 'op', 'op': 'Result'},
- # squeeze op
- 'squeeze': {'type': 'Squeeze', 'kind': 'op', 'op': 'Squeeze'},
- 'squeeze_const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': int64_array([1])},
- 'squeeze_const_data': {'kind': 'data'},
- # unsqueeze op
- 'unsqueeze': {'type': None, 'kind': 'op', 'op': 'Unsqueeze'},
- 'unsqueeze_const': {'type': 'Const', 'kind': 'op', 'op': 'Const', 'value': int64_array([1])},
- 'unsqueeze_const_data': {'kind': 'data'},
- 'unsqueeze_data': {'value': None, 'shape': int64_array([4, 6]), 'kind': 'data'},
-}
-
-
-class UselessStridedSliceTests(unittest.TestCase):
- def test_single_stride_slice_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {},
- nodes_with_edges_only=True
- )
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'output_op'),
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])}},
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_shrink_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'shrink_axis_mask': int64_array([0, 1, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'squeeze'),
- ('squeeze_const', 'squeeze_const_data'),
- ('squeeze_const_data', 'squeeze'),
- ('squeeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 6])}},
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_new_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'new_axis_mask': int64_array([0, 1, 0, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 1, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'unsqueeze'),
- ('unsqueeze_const', 'unsqueeze_const_data'),
- ('unsqueeze_const_data', 'unsqueeze'),
- ('unsqueeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 1, 6])}},
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_shrink_and_new_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'shrink_axis_mask': int64_array([0, 1, 0, 0]),
- 'new_axis_mask': int64_array([0, 0, 1, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'unsqueeze'),
- ('unsqueeze_const', 'unsqueeze_const_data'),
- ('unsqueeze_const_data', 'unsqueeze'),
- ('unsqueeze', 'unsqueeze_data'),
- ('unsqueeze_data', 'squeeze'),
- ('squeeze_const', 'squeeze_const_data'),
- ('squeeze_const_data', 'squeeze'),
- ('squeeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])},
- 'unsqueeze_data': {'shape': int64_array([4, 1, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])},
- 'unsqueeze_const': {'value': int64_array([2])},
- },
- nodes_with_edges_only=True
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_single_stride_slice_with_new_and_shrink_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice': {'shrink_axis_mask': int64_array([0, 0, 1, 0]),
- 'new_axis_mask': int64_array([0, 1, 0, 0])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])}},
- nodes_with_edges_only=True
- )
- graph.graph['layout'] = 'NCHW'
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'unsqueeze'),
- ('unsqueeze_const', 'unsqueeze_const_data'),
- ('unsqueeze_const_data', 'unsqueeze'),
- ('unsqueeze', 'unsqueeze_data'),
- ('unsqueeze_data', 'squeeze'),
- ('squeeze_const', 'squeeze_const_data'),
- ('squeeze_const_data', 'squeeze'),
- ('squeeze', 'strided_slice_data'),
- ('strided_slice_data', 'output_op')
- ],
- {'unsqueeze_data': {'shape': int64_array([4, 1, 1, 6])},
- 'strided_slice_data': {'shape': int64_array([4, 1, 6])},
- 'squeeze_const': {'value': int64_array([2])},
- },
- nodes_with_edges_only=True
- )
-
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_consecutive_stride_slices_removal(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'strided_slice_2'),
- ('strided_slice_input_1_data', 'strided_slice_2'),
- ('strided_slice_input_2_data', 'strided_slice_2'),
- ('strided_slice_input_3_data', 'strided_slice_2'),
- ('strided_slice_2', 'strided_slice_2_data'),
- ('strided_slice_2_data', 'output_op'),
- ],
- {},
- nodes_with_edges_only=True
- )
-
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- shape_inference(graph)
-
- graph_ref = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'output_op'),
- ],
- {'placeholder_data': {'shape': int64_array([4, 1, 6])}}
- )
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
-
- def test_negative(self):
- graph = build_graph(nodes_attributes,
- [('placeholder', 'placeholder_data'),
- ('placeholder_data', 'strided_slice'),
- ('strided_slice_input_1_data', 'strided_slice'),
- ('strided_slice_input_2_data', 'strided_slice'),
- ('strided_slice_input_3_data', 'strided_slice'),
- ('strided_slice', 'strided_slice_data'),
- ('strided_slice_data', 'output_op'),
- ],
- {'strided_slice_data': {'value': []}},
- nodes_with_edges_only=True
- )
- graph_ref = graph.copy()
- UselessStridedSliceEraser().find_and_replace_pattern(graph)
- (flag, resp) = compare_graphs(graph, graph_ref, 'output_op', check_op_attrs=True)
- self.assertTrue(flag, resp)
import numpy as np
from extensions.middle.AnchorToPriorBox import AnchorToPriorBoxes
+from extensions.middle.SliceLikeToStridedSlice import SliceLikeToStridedSlice
from mo.graph.graph import Graph
from mo.middle.replacement import MiddleReplacementPattern
from mo.ops.const import Const
def run_after(self):
return [AnchorToPriorBoxes]
+ def run_before(self):
+ return [SliceLikeToStridedSlice]
+
def pattern(self):
return dict(
mandatory_props = {
'op': __class__.op,
'infer': __class__.infer,
- 'type': __class__.op,
+ 'type': None,
}
super().__init__(graph, mandatory_props, attrs)
mandatory_props = {
'op': __class__.op,
'type': 'Convert',
+ 'version': 'opset1',
'infer': __class__.infer,
'type_infer': __class__.type_infer,
'dst_type': None,
super().__init__(graph, {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': multi_box_detection_infer,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'in_ports_count': 1,
'infer': Enter.enter_infer,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': Exit.exit_infer,
'in_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
mandatory_props = {
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-import numpy as np
-
-from mo.graph.graph import Node, Graph
-from mo.ops.op import Op
-
-
-class LogOp(Op):
- op = 'Log'
-
- def __init__(self, graph: Graph, attrs: dict):
- mandatory_props = {
- 'type': __class__.op,
- 'op': __class__.op,
- 'infer': __class__.infer,
- 'in_ports_count': 1,
- 'out_ports_count': 1,
- }
- super().__init__(graph, mandatory_props, attrs)
-
- @staticmethod
- def infer(node: Node):
- assert len(node.in_nodes()) == 1
- assert len(node.out_nodes()) == 1
- input_node = node.in_node()
-
- node.out_node(0).shape = input_node.shape.copy()
- if input_node.has_valid('value'):
- node.out_node(0).value = np.array(np.log(input_node.value))
mandatory_props = {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'transpose_a': False,
'transpose_b': False,
'infer': __class__.infer,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'in_ports_count': 1,
'infer': NextIteration.enter_infer,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
super().__init__(graph, {
'op': self.op,
'type': self.op_type,
+ 'version': 'opset1',
'infer': reduce_infer,
'keep_dims': 0,
'in_ports_count': 2,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArray.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayGather.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayReader.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayScatter.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArraySize.array_infer,
}
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': TensorArrayWriter.array_infer,
}
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'top_height': 0,
'top_width': 0,
'size_divisible_by': 0,
'type': self.op,
'op': self.op,
'operation': self.operation,
+ 'version': 'opset1',
'infer': self.infer,
'in_ports_count': 1,
'out_ports_count': 1,
'type': __class__.op,
'op': __class__.op,
'infer': ArgMaxOp.argmax_infer,
+ 'output_type': np.int64,
'in_ports_count': 2,
'out_ports_count': 1,
}
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': None
}, attrs)
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'in_ports_count': 5,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'coord_start': 2,
'force_suppress': False,
'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'type_infer': self.type_infer,
'infer': self.infer,
'in_ports_count': 2,
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'input_as_shape': 1,
'in_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': None
}, attrs)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': CorrelationOp.corr_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 2,
'out_ports_count': 1,
'infer': CTCGreedyDecoderOp.ctc_greedy_decoder_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': DataAugmentationOp.data_augmentation_infer
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'mode': 'blocks_first',
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer,
type_infer=self.type_infer,
in_ports_count=4,
super().__init__(graph, {
'op': self.op,
'type': self.op_type,
+ 'version': 'opset1',
'infer': lambda node: eltwise_infer(node, self.operation),
'type_infer': self.type_infer,
'can_be_bias': True,
enabled = False
op = 'Round'
op_type = None
+ version = 'extension'
operation = staticmethod(lambda a: np.round(a))
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import numpy as np
-
-from mo.graph.graph import Node, Graph
-from mo.ops.op import Op
-
-
-class ExpOp(Op):
- op = 'Exp'
-
- def __init__(self, graph: Graph, attrs: dict):
- mandatory_props = {
- 'type': __class__.op,
- 'op': __class__.op,
- 'infer': __class__.infer,
- 'in_ports_count': 1,
- 'out_ports_count': 1,
- }
- super().__init__(graph, mandatory_props, attrs)
-
- @staticmethod
- def infer(node: Node):
- assert len(node.in_nodes()) == 1
- assert len(node.out_nodes()) == 1
- input_node = node.in_node()
- assert input_node.has_valid('shape')
- node.out_node().shape = input_node.shape.copy()
- if input_node.has_valid('value'):
- node.out_node().value = np.exp(input_node.value)
+++ /dev/null
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import unittest
-
-import numpy as np
-
-from extensions.ops.exp import ExpOp
-from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph
-
-nodes_attributes = {'node_1': {'op': 'Identity', 'kind': 'op'},
- 'exp': {'op': 'Exp', 'kind': 'op'},
- 'node_3': {'op': 'Identity', 'kind': 'op'},
- 'op_output': {'kind': 'op', 'op': 'Result'}
- }
-
-
-class TestExpOp(unittest.TestCase):
- def test_shape_only(self):
- graph = build_graph(nodes_attributes,
- [('node_1', 'exp'),
- ('exp', 'node_3'),
- ('node_3', 'op_output')
- ],
- {'node_3': {'shape': None},
- 'node_1': {'shape': np.array([1, 3, 10, 20])},
- })
-
- exp_node = Node(graph, 'exp')
- ExpOp.infer(exp_node)
- exp_shape = np.array([1, 3, 10, 20])
- res_shape = graph.node['node_3']['shape']
- for i in range(0, len(exp_shape)):
- self.assertEqual(exp_shape[i], res_shape[i])
-
- def test_shape_and_value(self):
- graph = build_graph(nodes_attributes,
- [('node_1', 'exp'),
- ('exp', 'node_3'),
- ('node_3', 'op_output')
- ],
- {
- 'node_3': {
- 'shape': None,
- 'value': None,
- },
- 'node_1': {
- 'shape': np.array([2]),
- 'value': np.array([0, 1], dtype=np.float32),
- },
- })
-
- exp_node = Node(graph, 'exp')
- ExpOp.infer(exp_node)
- exp_shape = np.array([2])
- exp_value = np.array([1, 2.7182818], dtype=np.float32)
- res_shape = graph.node['node_3']['shape']
- res_value = graph.node['node_3']['value']
- for i in range(0, len(exp_shape)):
- self.assertEqual(exp_shape[i], res_shape[i])
- for i in range(0, len(exp_value)):
- self.assertAlmostEqual(exp_value[i], res_value[i], places=6)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'levels': None,
'is_eltwise': True,
# flag to switch between dumping FakeQuantize as statistics and keeping it as layer in IR
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': copy_shape_infer
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
'in_ports_count': 3,
'out_ports_count': 1,
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'axes': None,
'mode': None,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 5,
'out_ports_count': 2,
'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset2',
'eps': None,
'across_channels': 0,
'normalize_variance': 1,
from mo.front.common.partial_infer.utils import int64_array
from mo.graph.graph import Node, Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_destination_type
from mo.ops.op import Op
+from mo.utils.error import Error
class NonMaxSuppression(Op):
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset3',
'infer': __class__.infer,
+ 'output_type': np.int64,
'center_point_box': 0,
'box_encoding': 'corner',
'in_ports_count': 5,
def supported_attrs(self):
if self.ir_version < 10:
- return [
- 'center_point_box',
- ]
+ return ['center_point_box']
else:
- return [
- 'sort_result_descending',
- 'box_encoding'
- ]
-
+ version = self.get_opset()
+ if version == 'opset3':
+ return ['sort_result_descending', 'box_encoding',
+ ('output_type', lambda node: np_data_type_to_destination_type(node.output_type))]
+ elif version == 'opset1':
+ return ['sort_result_descending', 'box_encoding']
+ else:
+ raise Error('Unsupported operation opset version "{}"'.format(version))
@staticmethod
def infer(node: Node):
@staticmethod
def type_infer(node):
- node.out_port(0).set_data_type(np.int64 if node.graph.graph['cmd_params'].generate_experimental_IR_V10 else
- np.int32)
+ if not node.graph.graph['cmd_params'].generate_experimental_IR_V10:
+ node.out_port(0).set_data_type(np.int32)
+ else:
+ if node.get_opset() == 'opset3':
+ node.out_port(0).set_data_type(node.output_type)
+ else:
+ node.out_port(0).set_data_type(np.int64)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.non_max_suppression import NonMaxSuppression
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, valued_const_with_data, result, \
+ connect, FakeAttr
+
+
+class TestNonMaxSuppressionInfer(unittest.TestCase):
+ def setUp(self):
+ nodes = {
+ **regular_op_with_shaped_data('boxes', [10, 100, 4], {'type': 'Parameter'}),
+ **regular_op_with_shaped_data('scores', [10, 5, 100], {'type': 'Parameter'}),
+ **valued_const_with_data('max_output_per_class', int64_array(10)),
+ **regular_op_with_shaped_data('nms', None, {'op': 'NonMaxSuppression', 'type': 'NonMaxSuppression',
+ 'name': 'nms'}),
+ **result('output'),
+ }
+
+ self.graph = build_graph(nodes, [
+ *connect('boxes', '0:nms'),
+ *connect('scores', '1:nms'),
+ *connect('max_output_per_class', '2:nms'),
+ *connect('nms', 'output'),
+ ], nodes_with_edges_only=True)
+
+ def test_nms_infer_v7(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=False, ir_version=7)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset1'
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int32)
+
+ def test_nms_infer_v10_opset1(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset1'
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int64)
+
+ def test_nms_infer_v10_i64_opset3(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset3'
+ nms_node['output_type'] = np.int64
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int64)
+
+ def test_nms_infer_v10_i32_opset3(self):
+ self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+ nms_node = Node(self.graph, 'nms')
+ nms_node['version'] = 'opset3'
+ nms_node['output_type'] = np.int32
+ NonMaxSuppression.infer(nms_node)
+ NonMaxSuppression.type_infer(nms_node)
+
+ self.assertTrue(np.array_equal(nms_node.out_port(0).data.get_shape(), [100, 3]))
+ self.assertTrue(nms_node.out_port(0).get_data_type() == np.int32)
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
'eps': None,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'eps': None,
'p': None,
'eps_mode': None,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'axis': -1,
'infer': __class__.infer,
'on_value': None,
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
'is_input': True,
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': copy_shape_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'extension',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': PredictionHeatmapOp.infer
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
mandatory_props = {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'flip': 1,
'max_size': np.array([]),
'min_size': np.array([]),
mandatory_props = {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'in_ports_count': 2,
'out_ports_count': 1,
'infer': self.priorbox_clustered_infer,
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer,
)
super().__init__(graph, mandatory_props, attrs)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'post_nms_topn': 300, # default in caffe-shared
'infer': ProposalOp.proposal_infer,
'in_ports_count': 3,
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer
)
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset2',
'mode': 'average',
'in_ports_count': 2,
'out_ports_count': 1,
updated_attrs = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'mode': 'bilinear_deformable',
'in_ports_count': 3,
'trans_std': 0,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': __class__.infer,
See the License for the specific language governing permissions and
limitations under the License.
"""
+import numpy as np
from mo.graph.graph import Graph
from mo.ops.op import Op
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'op': __class__.op,
'type': None,
+ 'op': self.op,
+
+ 'output_type': np.int64,
'infer': None,
+
'in_ports_count': 1,
'out_ports_count': 1,
}
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': RegionYoloOp.regionyolo_infer
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset2',
'infer': ReorgYoloOp.reorgyolo_infer
}
super().__init__(graph, mandatory_props, attrs)
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
'type': __class__.op,
+ 'version': 'opset1',
'seq_axis': None,
'batch_axis': 0,
'op': __class__.op,
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer,
in_ports_count=5,
out_ports_count=2,
mandatory_props = {
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': __class__.infer,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'in_ports_count': 3,
'out_ports_count': 1,
'infer': SimplerNMSOp.simplernms_infer
limitations under the License.
"""
-import networkx as nx
import numpy as np
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
from mo.ops.op import Op
class Size(Op):
op = 'Size'
- def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+ def __init__(self, graph: Graph, attrs: dict):
+ assert 'output_type' in attrs, 'Size has mandatory `output_type` attribute'
+
mandatory_props = {
- 'op': __class__.op,
- 'infer': __class__.infer,
+ 'type': None,
+ 'op': self.op,
+
+ 'output_type': np.int64,
+ 'infer': self.infer,
}
super().__init__(graph, mandatory_props, attrs)
@staticmethod
def infer(node: Node):
- size = np.prod(node.in_node().shape)
- value = np.array(size, dtype=np.int)
- node.out_node().shape = np.array(value.shape, dtype=np.int64)
+ name = node.soft_get('name', node.id)
+ connected_in_ports = [port for port in node.in_ports().values() if not port.disconnected()]
+ assert len(connected_in_ports) == 1, \
+ 'Size operation should have exact one input node, but it has {}'.format(len(connected_in_ports))
+
+ input_shape = node.in_port(0).data.get_shape()
+ assert input_shape is not None, \
+ 'Input shape is undefined for Size node `{}`'.format(node.soft_get('name', node.id))
+
+ assert node.has_valid('output_type'), \
+ '`output_type` attribute is not set for Size node `{}`'.format(name)
+ assert node.output_type in [np.int64, np.int32], \
+ 'Size `output_type` attribute must be int32 or int64, `{}` found'.format(np.dtype(node.output_type).name)
+
+ node.out_port(0).data.set_value(np.array(np.prod(input_shape), dtype=node.output_type))
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.front.caffe.extractors.utils import get_canonical_axis_index
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
+from mo.ops.op import Op
+
+
+class SliceLike(Op):
+    """
+    Shape-inference-only operation that crops input 0 so that, along the axes
+    listed in the `axes` attribute, its dimensions match those of input 1
+    (presumably the framework's `slice_like` op — confirm against the extractor
+    that creates this node).  `type` is None, so the node must be replaced by a
+    supported sub-graph before IR emission.
+    """
+    op = 'slice_like'
+    enabled = True
+
+    def __init__(self, graph: Graph, attrs: dict):
+        # `axes` must be present even when explicitly None (None means "all axes").
+        assert 'axes' in attrs, 'Please set mandatory `axes` attribute for `slice_like` operation'
+        super().__init__(graph, {
+            'type': None,
+            'op': self.op,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
+            'infer': self.infer,
+        }, attrs)
+
+    @staticmethod
+    def infer(node):
+        # Output shape starts as a copy of input 0's shape; the dims at the
+        # selected axes are then overridden with the matching dims of input 1.
+        input_shape = node.in_port(0).data.get_shape()
+        shape_like = node.in_port(1).data.get_shape()
+
+        new_shape = np.copy(input_shape)
+        if node.axes is not None:
+            # Canonicalize negative indices against input 0's rank; keep sorted.
+            node.axes = sorted([get_canonical_axis_index(input_shape, i) for i in node.axes])
+            for i in node.axes:
+                new_shape[i] = shape_like[i]
+        else:
+            # axes=None: take the entire shape from input 1; ranks must agree.
+            assert input_shape.size == shape_like.size,\
+                'Input shape ranks are inconsistent: {} and {}'.format(input_shape.size, shape_like.size)
+            node.axes = int64_array(range(shape_like.size))
+            new_shape = np.copy(shape_like)
+
+        node.out_port(0).data.set_shape(new_shape)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+
+from extensions.ops.slice_like import SliceLike
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
+
+# Test fixture: a two-input `slice_like` node. Input 0 has shape [3, 4], the
+# shape-like input has shape [2, 3]; the output shape is left undefined and is
+# filled in by SliceLike.infer in each test below.
+nodes_attributes = {
+    'input_data': {'kind': 'data', 'shape': int64_array([3, 4]), 'value': None},
+    'shape_like_data': {'kind': 'data', 'shape': int64_array([2, 3]), 'value': None},
+    # NOTE(review): 'op' reads 'slice_data' — looks like a typo for 'slice_like';
+    # harmless here because the tests invoke SliceLike.infer directly.
+    'slice_like': {'kind': 'op', 'op': 'slice_data'},
+    'out_data': {'kind': 'data', 'shape': None, 'value': None}
+}
+
+# Graph topology: both data nodes feed `slice_like`, which produces `out_data`.
+edges = [
+    ('input_data', 'slice_like', {'in': 0}),
+    ('shape_like_data', 'slice_like', {'in': 1}),
+    ('slice_like', 'out_data')
+]
+
+
+class SliceLikeTest(unittest.TestCase):
+    """Shape-inference tests for SliceLike.infer over the fixture graph above."""
+
+    def test_1(self):
+        # axes=None: output shape is copied entirely from the shape-like input.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': None}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([2, 3])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
+
+    def test_2(self):
+        # All axes listed explicitly: same result as copying the whole shape.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': (0, 1)}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([2, 3])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
+
+    def test_3(self):
+        # Only axis 0 is sliced: dim 0 comes from shape-like (2), dim 1 stays 4.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': (0,)}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([2, 4])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
+
+    def test_4(self):
+        # Negative axis: -1 canonicalizes to axis 1, so dim 1 becomes 3.
+        graph = build_graph(nodes_attributes, edges, {'slice_like': {'axes': (-1,)}})
+        slice_like = Node(graph, 'slice_like')
+        SliceLike.infer(slice_like)
+        ref_shape = int64_array([3, 3])
+        res_shape = graph.node['out_data']['shape']
+        self.assertTrue(np.array_equal(res_shape, ref_shape))
mandatory_props = {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'mode': 'blocks_first',
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 3
def __init__(self, graph: Graph, attrs: dict):
mandatory_props = {
- 'kind': 'op',
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'infer': self.infer,
'in_ports_count': 3,
def infer(node: Node):
input_indices_shape = node.in_port(0).data.get_shape()
input_shape_value = node.in_port(1).data.get_value()
- input_shape_shape = node.in_port(1).data.get_shape()
new_shape_value = node.in_port(2).data.get_value()
new_shape_shape = node.in_port(2).data.get_shape()
output_indices_shape = np.concatenate((input_indices_shape[0:1], new_shape_shape))
node.out_port(0).data.set_shape(output_indices_shape)
- #TODO: implement for constant input indices value
+ # TODO: implement for constant input indices value
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'type_infer': self.type_infer,
'infer': self.infer,
'in_ports_count': 4,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'reduce_op': None,
'type_infer': self.type_infer,
'infer': self.infer,
'in_ports_count': 6,
'out_ports_count': 1,
- 'version': 'experimental',
}, attrs)
def supported_attrs(self):
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'in_ports_count': 1,
'out_ports_count': 1,
'infer': SpatialTransformOp.sp_infer
super().__init__(graph, {
'op': self.op,
'type': 'VariadicSplit',
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': 'Split',
+ 'version': 'opset1',
'axis': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
+ 'type': None,
'op': __class__.op,
'identity': True,
'in_ports_count': 1,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'input_port_map': [], # a list of dicts with such attrs as external_port_id, etc.
'output_port_map': [], # a list of dicts with such attrs as external_port_id, etc.
'back_edges': [], # a list of dicts with such attrs as from_layer, from_port, etc.
import numpy as np
from mo.graph.graph import Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_destination_type
from mo.ops.op import Op, PermuteAttrs
from mo.utils.error import Error
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset3',
'infer': self.infer,
'type_infer': self.type_infer,
+
+ 'index_element_type': np.int32,
'axis': None,
'mode': 'max',
'sort': 'none',
}, attrs)
def backend_attrs(self):
- return ['axis', 'mode', 'sort']
+ version = self.get_opset()
+ if version == 'opset3':
+ return ['axis', 'mode', 'sort',
+ ('index_element_type', lambda node: np_data_type_to_destination_type(node.index_element_type))]
+ elif version == 'opset1':
+ return ['axis', 'mode', 'sort']
+ else:
+ raise Error('Unknown opset version "{}"'.format(version))
@staticmethod
def infer(node):
@staticmethod
def type_infer(node):
node.out_port(0).set_data_type(node.in_port(0).get_data_type())
- node.out_port(1).set_data_type(np.int32)
+ if node.get_opset() == 'opset3':
+ node.out_port(1).set_data_type(node.index_element_type)
+ else:
+ node.out_port(1).set_data_type(np.int32)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.topk import TopK
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph, regular_op_with_shaped_data, valued_const_with_data, result, \
+ connect, FakeAttr
+
+
+class TestTopKInfer(unittest.TestCase):
+    """Shape and data-type inference tests for TopK across IR versions/opsets."""
+
+    def setUp(self):
+        # float32 input [20, 100, 4], k=10, axis=1; TopK has two outputs
+        # (values and indices), each routed to its own Result node.
+        nodes = {
+            **regular_op_with_shaped_data('data', [20, 100, 4], {'type': 'Parameter', 'value': None,
+                                                                 '_out_port_data_type': {0: np.float32}}),
+            **valued_const_with_data('k', int64_array(10)),
+            **regular_op_with_shaped_data('topk', None, {'op': 'TopK', 'type': 'TopK', 'name': 'topk', 'axis': 1}),
+            'topk_d2': {'kind': 'data', 'shape': None, 'value': None},
+            **result('output_1'),
+            **result('output_2'),
+        }
+
+        self.graph = build_graph(nodes, [
+            *connect('data', '0:topk'),
+            *connect('k', '1:topk'),
+            ('topk', 'topk_d', {'out': 0}),
+            ('topk', 'topk_d2', {'out': 1}),
+            ('topk_d', 'output_1'),
+            ('topk_d2', 'output_2'),
+        ], nodes_with_edges_only=True)
+
+    def test_topk_infer_v7(self):
+        # Legacy IR v7 with opset1: indices output must default to int32.
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=False, ir_version=7)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset1'
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int32)
+
+    def test_topk_infer_v10_opset1(self):
+        # IR v10, opset1: still int32 indices (index_element_type is opset3-only).
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset1'
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int32)
+
+    def test_topk_infer_v10_i64_opset3(self):
+        # IR v10, opset3: indices dtype follows the index_element_type attribute (int64).
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset3'
+        topk_node['index_element_type'] = np.int64
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int64)
+
+    def test_topk_infer_v10_i32_opset3(self):
+        # IR v10, opset3: index_element_type=int32 is honoured as well.
+        self.graph.graph['cmd_params'] = FakeAttr(generate_experimental_IR_V10=True, ir_version=10)
+
+        topk_node = Node(self.graph, 'topk')
+        topk_node['version'] = 'opset3'
+        topk_node['index_element_type'] = np.int32
+        TopK.infer(topk_node)
+        TopK.type_infer(topk_node)
+
+        self.assertTrue(np.array_equal(topk_node.out_port(0).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(np.array_equal(topk_node.out_port(1).data.get_shape(), int64_array([20, 10, 4])))
+        self.assertTrue(topk_node.out_port(0).get_data_type() == np.float32)
+        self.assertTrue(topk_node.out_port(1).get_data_type() == np.int32)
mandatory_props = dict(
type=__class__.op,
op=__class__.op,
+ version='experimental',
infer=__class__.infer
)
super().__init__(graph, mandatory_props, attrs)
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': self.infer,
'force_precision_in_ports': {1: 'int64'},
'in_ports_count': 2,
mandatory_props = {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'experimental',
'infer': __class__.infer,
'in_ports_count': 1,
'out_ports_count': 3
raise Error(
'Unexpected exception happened during extracting attributes for node {}.' +
'\nOriginal exception message: {}',
- new_attrs['name'] if 'name' in new_attrs else '<UNKNOWN>',
+ node,
str(e)
) from e
if supported:
attrs = self.graph.node[self.node]
if not k in attrs:
raise AttributeError("Attribute {} missing in {} node".format(k, self.name))
+ if k == 'version' and attrs.get(k, v) != v:
+ raise AttributeError("Attribute 'version' cannot be updated in {} node".format(self.name))
+
attrs[k] = v
def __getattr__(self, k):
return self.graph.node[self.node][k]
def __setitem__(self, k, v):
+ if k == 'version' and self.graph.node[self.node].get(k, v) != v:
+ raise AttributeError("Attribute 'version' cannot be updated in {} node".format(self.name))
self.graph.node[self.node][k] = v
def __contains__(self, k):
if idx not in self._out_ports:
self.add_output_port(idx=idx)
+ def get_opset(self):
+ """
+ Gets the operation set version where the operation was introduced.
+ If the version is not defined then consider it an extension
+ :return: the string with the opset name
+ """
+ return self.soft_get('version', 'extension')
+
class Graph(nx.MultiDiGraph):
def __init__(self, data=None, **attr):
from extensions.back.SpecialNodesFinalization import RemoveConstOps, CreateConstNodesReplacement, RemoveOutputOps, \
NormalizeTI
+from mo.utils.get_ov_update_message import get_ov_update_message
from mo.graph.graph import Graph
from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively, for_each_sub_graph_recursively
from mo.pipeline.common import prepare_emit_ir, get_ir_version
if argv.generate_deprecated_IR_V7:
from mo.middle.passes.convert_data_type import SUPPORTED_DATA_TYPES
SUPPORTED_DATA_TYPES['bool'] = (np.bool, 'I32', 'boolean')
- return driver(argv)
+
+ ov_update_message = None
+ if not hasattr(argv, 'silent') or not argv.silent:
+ ov_update_message = get_ov_update_message()
+ ret_code = driver(argv)
+ if ov_update_message:
+ print(ov_update_message)
+ return ret_code
except (FileNotFoundError, NotADirectoryError) as e:
log.error('File {} was not found'.format(str(e).split('No such file or directory:')[1]))
log.debug(traceback.format_exc())
try:
if node.value.dtype in [np.float32, np.float64, np.float16] and not node.has_and_set('correct_data_type'):
convert_node_blobs(graph, node, data_type_str_to_np(data_type_str))
- # convert all I64 to I32 since plugins don't support I64:
- if node.value.dtype == np.int64:
- convert_node_blobs(graph, node, np.int32)
except Exception as e:
raise Error('Coudn\'t convert blob {}, details: {}', node.soft_get('name'), e) from e
limitations under the License.
"""
-import numpy as np
-
from mo.graph.graph import Node, Graph
from mo.graph.perm_inputs import PermuteInputs
from mo.ops.op import Op
+from mo.utils.broadcasting import bi_directional_shape_broadcasting, uni_directional_shape_broadcasting, \
+ uni_directional_broadcasting, bi_directional_broadcasting
+from mo.utils.error import Error
class Broadcast(Op):
Inputs:
[0] - tensor to be broadcasted
[1] - shape to be broadcast to
- [2] - optional axis paramater that which axis are allowed to be broadcasted
+ [2] - optional axis parameter that which axis are allowed to be broadcasted
"""
op = 'Broadcast'
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset3',
+ 'mode': 'numpy',
'in_ports_count': 3,
'out_ports_count': 1,
'force_precision_in_ports':
'infer': __class__.infer,
}, attrs)
+ def supported_attrs(self):
+ return ['mode']
+
@staticmethod
def infer(node: Node):
- # TODO Add necessary checks and asserts
- b_value = node.in_port(0).data.get_value()
- b_shape = node.in_port(1).data.get_value()
- assert b_shape is not None
- node.out_port(0).data.set_shape(b_shape)
+ node_name = node.soft_get('name', node.id)
- PermuteInputs().set_input_permutation(node.in_node(1), node, 'output:0', 'shape')
+ input_shape = node.in_port(0).data.get_shape()
+ input_value = node.in_port(0).data.get_value()
+ target_shape = node.in_port(1).data.get_value()
+ assert target_shape is not None, 'Output shape is not defined for node "{}"'.format(node_name)
+ assert node.has_and_set('mode'), 'Broadcasting mode is not defined for node "{}"'.format(node_name)
- if b_value is not None and not node.has_and_set('stop_value_propagation'):
- new_value = np.broadcast_to(b_value, b_shape)
- node.out_port(0).data.set_value(new_value)
+ if node.mode == 'numpy':
+ node.out_port(0).data.set_shape(uni_directional_shape_broadcasting(input_shape, target_shape))
+ elif node.mode == 'bidirectional':
+ node.out_port(0).data.set_shape(bi_directional_shape_broadcasting(input_shape, target_shape))
+ else:
+ raise Error('The node "{}" has unsupported mode "{}"'.format(node_name, node.mode))
+
+ PermuteInputs().set_input_permutation(node.in_node(1), node, 'output:0', 'shape')
+ if input_value is not None and not node.has_and_set('stop_value_propagation'):
+ if node.mode == 'numpy':
+ node.out_port(0).data.set_value(uni_directional_broadcasting(input_value, target_shape))
+ elif node.mode == 'bidirectional':
+ node.out_port(0).data.set_value(bi_directional_broadcasting(input_value, target_shape))
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': copy_shape_infer,
'in_ports_count': 1,
'out_ports_count': 1,
super().__init__(graph, {
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'axis': 1,
'infer': concat_infer,
'out_ports_count': 1,
super().__init__(graph, {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'infer': self.infer,
'value': None,
'shape': None,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'multiplication_transparent': True,
'multiplication_transparent_ports': [(0, 0), (1, 0)],
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
'infer': __class__.infer,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 3,
'out_ports_count': 1,
node.graph.remove_edge(node.in_node(2).id, node.id)
node['shape_input'] = False
else:
- node['force_precision_in_ports'] = {2: 'int64'}
\ No newline at end of file
+ node['force_precision_in_ports'] = {2: 'int64'}
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': Convolution.infer,
'multiplication_transparent': True,
'multiplication_transparent_ports': [(0, 0), (2, 0)],
super().__init__(graph, {
'type': self.op,
'op': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': 'Norm',
+ 'version': 'opset1',
'bias': 1,
'region': 'across',
node[k] = v
node.update_node()
+ def get_opset(self):
+ """
+ Gets the operation set version where the operation was introduced.
+ If the version is not defined then consider it an extension
+ :return: the string with the opset name
+ """
+ return self.attrs.get('version', 'extension')
+
+
@classmethod
def update_node_stat(cls, node: Node, attrs: dict = None):
if attrs is None:
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 4,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
'infer': __class__.infer,
'in_ports_count': 1,
'out_ports_count': 1,
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'infer': lambda x: None,
'value': None,
'data_type': None,
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
+ 'type': __class__.op,
+ 'op': __class__.op,
+ 'version': 'opset2',
'pooled_h': None,
'pooled_w': None,
'spatial_scale': 0.0625,
- 'type': __class__.op,
- 'op': __class__.op,
'infer': roipooling_infer,
'in_ports_count': 2,
'out_ports_count': 1,
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
'infer': copy_shape_infer,
- 'kind': 'op',
'type': __class__.op,
'op': __class__.op,
'in_ports_count': 3,
limitations under the License.
"""
-import logging as log
-
import numpy as np
from mo.graph.graph import Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_destination_type
from mo.ops.op import Op
+from mo.utils.error import Error
class Shape(Op):
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'type': __class__.op,
- 'op': __class__.op,
- 'infer': __class__.infer,
+ 'type': self.op,
+ 'op': self.op,
+ 'version': 'opset3',
+
+ 'output_type': np.int64,
+ 'infer': self.infer,
'type_infer': self.type_infer,
+
'in_ports_count': 1,
'out_ports_count': 1,
}, attrs)
- def supported_attrs(self):
- return []
+ def backend_attrs(self):
+ version = self.get_opset()
+ if version == 'opset3':
+ return [
+ ('output_type', lambda node: np_data_type_to_destination_type(node.output_type)),
+ ]
+ elif version == 'opset1':
+ return []
+ else:
+ raise Error('Unknown opset version "{}"'.format(version))
+
@staticmethod
def infer(node):
- if len(node.in_nodes()) != 1:
- log.warning('ShapeOf operation should have exact one input node, but it has {}'.format(len(node.in_nodes())))
- return
-
- if node.in_node(0).shape is not None:
- value = np.array(node.in_node(0).shape)
- node.out_node().shape = np.array(value.shape, dtype=np.int64)
-
- if not node.has_and_set('stop_value_propagation'):
- if node.has_valid('data_type'):
- node.out_node().value = np.array(value, dtype=node.data_type)
- else:
- node.out_node().value = np.array(value)
- node.out_node().shape = np.array(value.shape, dtype=np.int64)
+ name = node.soft_get('name', node.id)
+ connected_in_ports = [port for port in node.in_ports().values() if not port.disconnected()]
+ assert len(connected_in_ports) == 1, \
+ 'ShapeOf operation should have exact one input node, but it has {}'.format(len(connected_in_ports))
+
+ input_shape = node.in_port(0).data.get_shape()
+ assert input_shape is not None, \
+ 'Input shape is undefined for ShapeOf node `{}`'.format(node.soft_get('name', node.id))
+
+ assert node.has_valid('output_type'), \
+ '`output_type` attribute is not set for ShapeOf node `{}`'.format(name)
+ assert node.output_type in [np.int64, np.int32], \
+ 'ShapeOf `output_type` attribute must be int32 or int64, `{}` found'.format(np.dtype(node.output_type).name)
+
+ if node.has_and_set('stop_value_propagation'):
+ node.out_port(0).data.set_shape(input_shape.shape)
else:
- log.info('Can\'t infer shape and value for shape operation due to undefined input shape')
+ node.out_port(0).data.set_value(np.array(input_shape, dtype=node.output_type))
@staticmethod
def type_infer(node):
- node.out_port(0).set_data_type(np.int64 if node.graph.graph['cmd_params'].generate_experimental_IR_V10 else
- np.int32)
+ node.out_port(0).set_data_type(node.output_type)
def __init__(self, graph: Graph, attrs: dict):
super().__init__(graph, {
- 'infer': Softmax.infer,
- 'kind': 'op',
- 'axis': 1,
'type': __class__.op,
'op': __class__.op,
+ 'version': 'opset1',
+ 'infer': Softmax.infer,
+ 'axis': 1,
'in_ports_count': 1,
'out_ports_count': 1,
}, attrs)
PermuteAttrs.create_permute_attrs(node, attrs=[('axis', 'input:0')])
+class SoftmaxONNX(Op):
+ op = 'SoftMaxONNX'
+ enabled = False
+
+ def __init__(self, graph: Graph, attrs: dict):
+ super().__init__(graph, {
+ 'infer': None,
+ 'axis': 1,
+ 'type': None, # this operation will be replaced with a
+ # Reshape(Softmax(Flatten(x, axis), -1), x.shape) sub-graph
+ 'op': __class__.op,
+ 'in_ports_count': 1,
+ 'out_ports_count': 1,
+ }, attrs)
+
+
class LogSoftmax(Op):
op = 'LogSoftmax'
enabled = False
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'op': self.op,
'type': self.op,
'in_ports_count': 3,
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'squeeze_dims': None,
'reinterp_shape': True,
'keep_at_least_1d': 0,
super().__init__(graph, {
'type': __class__.op,
'op': 'StridedSlice',
+ 'version': 'opset1',
'in_ports_count': 4,
'out_ports_count': 1,
'infer': __class__.infer
super().__init__(graph, {
'op': self.op,
'type': self.op,
+ 'version': 'opset1',
'infer': self.infer,
super().__init__(graph, {
'op': self.op,
'type': 'Tile',
+ 'version': 'opset1',
'infer': self.infer,
def __init__(self, graph, attrs: dict):
super().__init__(graph, {
- 'kind': 'op',
'op': __class__.op,
'type': __class__.op,
+ 'version': 'opset1',
'unsqueeze_dims': None,
'reinterp_shape': True,
'in_ports_count': 2,
in_port.get_connection().insert_node(Cast(graph, {'dst_type': np_type}).create_node())
-def convert_outputs_of_specific_ops(graph: Graph):
- type_port = {'ShapeOf': {0: 'int32'},
- 'NonMaxSuppression': {0: 'int32'},
- }
-
- for node in graph.get_op_nodes():
- if node.soft_get('type') in type_port:
- ports_to_update = type_port[node.soft_get('type')]
- for port_id, precision in ports_to_update.items():
- if port_id in node.out_ports():
- log.debug('Insert Convert after op "{}" to type "{}"'.format(node.soft_get('name', node.id),
- precision))
- node.out_port(port_id).get_connection().insert_node(
- Cast(graph, {'dst_type': data_type_str_to_np(precision)}).create_node())
-
-
def prepare_emit_ir(graph: Graph, data_type: str, output_dir: str, output_model_name: str,
mean_data: [list, None] = None, input_names: list = None, meta_info: dict = None):
if input_names is None:
# restore data type for specific inputs/outputs of specific ops to the data types required by nGraph
if not graph.graph['cmd_params'].generate_deprecated_IR_V7:
for_graph_and_each_sub_graph_recursively(graph, convert_inputs_of_specific_ops)
- for_graph_and_each_sub_graph_recursively(graph, convert_outputs_of_specific_ops)
if graph.graph['cmd_params'].generate_experimental_IR_V10:
for_graph_and_each_sub_graph_recursively(graph, OpVersioning().find_and_replace_pattern)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import logging as log
+import numpy as np
+
+from mo.front.common.partial_infer.utils import int64_array
+
+
+def make_equal_rank(shape_1: np.array, shape_2: np.array):
+ """
+    Prepend the shape with smaller length with 1s. Return the updated shapes
+ :param shape_1: first shape
+ :param shape_2: second shape
+ :return: tuple with updated shapes
+ """
+ while len(shape_1) < len(shape_2):
+ shape_1 = np.insert(shape_1, 0, 1)
+
+ while len(shape_2) < len(shape_1):
+ shape_2 = np.insert(shape_2, 0, 1)
+
+ return shape_1, shape_2
+
+
+def uni_directional_shape_broadcasting(input_shape: np.array, target_shape: np.array):
+ """
+ Uni-directional broadcasting of two shapes following the numpy semantic
+ :param input_shape: input shape to broadcast
+ :param target_shape: target shape
+ :return: broadcasted shape or None if broadcasting cannot be performed
+ """
+ input = input_shape.copy()
+
+    # in uni-directional broadcasting the target shape rank can be higher than or equal to the input shape rank
+ if len(input_shape) > len(target_shape):
+ log.debug('The shape "{}" cannot be broadcasted to "{}"'.format(input_shape, target_shape))
+ return None
+
+ # prepend input shape with 1s
+ input, target_shape = make_equal_rank(input, target_shape)
+
+ for left, right in zip(input, target_shape):
+ if left != right and left != 1:
+ log.debug('The shape "{}" cannot be broadcasted to "{}"'.format(input_shape, target_shape))
+ return None
+
+ return target_shape
+
+
+def bi_directional_shape_broadcasting(input_shape_1: np.array, input_shape_2: np.array):
+ """
+ Bi-directional broadcasting of two shapes following numpy semantic
+ :param input_shape_1: first shape to broadcast
+ :param input_shape_2: second shape to broadcast
+ :return: broadcasted shape or None if broadcasting cannot be performed
+ """
+ shape_1 = input_shape_1.copy()
+ shape_2 = input_shape_2.copy()
+ shape_1, shape_2 = make_equal_rank(shape_1, shape_2)
+
+ for left, right in zip(shape_1, shape_2):
+ if left != right and left != 1 and right != 1:
+ log.debug('The shape "{}" cannot be broadcasted to "{}"'.format(input_shape_1, input_shape_2))
+ return None
+
+ return np.maximum(shape_1, shape_2)
+
+
+def uni_directional_broadcasting(input_value: np.array, target_shape: np.array):
+ """
+ Uni-directional broadcasting of input tensor to target shape following the numpy semantic
+ :param input_value: input value to broadcast
+ :param target_shape: target shape
+ :return: broadcasted value
+ """
+ assert uni_directional_shape_broadcasting(int64_array(input_value.shape), target_shape) is not None, \
+ 'The tensor of shape "{}" cannot be uni-directionally broadcasted to shape "{}"'.format(input_value.shape,
+ target_shape)
+ return np.broadcast_to(input_value, target_shape)
+
+
+def bi_directional_broadcasting(input_value: np.array, second_shape: np.array):
+ """
+    Bi-directional broadcasting of the input tensor with the second shape following the numpy semantics
+ :param input_value: input value to broadcast
+ :param second_shape: second tensor shape
+ :return: broadcasted value
+ """
+ assert bi_directional_shape_broadcasting(int64_array(input_value.shape), second_shape) is not None, \
+ 'The tensor of shape "{}" cannot be bi-directionally broadcasted to shape "{}"'.format(input_value.shape,
+ second_shape)
+ return np.array(input_value * np.ones(second_shape), dtype=input_value.dtype)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.broadcasting import bi_directional_broadcasting, bi_directional_shape_broadcasting, uni_directional_broadcasting, uni_directional_shape_broadcasting
+from generator import generator, generate
+import numpy as np
+import unittest
+
+
+@generator
+class TestingBroadcasting(unittest.TestCase):
+ @generate(*[([], [20, 30, 10], [20, 30, 10]),
+ ([1], [20, 30, 10], [20, 30, 10]),
+ ([1, 1, 10], [20, 30, 10], [20, 30, 10]),
+ ([20, 1, 10], [20, 30, 10], [20, 30, 10]),
+ ([20, 30, 10], [20, 30, 10], [20, 30, 10]),
+ ([20, 30, 10], [5, 7, 20, 30, 10], [5, 7, 20, 30, 10]),
+ ([1, 2], [20, 3, 10, 2], [20, 3, 10, 2]),
+ ([1, 1], [1], None),
+ ([5, 10], [1, 10], None),
+ ])
+ def test_uni_directional_broadcasting(self, input_shape, target_shape, expected_shape):
+ self.assertTrue(np.array_equal(uni_directional_shape_broadcasting(input_shape, target_shape), expected_shape))
+
+ input_value = np.array(np.random.rand(*input_shape))
+ if expected_shape is not None:
+ expected_value = np.broadcast_to(input_value, int64_array(target_shape))
+ self.assertTrue(np.array_equal(uni_directional_broadcasting(input_value, int64_array(target_shape)), expected_value))
+ else:
+ with self.assertRaisesRegex(Exception, '.*cannot be uni-directionally broadcasted.*'):
+ uni_directional_broadcasting(input_value, int64_array(target_shape))
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import datetime
+
+msg_fmt = 'It\'s been a while, check for a new version of ' + \
+ 'Intel(R) Distribution of OpenVINO(TM) toolkit here {0} or on the GitHub*'
+
+
+def get_ov_update_message():
+ expected_update_date = datetime.date(year=2020, month=10, day=1)
+ current_date = datetime.date.today()
+
+ link = 'https://software.intel.com/en-us/openvino-toolkit/choose-download?cid=&source=upgrade&content=2020_3_LTS'
+
+ return msg_fmt.format(link) if current_date >= expected_update_date else None
eps = 5e-2
else:
eps = 1e-4
- return np.allclose(value_ref, value, rtol=eps, atol=eps)
\ No newline at end of file
+ return np.allclose(value_ref, value, rtol=eps, atol=eps)
+
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
+
+
+class NonMaxSuppressionExtender(Extender):
+ op = 'NonMaxSuppression'
+
+ @staticmethod
+ def extend(op: Node):
+ if op.has_valid('output_type'):
+ op['output_type'] = destination_type_to_np_data_type(op.output_type)
--- /dev/null
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
+
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
+
+
+class ShapeOfExtender(Extender):
+ op = 'ShapeOf'
+
+ @staticmethod
+ def extend(op: Node):
+ op['output_type'] = destination_type_to_np_data_type(op.output_type)
limitations under the License.
"""
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
from mo.utils.graph import Node
from mo.utils.ir_reader.extender import Extender
-class TopK_extender(Extender):
+class TopKExtender(Extender):
op = 'TopK'
@staticmethod
def extend(op: Node):
if op.graph.graph['cmd_params'].framework in ('tf', 'caffe'):
op['remove_values_output'] = True
+ if op.has_valid('index_element_type'):
+ op['index_element_type'] = destination_type_to_np_data_type(op.index_element_type)
from extensions.ops.gather import Gather
from extensions.ops.range import Range
from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Node
+from mo.front.tf.graph_utils import create_op_node_with_second_input
+from mo.graph.graph import Node, Graph
+from mo.graph.port import Port
from mo.ops.concat import Concat
from mo.ops.const import Const
+from mo.ops.shape import Shape
+from mo.ops.squeeze import Squeeze
def get_canonical_axis_index_node(rank: Node, axis: int) -> Node:
new_shape_node.add_input_port(ind)
new_shape_node.in_port(ind).connect(input_node.out_port(0))
return new_shape_node
+
+
+def get_shape_and_rank_nodes_by_port(port: Port, return_as_a_scalar: bool = True):
+ """
+ The function returns nodes producing shape and rank of the data from the desired port in order to use those
+    operations in the middle/back phase
+ :param port: Port object that specifies node output port
+ :param return_as_a_scalar: boolean flag to return 1D or 0D rank
+ :return: shape and rank nodes
+ """
+ input_node_name = port.node.soft_get('name', port.node.id)
+ graph = port.node.graph
+
+ shape = Shape(graph, dict(name=input_node_name + '/ShapeOf')).create_node()
+ rank_1_d = Shape(graph, dict(name=input_node_name + '/1dRankOf')).create_node()
+ rank_1_d.in_port(0).connect(shape.out_port(0))
+ shape.in_port(0).connect(port)
+ if not return_as_a_scalar:
+ return shape, rank_1_d
+
+ rank = create_op_node_with_second_input(graph, Squeeze, int64_array([0]), {'name': input_node_name + '/0dRankOf'},
+ rank_1_d)
+ return shape, rank
-Subproject commit a909d3e0b6d12036be4d913e43b18408bd8bf0b6
+Subproject commit 2989542b5d1b656b19012edb119b5b379dbedc8a
3. Benchmark demo using public SqueezeNet topology (demo_benchmark_app.sh|bat)
-To run the demos, run demo_squeezenet_download_convert_run.sh or demo_security_barrier_camera.sh or demo_benchmark_app.sh (*.bat on Windows) scripts from the console without parameters, for example:
+4. Speech recognition demo utilizing models trained on open LibriSpeech dataset
+
+To run the demos, run demo_squeezenet_download_convert_run.sh or demo_security_barrier_camera.sh or demo_benchmark_app.sh or demo_speech_recognition.sh (*.bat on Windows) scripts from the console without parameters, for example:
./demo_squeezenet_download_convert_run.sh
The benchmark app prints performance counters, resulting latency, and throughput values.
-For more information about the Inference Engine benchmark app, refer to the documentation available in the sample folder.
\ No newline at end of file
+For more information about the Inference Engine benchmark app, refer to the documentation available in the sample folder.
+
+Speech Recognition Demo Using LibriSpeech models
+================================================
+
+The demo illustrates live speech recognition - transcribing speech from a microphone or offline (from a wave file).
+The demo is also capable of live closed captioning of an audio clip or movie, where the signal is intercepted from the speaker.
+
+The demo script does the following:
+
+ - Downloads US English models trained on LibriSpeech dataset prepared for direct usage by the Inference Engine
+ - Installs the required components
+ - Runs the command line offline demo
+ - As a final step, runs live speech recognition application with graphical interface
+
+The GUI application prints the speech transcribed from the input signal in a window. Up to two channels can be transcribed in parallel: microphone & speaker streams.
set ir_dir=%irs_path%\%model_dir%\%target_precision%
echo Download public %model_name% model
-echo python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
-python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+echo python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
+python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
echo %model_name% model downloading completed
timeout 7
-if exist %ir_dir% (
+if exist "%ir_dir%" (
echo.
echo Target folder %ir_dir% already exists. Skipping IR generation with Model Optimizer.
echo If you want to convert a model again, remove the entire %ir_dir% folder.
echo ###############^|^| Build Inference Engine samples using MS Visual Studio (MSBuild.exe) ^|^|###############
echo.
timeout 3
-echo !MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
+echo "!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
"!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
if ERRORLEVEL 1 GOTO errorHandling
printf "Install Model Optimizer dependencies\n\n"
cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites"
. ./install_prerequisites.sh caffe
- cd $cur_path
+ cd "$cur_path"
# Step 3. Convert a model with Model Optimizer
printf "${dashes}"
set models_path=%BUILD_FOLDER%\openvino_models\ir
set models_cache=%BUILD_FOLDER%\openvino_models\cache
-if not exist %models_cache% (
- mkdir %models_cache%
+if not exist "%models_cache%" (
+ mkdir "%models_cache%"
)
set downloader_dir=%INTEL_OPENVINO_DIR%\deployment_tools\open_model_zoo\tools\downloader
set ir_dir=%irs_path%\%model_dir%\%target_precision%
echo Download public %model_name% model
-echo python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
-python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+echo python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
+python "%downloader_dir%\downloader.py" --name "%model_name%" --output_dir "%models_path%" --cache_dir "%models_cache%"
echo %model_name% model downloading completed
timeout 7
-if exist %ir_dir% (
+if exist "%ir_dir%" (
echo.
echo Target folder %ir_dir% already exists. Skipping IR generation with Model Optimizer.
echo If you want to convert a model again, remove the entire %ir_dir% folder.
echo ###############^|^| Build Inference Engine samples using MS Visual Studio (MSBuild.exe) ^|^|###############
echo.
timeout 3
-echo !MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
+echo "!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
"!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
if ERRORLEVEL 1 GOTO errorHandling
printf "Install Model Optimizer dependencies\n\n"
cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites"
. ./install_prerequisites.sh caffe
- cd $cur_path
+ cd "$cur_path"
# Step 3. Convert a model with Model Optimizer
printf "${dashes}"
else
export LD_LIBRARY_PATH=$HDDL_INSTALL_DIR/lib:$INSTALLDIR/deployment_tools/inference_engine/external/gna/lib:$INSTALLDIR/deployment_tools/inference_engine/external/mkltiny_lnx/lib:$INSTALLDIR/deployment_tools/inference_engine/external/tbb/lib:$IE_PLUGINS_PATH:$LD_LIBRARY_PATH
fi
+
+ export KMB_INSTALL_DIR=$INSTALLDIR/deployment_tools/inference_engine/external/hddl_unite
+ export LD_LIBRARY_PATH=$KMB_INSTALL_DIR/lib:$LD_LIBRARY_PATH
fi
if [ -e $INSTALLDIR/deployment_tools/ngraph ]; then
export LD_LIBRARY_PATH=$INSTALLDIR/deployment_tools/ngraph/lib:$LD_LIBRARY_PATH
export ngraph_DIR=$INSTALLDIR/deployment_tools/ngraph/cmake
fi
-
+
if [ -e "$INSTALLDIR/opencv" ]; then
if [ -f "$INSTALLDIR/opencv/setupvars.sh" ]; then
source "$INSTALLDIR/opencv/setupvars.sh"
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/master_04d6f112132f92cab563ae7655747e0359687dc9/</value>
+ </irs_path>
+</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <models>
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <devices>
+ <value>CPU</value>
+ <value>GPU</value>
+ </devices>
+ <models>
+ <value>caffe/FP32/alexnet/alexnet.xml</value>
+ <value>caffe/FP32/caffenet/caffenet.xml</value>
+ <value>caffe/FP32/densenet_121/densenet_121.xml</value>
+ <value>caffe/FP32/densenet_161/densenet_161.xml</value>
+ <value>caffe/FP32/densenet_169/densenet_169.xml</value>
+ <value>caffe/FP32/densenet_201/densenet_201.xml</value>
+ <value>caffe/FP32/dpn_92/dpn_92.xml</value>
+ <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>caffe/FP32/inception_v1/inception_v1.xml</value>
+ <value>caffe/FP32/inception_v2/inception_v2.xml</value>
+ <value>caffe/FP32/inception_v3/inception_v3.xml</value>
+ <value>caffe/FP32/inception_v4/inception_v4.xml</value>
+ <value>caffe/FP32/lenet/lenet.xml</value>
+ <value>caffe/FP32/mobilenet/mobilenet.xml</value>
+ <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>caffe/FP32/resnet_18/resnet_18.xml</value>
+ <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
+ <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
+ <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
+ <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
+ <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
+ <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
+ <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
+ <value>caffe/FP32/vgg16/vgg16.xml</value>
+ <value>caffe/FP32/vgg19/vgg19.xml</value>
+ <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
+ <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
+ <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>caffe/FP32/dilation/dilation.xml</value>
+ <value>caffe/FP32/dssd/dssd.xml</value>
+ <value>caffe/FP32/fcn8/fcn8.xml</value>
+ <value>caffe/FP32/fcn32/fcn32.xml</value>
+ <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
+ <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>caffe/FP32/openpose_face/openpose_face.xml</value>
+ <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
+ <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
+ <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
+ <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
+ <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
+ <value>caffe/FP32/vnect/vnect.xml</value>
+ <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
+ <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
+ <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
+ <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
+ <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
+ <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
+ <value>tf/1.14.0/FP32/east/east.xml</value>
+ <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
+ <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
+ <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
+ <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
+ <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+ <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
+ <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
+ <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
+ <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
+ <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
+ <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
+ <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/caffenet/caffenet.xml</value>
+ <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
+ <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
+ <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
+ <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
+ <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
+ <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
+ <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
+ <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
+ <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
+ <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
+ <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
+ <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>mxnet/FP32/vgg16/vgg16.xml</value>
+ <value>mxnet/FP32/vgg19/vgg19.xml</value>
+ <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
+ <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
+ <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
+ <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
+ <value>mxnet/FP32/location_net/location_net.xml</value>
+ <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
+ <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>mxnet/FP32/nin/nin.xml</value>
+ <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
+ <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
+ <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
+ <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
+ <value>onnx/FP32/retina_net/retina_net.xml</value>
+ <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
+ <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/master_04d6f112132f92cab563ae7655747e0359687dc9/</value>
+ </irs_path>
+</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <models>
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
+ <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
+ <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
+ <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
+ <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
+ <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
+ <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
+ <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
+ <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
+ <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
+ <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
+ <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
+ <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
+ <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
+ <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
+ <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
+ <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
+ <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
+ <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
+ <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
+ <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
+ <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
+ <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
+ <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
+ <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
+ <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
+ <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
+ <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
+ <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
+ <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
+ <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
+ <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
+ <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
+ <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
+ <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
+ <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
+ <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
+ <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
+ <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
+ <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
+ <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
+ <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
+ <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
+ <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
+ <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
+ <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
+ <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
+ <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
+ <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
+ <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
+ <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
+ <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
+ <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
+ <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
+ <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
+ <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
+ <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
+ <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
+ <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
+ <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
+ <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
+ <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
+ <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
+ <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
+ <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
+ <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
+ <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
+ <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
+ <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
+ <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
+ <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
+ <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
+ <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
+ <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
+ <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
+ <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
+ <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
+ <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
+ <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
+ <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
+ <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
+ <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
+ <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
+ <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
+ <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
+ <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
+ <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
+ <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
+ <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
+ <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
+ <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
+ <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
+ <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
+ <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
+ <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
+ <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
+ <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
+ <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
+ <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
+ <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
+ <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
+ <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
+ <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
+ <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
+ <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
+ <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
+ <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
+ <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
+ <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
+ <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
+ <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
+ <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
+ <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
+ <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
+ <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
+ <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
+ <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
+ <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
+ <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
+ <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
+ <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
+ <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
+ <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
+ <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
+ <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
+ <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
+ <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <devices>
+ <value>CPU</value>
+ <value>GPU</value>
+ </devices>
+ <models>
+ <value>caffe/FP32/alexnet/alexnet.xml</value>
+ <value>caffe/FP32/caffenet/caffenet.xml</value>
+ <value>caffe/FP32/densenet_121/densenet_121.xml</value>
+ <value>caffe/FP32/densenet_161/densenet_161.xml</value>
+ <value>caffe/FP32/densenet_169/densenet_169.xml</value>
+ <value>caffe/FP32/densenet_201/densenet_201.xml</value>
+ <value>caffe/FP32/dpn_92/dpn_92.xml</value>
+ <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>caffe/FP32/inception_v1/inception_v1.xml</value>
+ <value>caffe/FP32/inception_v2/inception_v2.xml</value>
+ <value>caffe/FP32/inception_v3/inception_v3.xml</value>
+ <value>caffe/FP32/inception_v4/inception_v4.xml</value>
+ <value>caffe/FP32/lenet/lenet.xml</value>
+ <value>caffe/FP32/mobilenet/mobilenet.xml</value>
+ <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>caffe/FP32/resnet_18/resnet_18.xml</value>
+ <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
+ <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
+ <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
+ <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
+ <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
+ <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
+ <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
+ <value>caffe/FP32/vgg16/vgg16.xml</value>
+ <value>caffe/FP32/vgg19/vgg19.xml</value>
+ <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
+ <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
+ <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>caffe/FP32/dilation/dilation.xml</value>
+ <value>caffe/FP32/dssd/dssd.xml</value>
+ <value>caffe/FP32/fcn8/fcn8.xml</value>
+ <value>caffe/FP32/fcn32/fcn32.xml</value>
+ <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
+ <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>caffe/FP32/openpose_face/openpose_face.xml</value>
+ <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
+ <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
+ <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
+ <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
+ <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
+ <value>caffe/FP32/vnect/vnect.xml</value>
+ <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
+ <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
+ <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
+ <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
+ <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
+ <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
+ <value>tf/1.14.0/FP32/east/east.xml</value>
+ <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
+ <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
+ <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
+ <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
+ <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
+ <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+ <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
+ <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
+ <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
+ <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
+ <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
+ <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
+ <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
+ <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
+ <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
+ <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
+ <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
+ <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
+ <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
+ <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/caffenet/caffenet.xml</value>
+ <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
+ <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
+ <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
+ <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
+ <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
+ <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
+ <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
+ <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+ <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+ <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+ <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+ <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+ <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
+ <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+ <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
+ <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
+ <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
+ <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+ <value>mxnet/FP32/vgg16/vgg16.xml</value>
+ <value>mxnet/FP32/vgg19/vgg19.xml</value>
+ <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
+ <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
+ <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
+ <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+ <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
+ <value>mxnet/FP32/location_net/location_net.xml</value>
+ <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
+ <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
+ <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
+ <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
+ <value>mxnet/FP32/nin/nin.xml</value>
+ <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
+ <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
+ <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+ <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+ <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
+ <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
+ <value>onnx/FP32/retina_net/retina_net.xml</value>
+ <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
+ <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
+ <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ </models>
+</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<?xml version="1.0"?>
<attributes>
<models>
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+
</models>
</attributes>
\ No newline at end of file
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>caffe/FP32/caffenet/caffenet.xml</value>
- <value>caffe/FP32/densenet_121/densenet_121.xml</value>
- <value>caffe/FP32/densenet_161/densenet_161.xml</value>
- <value>caffe/FP32/densenet_169/densenet_169.xml</value>
- <value>caffe/FP32/densenet_201/densenet_201.xml</value>
- <value>caffe/FP32/dpn_92/dpn_92.xml</value>
- <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>caffe/FP32/inception_v1/inception_v1.xml</value>
- <value>caffe/FP32/inception_v2/inception_v2.xml</value>
- <value>caffe/FP32/inception_v3/inception_v3.xml</value>
- <value>caffe/FP32/inception_v4/inception_v4.xml</value>
- <value>caffe/FP32/lenet/lenet.xml</value>
- <value>caffe/FP32/mobilenet/mobilenet.xml</value>
- <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>caffe/FP32/resnet_18/resnet_18.xml</value>
- <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
- <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
- <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
- <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
- <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
- <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
- <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
- <value>caffe/FP32/vgg16/vgg16.xml</value>
- <value>caffe/FP32/vgg19/vgg19.xml</value>
- <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
- <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
- <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
- <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
- <value>caffe/FP32/dilation/dilation.xml</value>
- <value>caffe/FP32/dssd/dssd.xml</value>
- <value>caffe/FP32/fcn8/fcn8.xml</value>
- <value>caffe/FP32/fcn32/fcn32.xml</value>
- <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
- <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>caffe/FP32/openpose_face/openpose_face.xml</value>
- <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
- <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
- <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
- <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
- <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
- <value>caffe/FP32/vnect/vnect.xml</value>
- <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
- <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
- <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
- <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
- <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
- <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
- <value>tf/1.14.0/FP32/east/east.xml</value>
- <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
- <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
- <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
- <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
- <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
- <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
- <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
- <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
- <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
- <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/caffenet/caffenet.xml</value>
- <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
- <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
- <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
- <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
- <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
- <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
- <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
- <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
- <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
- <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
- <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
- <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>mxnet/FP32/vgg16/vgg16.xml</value>
- <value>mxnet/FP32/vgg19/vgg19.xml</value>
- <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
- <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
- <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
- <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
- <value>mxnet/FP32/location_net/location_net.xml</value>
- <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
- <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>mxnet/FP32/nin/nin.xml</value>
- <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
- <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
- <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
- <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
- <value>onnx/FP32/retina_net/retina_net.xml</value>
- <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
- <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ <value>public/mobilenet-v2-1.4-224/FP32/mobilenet-v2-1.4-224.xml</value>
+ <value>public/resnet-101/FP32/resnet-101.xml</value>
+ <value>public/brain-tumor-segmentation-0001/FP32/brain-tumor-segmentation-0001.xml</value>
+ <value>public/octave-resnet-101-0.125/FP32/octave-resnet-101-0.125.xml</value>
+ <value>public/faster_rcnn_inception_resnet_v2_atrous_coco/FP32/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/efficientnet-b7_auto_aug/FP32/efficientnet-b7_auto_aug.xml</value>
+ <value>public/yolo-v2-tf/FP32/yolo-v2-tf.xml</value>
+ <value>public/mobilenet-v2-1.0-224/FP32/mobilenet-v2-1.0-224.xml</value>
+ <value>public/colorization-v2-norebal/FP32/colorization-v2-norebal.xml</value>
+ <value>public/se-inception/FP32/se-inception.xml</value>
+ <value>public/efficientnet-b0/FP32/efficientnet-b0.xml</value>
+ <value>public/mobilenet-v1-1.0-224-tf/FP32/mobilenet-v1-1.0-224-tf.xml</value>
+ <value>public/mask_rcnn_resnet101_atrous_coco/FP32/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>public/ssd_mobilenet_v1_coco/FP32/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/se-resnet-152/FP32/se-resnet-152.xml</value>
+ <value>public/octave-resnext-50-0.25/FP32/octave-resnext-50-0.25.xml</value>
+ <value>public/googlenet-v3/FP32/googlenet-v3.xml</value>
+ <value>public/ssd_mobilenet_v2_coco/FP32/ssd_mobilenet_v2_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/license-plate-recognition-barrier-0007/FP32/license-plate-recognition-barrier-0007.xml</value>
+ <value>public/mobilenet-v1-0.50-224/FP32/mobilenet-v1-0.50-224.xml</value>
+ <value>public/ssd_mobilenet_v1_fpn_coco/FP32/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>public/vgg16/FP32/vgg16.xml</value>
+ <value>public/face-recognition-resnet34-arcface/FP32/face-recognition-resnet34-arcface.xml</value>
+ <value>public/gmcnn-places2-tf/FP32/gmcnn-places2-tf.xml</value>
+ <value>public/mobilenet-v1-1.0-224/FP32/mobilenet-v1-1.0-224.xml</value>
+ <value>public/se-resnet-101/FP32/se-resnet-101.xml</value>
+ <value>public/face-detection-retail-0044/FP32/face-detection-retail-0044.xml</value>
+ <value>public/face-recognition-mobilefacenet-arcface/FP32/face-recognition-mobilefacenet-arcface.xml</value>
+ <value>public/vehicle-license-plate-detection-barrier-0123/FP32/vehicle-license-plate-detection-barrier-0123.xml</value>
+ <value>public/densenet-161/FP32/densenet-161.xml</value>
+ <value>public/mask_rcnn_inception_resnet_v2_atrous_coco/FP32/mask_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/octave-resnext-101-0.25/FP32/octave-resnext-101-0.25.xml</value>
+ <value>public/face-recognition-resnet50-arcface/FP32/face-recognition-resnet50-arcface.xml</value>
+ <value>public/densenet-161-tf/FP32/densenet-161-tf.xml</value>
+ <value>public/octave-resnet-200-0.125/FP32/octave-resnet-200-0.125.xml</value>
+ <value>public/mtcnn-p/FP32/mtcnn-p.xml</value>
+ <value>public/se-resnext-101/FP32/se-resnext-101.xml</value>
+ <value>public/efficientnet-b5/FP32/efficientnet-b5.xml</value>
+ <value>public/densenet-169-tf/FP32/densenet-169-tf.xml</value>
+ <value>public/densenet-201/FP32/densenet-201.xml</value>
+ <value>public/resnet-50-tf/FP32/resnet-50-tf.xml</value>
+ <value>public/squeezenet1.1/FP32/squeezenet1.1.xml</value>
+ <value>public/squeezenet1.0/FP32/squeezenet1.0.xml</value>
+ <value>public/octave-resnet-26-0.25/FP32/octave-resnet-26-0.25.xml</value>
+ <value>public/googlenet-v4-tf/FP32/googlenet-v4-tf.xml</value>
+ <value>public/ssd300/FP32/ssd300.xml</value>
+ <value>public/rfcn-resnet101-coco-tf/FP32/rfcn-resnet101-coco-tf.xml</value>
+ <value>public/vgg19/FP32/vgg19.xml</value>
+ <value>public/ctdet_coco_dlav0_384/FP32/ctdet_coco_dlav0_384.xml</value>
+ <value>public/efficientnet-b0_auto_aug/FP32/efficientnet-b0_auto_aug.xml</value>
+ <value>public/googlenet-v1/FP32/googlenet-v1.xml</value>
+ <value>public/faster_rcnn_inception_v2_coco/FP32/faster_rcnn_inception_v2_coco.xml</value>
+ <value>public/mask_rcnn_inception_v2_coco/FP32/mask_rcnn_inception_v2_coco.xml</value>
+ <value>public/inception-resnet-v2-tf/FP32/inception-resnet-v2-tf.xml</value>
+ <value>public/deeplabv3/FP32/deeplabv3.xml</value>
+ <value>public/yolo-v3-tf/FP32/yolo-v3-tf.xml</value>
+ <value>public/resnet-152/FP32/resnet-152.xml</value>
+ <value>public/mtcnn-o/FP32/mtcnn-o.xml</value>
+ <value>public/octave-se-resnet-50-0.125/FP32/octave-se-resnet-50-0.125.xml</value>
+ <value>public/yolo-v1-tiny-tf/FP32/yolo-v1-tiny-tf.xml</value>
+ <value>public/resnet-50/FP32/resnet-50.xml</value>
+ <value>public/googlenet-v1-tf/FP32/googlenet-v1-tf.xml</value>
+ <value>public/yolo-v2-tiny-tf/FP32/yolo-v2-tiny-tf.xml</value>
+ <value>public/ssd512/FP32/ssd512.xml</value>
+ <value>public/densenet-169/FP32/densenet-169.xml</value>
+ <value>public/brain-tumor-segmentation-0002/FP32/brain-tumor-segmentation-0002.xml</value>
+ <value>public/Sphereface/FP32/Sphereface.xml</value>
+ <value>public/googlenet-v2/FP32/googlenet-v2.xml</value>
+ <value>public/face-recognition-resnet100-arcface/FP32/face-recognition-resnet100-arcface.xml</value>
+ <value>public/mobilenet-v1-0.25-128/FP32/mobilenet-v1-0.25-128.xml</value>
+ <value>public/ctdet_coco_dlav0_512/FP32/ctdet_coco_dlav0_512.xml</value>
+ <value>public/facenet-20180408-102900/FP32/facenet-20180408-102900.xml</value>
+ <value>public/ctpn/FP32/ctpn.xml</value>
+ <value>public/ssdlite_mobilenet_v2/FP32/ssdlite_mobilenet_v2.xml</value>
+ <value>public/i3d-rgb-tf/FP32/i3d-rgb-tf.xml</value>
+ <value>public/mobilenet-v2/FP32/mobilenet-v2.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/se-resnext-50/FP32/se-resnext-50.xml</value>
+ <value>public/caffenet/FP32/caffenet.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
+ <value>public/faster_rcnn_resnet50_coco/FP32/faster_rcnn_resnet50_coco.xml</value>
+ <value>public/se-resnet-50/FP32/se-resnet-50.xml</value>
+ <value>public/mask_rcnn_resnet50_atrous_coco/FP32/mask_rcnn_resnet50_atrous_coco.xml</value>
+ <value>public/octave-resnet-50-0.125/FP32/octave-resnet-50-0.125.xml</value>
+ <value>public/densenet-121-tf/FP32/densenet-121-tf.xml</value>
+ <value>public/mobilenet-v1-0.50-160/FP32/mobilenet-v1-0.50-160.xml</value>
+ <value>public/densenet-121/FP32/densenet-121.xml</value>
+ <value>public/faster_rcnn_resnet101_coco/FP32/faster_rcnn_resnet101_coco.xml</value>
+ <value>public/octave-densenet-121-0.125/FP32/octave-densenet-121-0.125.xml</value>
+ <value>public/colorization-v2/FP32/colorization-v2.xml</value>
+ <value>public/densenet-121-caffe2/FP32/densenet-121-caffe2.xml</value>
+ <value>public/efficientnet-b0-pytorch/FP32/efficientnet-b0-pytorch.xml</value>
+ <value>public/efficientnet-b5-pytorch/FP32/efficientnet-b5-pytorch.xml</value>
+ <value>public/efficientnet-b7-pytorch/FP32/efficientnet-b7-pytorch.xml</value>
+ <value>public/googlenet-v3-pytorch/FP32/googlenet-v3-pytorch.xml</value>
+ <value>public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml</value>
+ <value>public/midasnet/FP32/midasnet.xml</value>
+ <value>public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.xml</value>
+ <value>public/resnet-18-pytorch/FP32/resnet-18-pytorch.xml</value>
+ <value>public/resnet-50-caffe2/FP32/resnet-50-caffe2.xml</value>
+ <value>public/resnet-50-pytorch/FP32/resnet-50-pytorch.xml</value>
+ <value>public/single-human-pose-estimation-0001/FP32/single-human-pose-estimation-0001.xml</value>
+ <value>public/squeezenet1.1-caffe2/FP32/squeezenet1.1-caffe2.xml</value>
+ <value>public/vgg19-caffe2/FP32/vgg19-caffe2.xml</value>
+ <value>intel/facial-landmarks-35-adas-0002/FP32/facial-landmarks-35-adas-0002.xml</value>
+ <value>intel/vehicle-attributes-recognition-barrier-0039/FP32/vehicle-attributes-recognition-barrier-0039.xml</value>
+ <value>intel/person-detection-action-recognition-0006/FP32/person-detection-action-recognition-0006.xml</value>
+ <value>intel/asl-recognition-0004/FP32/asl-recognition-0004.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-30-0001/FP32/yolo-v2-tiny-ava-sparse-30-0001.xml</value>
+ <value>intel/text-detection-0004/FP32/text-detection-0004.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-1016/FP32/person-vehicle-bike-detection-crossroad-1016.xml</value>
+ <value>intel/text-spotting-0002-detector/FP32/text-spotting-0002-detector.xml</value>
+ <value>intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013.xml</value>
+ <value>intel/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002.xml</value>
+ <value>intel/image-retrieval-0001/FP32/image-retrieval-0001.xml</value>
+ <value>intel/person-detection-retail-0002/FP32/person-detection-retail-0002.xml</value>
+ <value>intel/person-attributes-recognition-crossroad-0230/FP32/person-attributes-recognition-crossroad-0230.xml</value>
+ <value>intel/face-detection-0100/FP32/face-detection-0100.xml</value>
+ <value>intel/face-detection-0102/FP32/face-detection-0102.xml</value>
+ <value>intel/person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml</value>
+ <value>intel/person-reidentification-retail-0300/FP32/person-reidentification-retail-0300.xml</value>
+ <value>intel/instance-segmentation-security-0010/FP32/instance-segmentation-security-0010.xml</value>
+ <value>intel/instance-segmentation-security-0083/FP32/instance-segmentation-security-0083.xml</value>
+ <value>intel/face-detection-0105/FP32/face-detection-0105.xml</value>
+ <value>intel/face-detection-0104/FP32/face-detection-0104.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-30-0001/FP32/icnet-camvid-ava-sparse-30-0001.xml</value>
+ <value>intel/action-recognition-0001-decoder/FP32/action-recognition-0001-decoder.xml</value>
+ <value>intel/face-detection-0106/FP32/face-detection-0106.xml</value>
+ <value>intel/person-detection-action-recognition-teacher-0002/FP32/person-detection-action-recognition-teacher-0002.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-0078/FP32/person-vehicle-bike-detection-crossroad-0078.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-60-0001/FP32/icnet-camvid-ava-sparse-60-0001.xml</value>
+ <value>intel/face-detection-adas-0001/FP32/face-detection-adas-0001.xml</value>
+ <value>intel/unet-camvid-onnx-0001/FP32/unet-camvid-onnx-0001.xml</value>
+ <value>intel/human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml</value>
+ <value>intel/faster-rcnn-resnet101-coco-sparse-60-0001/FP32/faster-rcnn-resnet101-coco-sparse-60-0001.xml</value>
+ <value>intel/action-recognition-0001-encoder/FP32/action-recognition-0001-encoder.xml</value>
+ <value>intel/yolo-v2-ava-sparse-35-0001/FP32/yolo-v2-ava-sparse-35-0001.xml</value>
+ <value>intel/yolo-v2-ava-sparse-70-0001/FP32/yolo-v2-ava-sparse-70-0001.xml</value>
+ <value>intel/person-reidentification-retail-0248/FP32/person-reidentification-retail-0248.xml</value>
+ <value>intel/person-detection-raisinghand-recognition-0001/FP32/person-detection-raisinghand-recognition-0001.xml</value>
+ <value>intel/person-detection-asl-0001/FP32/person-detection-asl-0001.xml</value>
+ <value>intel/emotions-recognition-retail-0003/FP32/emotions-recognition-retail-0003.xml</value>
+ <value>intel/yolo-v2-tiny-ava-0001/FP32/yolo-v2-tiny-ava-0001.xml</value>
+ <value>intel/license-plate-recognition-barrier-0001/FP32/license-plate-recognition-barrier-0001.xml</value>
+ <value>intel/person-detection-retail-0013/FP32/person-detection-retail-0013.xml</value>
+ <value>intel/instance-segmentation-security-0050/FP32/instance-segmentation-security-0050.xml</value>
+ <value>intel/single-image-super-resolution-1032/FP32/single-image-super-resolution-1032.xml</value>
+ <value>intel/landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-decoder/FP32/driver-action-recognition-adas-0002-decoder.xml</value>
+ <value>intel/person-reidentification-retail-0249/FP32/person-reidentification-retail-0249.xml</value>
+ <value>intel/text-spotting-0002-recognizer-decoder/FP32/text-spotting-0002-recognizer-decoder.xml</value>
+ <value>intel/yolo-v2-ava-0001/FP32/yolo-v2-ava-0001.xml</value>
+ <value>intel/person-detection-action-recognition-0005/FP32/person-detection-action-recognition-0005.xml</value>
+ <value>intel/text-recognition-0012/FP32/text-recognition-0012.xml</value>
+ <value>intel/face-detection-retail-0004/FP32/face-detection-retail-0004.xml</value>
+ <value>intel/product-detection-0001/FP32/product-detection-0001.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-60-0001/FP32/yolo-v2-tiny-ava-sparse-60-0001.xml</value>
+ <value>intel/face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml</value>
+ <value>intel/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.xml</value>
+ <value>intel/single-image-super-resolution-1033/FP32/single-image-super-resolution-1033.xml</value>
+ <value>intel/face-detection-retail-0005/FP32/face-detection-retail-0005.xml</value>
+ <value>intel/pedestrian-and-vehicle-detector-adas-0001/FP32/pedestrian-and-vehicle-detector-adas-0001.xml</value>
+ <value>intel/handwritten-japanese-recognition-0001/FP32/handwritten-japanese-recognition-0001.xml</value>
+ <value>intel/semantic-segmentation-adas-0001/FP32/semantic-segmentation-adas-0001.xml</value>
+ <value>intel/pedestrian-detection-adas-0002/FP32/pedestrian-detection-adas-0002.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-encoder/FP32/driver-action-recognition-adas-0002-encoder.xml</value>
+ <value>intel/text-detection-0003/FP32/text-detection-0003.xml</value>
+ <value>intel/text-spotting-0002-recognizer-encoder/FP32/text-spotting-0002-recognizer-encoder.xml</value>
+ <value>intel/handwritten-score-recognition-0003/FP32/handwritten-score-recognition-0003.xml</value>
+ <value>intel/icnet-camvid-ava-0001/FP32/icnet-camvid-ava-0001.xml</value>
+ <value>intel/text-image-super-resolution-0001/FP32/text-image-super-resolution-0001.xml</value>
+ <value>intel/gaze-estimation-adas-0002/FP32/gaze-estimation-adas-0002.xml</value>
+ <value>intel/head-pose-estimation-adas-0001/FP32/head-pose-estimation-adas-0001.xml</value>
+ <value>intel/vehicle-license-plate-detection-barrier-0106/FP32/vehicle-license-plate-detection-barrier-0106.xml</value>
+ <value>intel/instance-segmentation-security-1025/FP32/instance-segmentation-security-1025.xml</value>
</models>
</attributes>
\ No newline at end of file
<?xml version="1.0"?>
<attributes>
<models>
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="CPU" vmsize="757218" vmpeak="901683" vmrss="73920" vmhwm="107866" />
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="747815" vmpeak="860978" vmrss="401808" vmhwm="435358" />
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="CPU" vmsize="1001189" vmpeak="1001189" vmrss="116080" vmhwm="116080" />
-<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="788752" vmpeak="860842" vmrss="435283" vmhwm="435283" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="CPU" vmsize="754806" vmpeak="803184" vmrss="15206" vmhwm="26532" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="554650" vmpeak="644666" vmrss="207592" vmhwm="217720" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="CPU" vmsize="959257" vmpeak="959257" vmrss="26690" vmhwm="26690" />
-<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="572576" vmpeak="644666" vmrss="215230" vmhwm="215230" />
-<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="CPU" vmsize="755224" vmpeak="1146142" vmrss="22246" vmhwm="370770" />
-<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="747709" vmpeak="1031694" vmrss="401746" vmhwm="749962" />
-<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="CPU" vmsize="1343474" vmpeak="1415563" vmrss="314204" vmhwm="371131" />
-<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1088700" vmpeak="1160790" vmrss="739626" vmhwm="748008" />
-<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754050" vmpeak="2548532" vmrss="15593" vmhwm="1808765" />
-<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="648912" vmpeak="3289101" vmrss="299327" vmhwm="3003457" />
-<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2257006" vmpeak="2548532" vmrss="1243448" vmhwm="1809143" />
-<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2413290" vmpeak="3289101" vmrss="2059780" vmhwm="3006845" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="CPU" vmsize="740214" vmpeak="805110" vmrss="129308" vmhwm="129308" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="922147" vmpeak="922147" vmrss="587522" vmhwm="587522" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="CPU" vmsize="1007890" vmpeak="1007890" vmrss="138652" vmhwm="138652" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="1006439" vmpeak="1091636" vmrss="587241" vmhwm="587241" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="CPU" vmsize="691589" vmpeak="922864" vmrss="31054" vmhwm="31054" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="626194" vmpeak="626194" vmrss="290695" vmhwm="290695" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="CPU" vmsize="958240" vmpeak="1043437" vmrss="31366" vmhwm="31366" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="708734" vmpeak="793930" vmrss="287877" vmhwm="287877" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="CPU" vmsize="1046988" vmpeak="1179042" vmrss="307990" vmhwm="439457" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="1267775" vmpeak="1279647" vmrss="932672" vmhwm="944626" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="CPU" vmsize="1321819" vmpeak="1321819" vmrss="374207" vmhwm="439748" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1356565" vmpeak="1441762" vmrss="941418" vmhwm="947060" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133814" vmpeak="2836412" vmrss="1438049" vmhwm="2140533" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2801422" vmpeak="3915366" vmrss="2465065" vmhwm="3578811" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401380" vmpeak="2836412" vmrss="1469832" vmhwm="2140377" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2892432" vmpeak="3939166" vmrss="2472017" vmhwm="3602924" />
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<?xml version="1.0"?>
<attributes>
<models>
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1321668" vmpeak="1631245" vmrss="657919" vmhwm="967408" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1563796" vmpeak="2064987" vmrss="1227532" vmhwm="1728485" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1589073" vmpeak="1631151" vmrss="659287" vmhwm="966721" />
- <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1557202" vmpeak="1973197" vmrss="1079972" vmhwm="1580035" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1341314" vmpeak="1650890" vmrss="665329" vmhwm="974724" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1591844" vmpeak="1793074" vmrss="1255238" vmhwm="1456566" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1441388" vmpeak="1650797" vmrss="682999" vmhwm="973897" />
- <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1605884" vmpeak="1696297" vmrss="1128160" vmhwm="1303270" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903562" vmpeak="903562" vmrss="180684" vmhwm="180684" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1301939" vmpeak="1301939" vmrss="964126" vmhwm="964126" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170582" vmpeak="1255779" vmrss="189836" vmhwm="189836" />
- <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1057290" vmpeak="1142486" vmrss="582316" vmhwm="582316" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1155512" vmpeak="1257531" vmrss="406551" vmhwm="508289" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1884636" vmpeak="1884636" vmrss="1547655" vmhwm="1547655" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1241500" vmpeak="1326696" vmrss="419666" vmhwm="506740" />
- <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1583504" vmpeak="1668700" vmrss="1108941" vmhwm="1108941" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992170" vmpeak="1004790" vmrss="275704" vmhwm="288189" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1487241" vmpeak="1487241" vmrss="1150458" vmhwm="1150458" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259122" vmpeak="1259122" vmrss="283545" vmhwm="286317" />
- <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1294259" vmpeak="1379456" vmrss="819712" vmhwm="819712" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1135388" vmpeak="1188803" vmrss="366688" vmhwm="384436" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1903132" vmpeak="1903132" vmrss="1341693" vmhwm="1509783" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1221381" vmpeak="1306578" vmrss="376038" vmhwm="384514" />
- <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1517360" vmpeak="1602556" vmrss="1041424" vmhwm="1041424" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="2658385" vmpeak="3374820" vmrss="1479264" vmhwm="2195507" />
- <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="3398751" vmpeak="3980990" vmrss="3009406" vmhwm="3589695" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2763358" vmpeak="3374727" vmrss="1996228" vmhwm="2195658" />
- <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="3381653" vmpeak="3900676" vmrss="2904111" vmhwm="3506760" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1254858" vmpeak="1436120" vmrss="461666" vmhwm="642226" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1880288" vmpeak="2024947" vmrss="1544847" vmhwm="1688965" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1529008" vmpeak="1529008" vmrss="505601" vmhwm="640972" />
- <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1560561" vmpeak="1620039" vmrss="1084423" vmhwm="1227179" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1467497" vmpeak="1765602" vmrss="637795" vmhwm="935719" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1611261" vmpeak="2008177" vmrss="1219769" vmhwm="1615723" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1771364" vmpeak="1771364" vmrss="805464" vmhwm="935511" />
- <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1605936" vmpeak="1895415" vmrss="1127750" vmhwm="1502191" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1436468" vmpeak="1623923" vmrss="753001" vmhwm="940030" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2477649" vmpeak="2606604" vmrss="1727107" vmhwm="1917645" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1704596" vmpeak="1704596" vmrss="763807" vmhwm="939510" />
- <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2069168" vmpeak="2154365" vmrss="1592208" vmhwm="1718236" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="755742" vmpeak="920202" vmrss="149593" vmhwm="149593" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="941834" vmpeak="941834" vmrss="605690" vmhwm="605690" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1012616" vmpeak="1012616" vmrss="154793" vmhwm="154793" />
- <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="827018" vmpeak="912215" vmrss="350012" vmhwm="350012" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="833872" vmpeak="886454" vmrss="162780" vmhwm="214853" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1017369" vmpeak="1055308" vmrss="681980" vmhwm="719721" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="920363" vmpeak="1005560" vmrss="167133" vmhwm="214895" />
- <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="898206" vmpeak="983403" vmrss="419707" vmhwm="455660" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="980382" vmpeak="1099368" vmrss="295952" vmhwm="414325" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1322427" vmpeak="1407354" vmrss="987646" vmhwm="1072141" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1248421" vmpeak="1248421" vmrss="307860" vmhwm="415298" />
- <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1158170" vmpeak="1243366" vmrss="680934" vmhwm="763703" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1188829" vmpeak="1392934" vmrss="513037" vmhwm="716632" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1859291" vmpeak="1997377" vmrss="1524088" vmhwm="1661504" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1456962" vmpeak="1456962" vmrss="521965" vmhwm="715650" />
- <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1605110" vmpeak="1690306" vmrss="1127874" vmhwm="1262539" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="694122" vmpeak="774706" vmrss="35958" vmhwm="35958" />
- <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="617312" vmpeak="617312" vmrss="281574" vmhwm="281574" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="961006" vmpeak="1046203" vmrss="35443" vmhwm="35443" />
- <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="610729" vmpeak="695926" vmrss="132324" vmhwm="132324" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720948" vmpeak="795828" vmrss="98992" vmhwm="98992" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="770952" vmpeak="770952" vmrss="435333" vmhwm="435333" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="987984" vmpeak="1073181" vmrss="103136" vmhwm="103136" />
- <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727896" vmpeak="813092" vmrss="252522" vmhwm="252522" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727100" vmpeak="727100" vmrss="92372" vmhwm="92372" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="858800" vmpeak="858800" vmrss="523712" vmhwm="523712" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="994151" vmpeak="1079348" vmrss="100588" vmhwm="100588" />
- <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="763750" vmpeak="848946" vmrss="288984" vmhwm="288984" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="774893" vmrss="34673" vmhwm="34673" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631940" vmpeak="631940" vmrss="288189" vmhwm="288189" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="960580" vmpeak="1045777" vmrss="35604" vmhwm="35604" />
- <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618436" vmpeak="703632" vmrss="140368" vmhwm="140368" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="866314" vmrss="43825" vmhwm="43825" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="728395" vmpeak="756038" vmrss="383780" vmhwm="410545" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="979997" vmpeak="979997" vmrss="128320" vmhwm="128320" />
- <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="763287" vmpeak="848484" vmrss="284648" vmhwm="284648" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30253" vmhwm="30253" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="520577" vmpeak="523374" vmrss="126614" vmhwm="129084" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="963367" vmpeak="1048564" vmrss="33337" vmhwm="33337" />
- <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605597" vmpeak="690794" vmrss="128091" vmhwm="129911" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="1141790" vmpeak="1336405" vmrss="431813" vmhwm="626236" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="1443811" vmpeak="1566063" vmrss="1055756" vmhwm="1177592" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1409517" vmpeak="1409517" vmrss="472004" vmhwm="625461" />
- <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1361157" vmpeak="1446354" vmrss="883168" vmhwm="1005030" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="1125716" vmpeak="1312344" vmrss="413764" vmhwm="600215" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="1426141" vmpeak="1538960" vmrss="1037488" vmhwm="1149792" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1212156" vmpeak="1312438" vmrss="455239" vmhwm="601276" />
- <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1337679" vmpeak="1365301" vmrss="859944" vmhwm="972233" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="1299688" vmpeak="1563577" vmrss="586242" vmhwm="849924" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1812174" vmpeak="1997912" vmrss="1424103" vmhwm="1609166" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1386018" vmpeak="1563577" vmrss="626147" vmhwm="849420" />
- <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1652414" vmpeak="1755286" vmrss="1174087" vmhwm="1361599" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="1287572" vmpeak="1580612" vmrss="624582" vmhwm="917441" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="1513813" vmpeak="1998531" vmrss="1151737" vmhwm="1636216" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1464517" vmpeak="1580597" vmrss="626922" vmhwm="916905" />
- <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1498551" vmpeak="1889992" vmrss="1020489" vmhwm="1496653" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="746007" vmpeak="746007" vmrss="136240" vmhwm="136240" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="926957" vmpeak="926957" vmrss="577309" vmhwm="577309" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1013547" vmpeak="1013547" vmrss="142885" vmhwm="142885" />
- <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="813794" vmpeak="898991" vmrss="336570" vmhwm="336570" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="824631" vmpeak="897722" vmrss="151590" vmhwm="210714" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="838567" vmpeak="891956" vmrss="503739" vmhwm="557273" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="910988" vmpeak="996184" vmrss="158886" vmhwm="211936" />
- <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="818776" vmpeak="903973" vmrss="341322" vmhwm="391955" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1184934" vmpeak="1406100" vmrss="511170" vmhwm="731827" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1640386" vmpeak="1850810" vmrss="1305855" vmhwm="1515966" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1452578" vmpeak="1452578" vmrss="518258" vmhwm="732508" />
- <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1479166" vmpeak="1604392" vmrss="1000901" vmhwm="1210248" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1360918" vmpeak="1658852" vmrss="684892" vmhwm="982316" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2023595" vmpeak="2311010" vmrss="1620923" vmhwm="1906216" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628577" vmpeak="1713774" vmrss="691672" vmhwm="982930" />
- <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814176" vmpeak="2016393" vmrss="1336238" vmhwm="1622244" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="2119015" vmpeak="2465268" vmrss="1307748" vmhwm="1653490" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="3063808" vmpeak="3522360" vmrss="2673543" vmhwm="3130623" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2386618" vmpeak="2465538" vmrss="1321663" vmhwm="1652372" />
- <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2799269" vmpeak="3172618" vmrss="2321664" vmhwm="2777736" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="974698" vmpeak="1100762" vmrss="304220" vmhwm="429774" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1173671" vmpeak="1286625" vmrss="838682" vmhwm="951636" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1242233" vmpeak="1242233" vmrss="310086" vmhwm="429150" />
- <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1090726" vmpeak="1175922" vmrss="613813" vmhwm="726200" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="870022" vmpeak="924336" vmrss="179088" vmhwm="232892" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="1125753" vmpeak="1166344" vmrss="786666" vmhwm="827138" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="1137541" vmpeak="1137541" vmrss="184485" vmhwm="232949" />
- <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="955177" vmpeak="1040374" vmrss="477032" vmhwm="519178" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="1074985" vmpeak="1208168" vmrss="344406" vmhwm="477089" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="1383397" vmpeak="1496918" vmrss="980408" vmhwm="1092702" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1168200" vmpeak="1253397" vmrss="374275" vmhwm="477698" />
- <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1240657" vmpeak="1325854" vmrss="762725" vmhwm="854386" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713351" vmpeak="787898" vmrss="52858" vmhwm="52858" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="719794" vmpeak="719794" vmrss="384508" vmhwm="384508" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="980522" vmpeak="980522" vmrss="59456" vmhwm="59456" />
- <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="686613" vmpeak="771810" vmrss="211426" vmhwm="211426" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705796" vmpeak="705796" vmrss="52405" vmhwm="52405" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724984" vmpeak="724984" vmrss="390031" vmhwm="390031" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791918" vmpeak="877115" vmrss="56269" vmhwm="56269" />
- <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674590" vmpeak="759787" vmrss="199139" vmhwm="199139" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="850278" vmpeak="901976" vmrss="168672" vmhwm="218660" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="1092353" vmpeak="1123298" vmrss="689566" vmhwm="762699" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="1118015" vmpeak="1118015" vmrss="177444" vmhwm="218670" />
- <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="944564" vmpeak="1029761" vmrss="467672" vmhwm="495326" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="740069" vmpeak="740069" vmrss="128315" vmhwm="128315" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="791986" vmpeak="791986" vmrss="456830" vmhwm="456830" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="1188891" vmpeak="1274088" vmrss="138252" vmhwm="138252" />
- <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="819218" vmpeak="904415" vmrss="342066" vmhwm="342066" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="740714" vmpeak="803946" vmrss="126521" vmhwm="126521" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="925225" vmpeak="925225" vmrss="519417" vmhwm="586206" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="1008446" vmpeak="1093643" vmrss="135714" vmhwm="135714" />
- <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="824470" vmpeak="909667" vmrss="348103" vmhwm="348103" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046843" vmpeak="1178897" vmrss="308848" vmhwm="440377" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1151961" vmpeak="1168070" vmrss="815692" vmhwm="831932" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1321751" vmpeak="1321751" vmrss="373412" vmhwm="440299" />
- <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1200820" vmpeak="1286017" vmrss="725717" vmhwm="734500" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="1186697" vmpeak="1322895" vmrss="323164" vmhwm="457116" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="1522606" vmpeak="1522606" vmrss="1120277" vmhwm="1120277" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1288424" vmpeak="1373621" vmrss="500370" vmhwm="500370" />
- <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1449448" vmpeak="1534644" vmrss="973845" vmhwm="973845" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133809" vmpeak="2836407" vmrss="1438444" vmhwm="2140850" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707359" vmpeak="3834188" vmrss="2314816" vmhwm="3441464" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401339" vmpeak="3101945" vmrss="1469098" vmhwm="2139987" />
- <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2792654" vmpeak="3834136" vmrss="2314577" vmhwm="3440408" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188804" vmpeak="2918375" vmrss="1492623" vmhwm="2222001" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2898989" vmpeak="4025117" vmrss="2481081" vmhwm="3626459" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2275379" vmpeak="2918474" vmrss="1523834" vmhwm="2221715" />
- <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2876250" vmpeak="3944834" vmrss="2398682" vmhwm="3551002" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="873480" vmpeak="943924" vmrss="196320" vmhwm="266656" />
- <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="1067367" vmpeak="1101604" vmrss="730048" vmhwm="764051" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="961745" vmpeak="1046942" vmrss="212149" vmhwm="266546" />
- <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="976471" vmpeak="1061668" vmrss="499335" vmhwm="528736" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="1428580" vmpeak="1776923" vmrss="741670" vmhwm="1089587" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="1842729" vmpeak="2177494" vmrss="1452183" vmhwm="1785934" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1514890" vmpeak="1776834" vmrss="756730" vmhwm="1088464" />
- <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1753476" vmpeak="2003045" vmrss="1275523" vmhwm="1608807" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3478618" vmpeak="4858219" vmrss="2796794" vmhwm="4176062" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4842442" vmpeak="6987687" vmrss="4397738" vmhwm="6544928" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3567340" vmpeak="4858193" vmrss="2814666" vmhwm="4176177" />
- <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4814217" vmpeak="6932785" vmrss="4335193" vmhwm="6538194" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="998956" vmpeak="1136428" vmrss="307600" vmhwm="444735" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1052719" vmpeak="1232316" vmrss="717854" vmhwm="897540" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1258004" vmpeak="1258004" vmrss="326175" vmhwm="443996" />
- <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1059619" vmpeak="1138789" vmrss="582155" vmhwm="745664" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1249211" vmpeak="1506304" vmrss="550752" vmhwm="807762" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1492743" vmpeak="1714642" vmrss="1095354" vmhwm="1316988" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1427483" vmpeak="1512680" vmrss="582514" vmhwm="806858" />
- <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1456343" vmpeak="1595287" vmrss="978369" vmhwm="1201579" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="871930" vmpeak="952359" vmrss="193388" vmhwm="273634" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="878768" vmpeak="973180" vmrss="533348" vmhwm="627848" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="959909" vmpeak="1045106" vmrss="208156" vmhwm="273530" />
- <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="883818" vmpeak="969014" vmrss="406442" vmhwm="476595" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388405" vmpeak="1700311" vmrss="680352" vmhwm="991998" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1970503" vmpeak="2164422" vmrss="1583935" vmhwm="1777209" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1661649" vmpeak="1746846" vmrss="723148" vmhwm="991354" />
- <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1812694" vmpeak="1917910" vmrss="1335609" vmhwm="1524931" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="1321320" vmpeak="1630896" vmrss="658730" vmhwm="968125" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="1563660" vmpeak="2064852" vmrss="1226097" vmhwm="1727050" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1679251" vmpeak="1849645" vmrss="659406" vmhwm="966815" />
- <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1557181" vmpeak="1973176" vmrss="1079998" vmhwm="1579983" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="908549" vmpeak="908549" vmrss="180804" vmhwm="180804" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1315620" vmpeak="1315620" vmrss="978213" vmhwm="978213" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1170239" vmpeak="1255436" vmrss="189326" vmhwm="189326" />
- <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1068553" vmpeak="1153750" vmrss="590298" vmhwm="590298" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="1160718" vmpeak="1262736" vmrss="405376" vmhwm="507317" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1898410" vmpeak="1898410" vmrss="1560884" vmhwm="1560884" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1240917" vmpeak="1326114" vmrss="419094" vmhwm="507306" />
- <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1594502" vmpeak="1679698" vmrss="1116954" vmhwm="1116954" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="991671" vmpeak="1004291" vmrss="275397" vmhwm="287918" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1616690" vmpeak="1618188" vmrss="1278908" vmhwm="1280494" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1258623" vmpeak="1258623" vmrss="284320" vmhwm="287606" />
- <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1303156" vmpeak="1388353" vmrss="824928" vmhwm="824928" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="1134889" vmpeak="1188636" vmrss="367130" vmhwm="384935" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1865047" vmpeak="1865047" vmrss="1527947" vmhwm="1527947" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1220882" vmpeak="1306078" vmrss="376006" vmhwm="384217" />
- <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1551019" vmpeak="1636216" vmrss="1071928" vmhwm="1071928" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="1255898" vmpeak="1437160" vmrss="461385" vmhwm="642049" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1800479" vmpeak="1945580" vmrss="1462780" vmhwm="1607470" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1530053" vmpeak="1530053" vmrss="505570" vmhwm="641368" />
- <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1561955" vmpeak="1619753" vmrss="1084324" vmhwm="1225473" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2833797" vmpeak="3516609" vmrss="1409798" vmhwm="2092417" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="4293634" vmpeak="4293634" vmrss="3955525" vmhwm="3955525" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="3022032" vmpeak="3516609" vmrss="2255333" vmhwm="2255333" />
- <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="4277993" vmpeak="4363190" vmrss="3799333" vmhwm="3799333" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="1066384" vmpeak="1233736" vmrss="390972" vmhwm="557528" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="1358442" vmpeak="1615062" vmrss="1020947" vmhwm="1273121" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1243392" vmpeak="1328589" vmrss="398580" vmhwm="558469" />
- <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1256070" vmpeak="1398212" vmrss="778549" vmhwm="1001192" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437560" vmpeak="1625010" vmrss="754254" vmhwm="941142" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2281713" vmpeak="2410668" vmrss="1943780" vmhwm="2072428" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524473" vmpeak="1625005" vmrss="763001" vmhwm="940264" />
- <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2070671" vmpeak="2155868" vmrss="1593108" vmhwm="1719125" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="835629" vmpeak="889226" vmrss="164216" vmhwm="217245" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="983507" vmpeak="1024665" vmrss="645985" vmhwm="686930" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="927451" vmpeak="1012648" vmrss="168360" vmhwm="216569" />
- <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="900712" vmpeak="985909" vmrss="423519" vmhwm="463533" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="980636" vmpeak="1099706" vmrss="296680" vmhwm="415194" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="1326213" vmpeak="1409371" vmrss="988488" vmhwm="1071366" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1248691" vmpeak="1248691" vmrss="306857" vmhwm="414752" />
- <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1163032" vmpeak="1248228" vmrss="685843" vmhwm="765507" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189531" vmpeak="1393636" vmrss="513661" vmhwm="717204" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1866176" vmpeak="2002847" vmrss="1528664" vmhwm="1664577" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1457669" vmpeak="1457669" vmrss="523811" vmhwm="715837" />
- <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1606243" vmpeak="1691440" vmrss="1129185" vmhwm="1262534" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="1521920" vmpeak="1894167" vmrss="814210" vmhwm="1185704" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1961772" vmpeak="2317998" vmrss="1623268" vmhwm="1979062" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1789325" vmpeak="1894157" vmrss="828328" vmhwm="1185480" />
- <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1951877" vmpeak="2240295" vmrss="1479337" vmhwm="1843041" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="1427384" vmpeak="1755920" vmrss="719097" vmhwm="1047295" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="2059070" vmpeak="2371101" vmrss="1721616" vmhwm="2033194" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1694035" vmpeak="1779232" vmrss="732596" vmhwm="1046208" />
- <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1863825" vmpeak="2084664" vmrss="1386002" vmhwm="1691248" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="720959" vmpeak="795839" vmrss="98898" vmhwm="98898" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="749106" vmpeak="749106" vmrss="411049" vmhwm="411049" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="806941" vmpeak="806941" vmrss="104702" vmhwm="104702" />
- <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="727818" vmpeak="813014" vmrss="252787" vmhwm="252787" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="727116" vmpeak="793010" vmrss="92508" vmhwm="92508" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="817554" vmpeak="817554" vmrss="479762" vmhwm="479762" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="813108" vmpeak="898305" vmrss="99481" vmhwm="99481" />
- <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="765070" vmpeak="850267" vmrss="290040" vmhwm="290040" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="694023" vmpeak="694023" vmrss="34377" vmhwm="34377" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="631919" vmpeak="631919" vmrss="294070" vmhwm="294070" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="779532" vmpeak="864728" vmrss="36524" vmhwm="36524" />
- <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="618586" vmpeak="703783" vmrss="140582" vmhwm="140582" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="783447" vmpeak="783447" vmrss="42936" vmhwm="42936" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="724302" vmpeak="724302" vmrss="386261" vmhwm="386339" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="1070524" vmpeak="1155720" vmrss="129376" vmhwm="129376" />
- <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="762933" vmpeak="848130" vmrss="284216" vmhwm="284216" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="691485" vmpeak="691485" vmrss="30700" vmhwm="30700" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="588270" vmpeak="610240" vmrss="250692" vmhwm="269453" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="958042" vmpeak="958042" vmrss="30908" vmhwm="30908" />
- <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="605176" vmpeak="690372" vmrss="127602" vmhwm="129365" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="732747" vmpeak="732747" vmrss="146874" vmhwm="146874" />
- <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="778096" vmpeak="778096" vmrss="439654" vmhwm="439654" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="818864" vmpeak="904061" vmrss="148220" vmhwm="148220" />
- <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="781279" vmpeak="866476" vmrss="323528" vmhwm="323528" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="739559" vmpeak="739559" vmrss="67152" vmhwm="67152" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="769938" vmpeak="769938" vmrss="431922" vmhwm="431922" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="1007323" vmpeak="1007323" vmrss="99127" vmhwm="99127" />
- <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="760047" vmpeak="845244" vmrss="281866" vmhwm="281866" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1219296" vmpeak="1440462" vmrss="513271" vmhwm="733850" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1693062" vmpeak="1898192" vmrss="1355270" vmhwm="1559838" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1305881" vmpeak="1440556" vmrss="527399" vmhwm="732924" />
- <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1500881" vmpeak="1620819" vmrss="1022845" vmhwm="1226721" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1406802" vmpeak="1704736" vmrss="687445" vmhwm="984760" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2147516" vmpeak="2429642" vmrss="1810073" vmhwm="2091382" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1674363" vmpeak="1759560" vmrss="702972" vmhwm="984744" />
- <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1849614" vmpeak="2046543" vmrss="1371458" vmhwm="1652222" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1218568" vmpeak="1439734" vmrss="513505" vmhwm="734136" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1688476" vmpeak="1897693" vmrss="1350502" vmhwm="1559168" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1305106" vmpeak="1439828" vmrss="526188" vmhwm="732721" />
- <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1498400" vmpeak="1619649" vmrss="1021170" vmhwm="1226201" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1406007" vmpeak="1703941" vmrss="687798" vmhwm="985082" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2132431" vmpeak="2419976" vmrss="1795331" vmhwm="2082298" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1673562" vmpeak="1758759" vmrss="702202" vmhwm="984557" />
- <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1852832" vmpeak="2055175" vmrss="1375025" vmhwm="1661046" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="1214486" vmpeak="1422704" vmrss="531008" vmhwm="738576" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1653386" vmpeak="1850721" vmrss="1316047" vmhwm="1513090" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1307545" vmpeak="1422720" vmrss="553290" vmhwm="739018" />
- <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1505826" vmpeak="1597455" vmrss="1028154" vmhwm="1203888" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="1639840" vmpeak="2058960" vmrss="933025" vmhwm="1351495" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="2290340" vmpeak="2674006" vmrss="1952048" vmhwm="2335455" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1914021" vmpeak="2149482" vmrss="959363" vmhwm="1351006" />
- <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="2119436" vmpeak="2416320" vmrss="1662554" vmhwm="2022462" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="705806" vmpeak="780353" vmrss="52806" vmhwm="52806" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="700835" vmpeak="700835" vmrss="362949" vmhwm="362949" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="791934" vmpeak="791934" vmrss="56794" vmhwm="56794" />
- <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674611" vmpeak="759808" vmrss="198120" vmhwm="198120" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="1046858" vmpeak="1178912" vmrss="308542" vmhwm="439483" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="1226248" vmpeak="1247022" vmrss="889018" vmhwm="909454" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1140729" vmpeak="1225926" vmrss="372574" vmhwm="439826" />
- <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1199894" vmpeak="1285091" vmrss="724178" vmhwm="734505" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2134158" vmpeak="2836756" vmrss="1438309" vmhwm="2140715" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2781932" vmpeak="3912818" vmrss="2443178" vmhwm="3574105" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2220634" vmpeak="2836865" vmrss="1468797" vmhwm="2139722" />
- <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2790174" vmpeak="3834277" vmrss="2311826" vmhwm="3439888" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2189153" vmpeak="2918723" vmrss="1491048" vmhwm="2220868" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2869105" vmpeak="4001228" vmrss="2531100" vmhwm="3662869" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2366254" vmpeak="2918817" vmrss="1523605" vmhwm="2221388" />
- <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877716" vmpeak="3944751" vmrss="2400091" vmhwm="3551449" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="3569482" vmpeak="4949084" vmrss="2797106" vmhwm="4176364" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="4819713" vmpeak="6984764" vmrss="4481042" vmhwm="6645126" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3929790" vmpeak="4858536" vmrss="2814931" vmhwm="4176198" />
- <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4816962" vmpeak="6932770" vmrss="4337715" vmhwm="6538006" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="1172662" vmpeak="1401509" vmrss="491966" vmhwm="720564" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="1345822" vmpeak="1585391" vmrss="1008384" vmhwm="1247916" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1442381" vmpeak="1442381" vmrss="510697" vmhwm="720267" />
- <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1348219" vmpeak="1513917" vmrss="870485" vmhwm="1120215" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="1106159" vmpeak="1204460" vmrss="268408" vmhwm="366470" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="1568190" vmpeak="1568190" vmrss="1230538" vmhwm="1230538" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1395617" vmpeak="1395617" vmrss="399692" vmhwm="399692" />
- <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1513621" vmpeak="1598818" vmrss="1035897" vmhwm="1035897" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="1108187" vmpeak="1206488" vmrss="271648" vmhwm="369590" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="2870816" vmpeak="2870816" vmrss="1290972" vmhwm="1290972" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1396408" vmpeak="1396408" vmrss="396172" vmhwm="396172" />
- <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2778490" vmpeak="2863686" vmrss="2307058" vmhwm="2307058" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="979706" vmpeak="1098692" vmrss="295682" vmhwm="414247" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1303499" vmpeak="1390069" vmrss="965224" vmhwm="1051580" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1247750" vmpeak="1247750" vmrss="307928" vmhwm="415266" />
- <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1160265" vmpeak="1245462" vmrss="682354" vmhwm="766100" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304610" vmhwm="430336" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="1170265" vmpeak="1281675" vmrss="833180" vmhwm="944299" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1253189" vmpeak="1253189" vmrss="316373" vmhwm="429618" />
- <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1091214" vmpeak="1176411" vmrss="613095" vmhwm="724110" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="985660" vmpeak="1111723" vmrss="304772" vmhwm="430414" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="1150806" vmpeak="1261878" vmrss="813394" vmhwm="924123" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1253194" vmpeak="1253194" vmrss="315463" vmhwm="428974" />
- <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1090070" vmpeak="1175267" vmrss="612274" vmhwm="722924" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="705577" vmpeak="780457" vmrss="53320" vmhwm="53320" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="716476" vmpeak="716476" vmrss="378487" vmhwm="378487" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="972613" vmpeak="1057810" vmrss="57033" vmhwm="57033" />
- <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="672594" vmpeak="757790" vmrss="194183" vmhwm="194183" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="1863586" vmpeak="2298270" vmrss="1166578" vmhwm="1601236" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="3438385" vmpeak="3992487" vmrss="3100890" vmhwm="3654268" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="2136893" vmpeak="2298270" vmrss="1177888" vmhwm="1601350" />
- <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2866156" vmpeak="3332056" vmrss="2390778" vmhwm="2939315" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="1795970" vmpeak="2230654" vmrss="1095978" vmhwm="1530557" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="3373229" vmpeak="3883687" vmrss="3035104" vmhwm="3545068" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="2069298" vmpeak="2230675" vmrss="1108967" vmhwm="1530178" />
- <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2783367" vmpeak="3206626" vmrss="2308222" vmhwm="2813283" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="1389767" vmpeak="1653657" vmrss="587459" vmhwm="851136" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1997091" vmpeak="1999374" vmrss="1659538" vmhwm="1661498" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1660250" vmpeak="1660250" vmrss="717350" vmhwm="850948" />
- <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1842703" vmpeak="1927900" vmrss="1363991" vmhwm="1363991" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="783562" vmpeak="783562" vmrss="74089" vmhwm="74089" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="976300" vmpeak="976300" vmrss="639132" vmhwm="639132" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="1055204" vmpeak="1140401" vmrss="135018" vmhwm="135018" />
- <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="895616" vmpeak="980813" vmrss="418631" vmhwm="418631" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="903520" vmpeak="903520" vmrss="182405" vmhwm="182405" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1300780" vmpeak="1300780" vmrss="963144" vmhwm="963144" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="1261171" vmpeak="1346368" vmrss="191354" vmhwm="191354" />
- <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1066088" vmpeak="1151285" vmrss="588608" vmhwm="588608" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="992097" vmpeak="1004718" vmrss="276021" vmhwm="288532" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1673510" vmpeak="1686178" vmrss="1335256" vmhwm="1346415" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1259304" vmpeak="1259304" vmrss="285667" vmhwm="288584" />
- <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1318803" vmpeak="1404000" vmrss="840652" vmhwm="840652" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="742190" vmpeak="801429" vmrss="120036" vmhwm="120036" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="917155" vmpeak="917155" vmrss="580470" vmhwm="580470" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="828079" vmpeak="828079" vmrss="124950" vmhwm="124950" />
- <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="798803" vmpeak="884000" vmrss="322223" vmhwm="322223" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="1036542" vmpeak="1123340" vmrss="332675" vmhwm="418984" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1419095" vmpeak="1503018" vmrss="1081142" vmhwm="1164966" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1122513" vmpeak="1207710" vmrss="333564" vmhwm="417877" />
- <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1206654" vmpeak="1291851" vmrss="729799" vmhwm="812141" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2502557" vmpeak="2710479" vmrss="803394" vmhwm="1011098" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4844647" vmpeak="4844647" vmrss="4505820" vmhwm="4505820" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="927518" vmpeak="990735" vmrss="192327" vmhwm="255424" />
- <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1410156" vmpeak="1410156" vmrss="1071818" vmhwm="1071818" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1348308" vmpeak="1587736" vmrss="555162" vmhwm="794456" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2073328" vmpeak="2139914" vmrss="1735650" vmhwm="1801794" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="1137926" vmpeak="1282252" vmrss="347172" vmhwm="491384" />
- <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="1528581" vmpeak="1558133" vmrss="1191273" vmhwm="1220918" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="1064445" vmpeak="1124276" vmrss="233131" vmhwm="292728" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="1608666" vmpeak="1608666" vmrss="1270744" vmhwm="1270744" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1209941" vmpeak="1295138" vmrss="396422" vmhwm="396422" />
- <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1593238" vmpeak="1678435" vmrss="1137583" vmhwm="1257484" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="713814" vmpeak="788028" vmrss="53034" vmhwm="53034" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="701729" vmpeak="701729" vmrss="363578" vmhwm="363578" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="799869" vmpeak="885066" vmrss="59810" vmhwm="59810" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="687694" vmpeak="772891" vmrss="209248" vmhwm="209248" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="706258" vmpeak="780140" vmrss="52884" vmhwm="52884" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="705052" vmpeak="705052" vmrss="367395" vmhwm="367395" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="973367" vmpeak="1058564" vmrss="56414" vmhwm="56414" />
- <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677320" vmpeak="762517" vmrss="198619" vmhwm="198619" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="1437061" vmpeak="1624516" vmrss="755024" vmhwm="942141" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="2478034" vmpeak="2597150" vmrss="2139680" vmhwm="2258219" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1524120" vmpeak="1624521" vmrss="762559" vmhwm="940914" />
- <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2100274" vmpeak="2185471" vmrss="1622847" vmhwm="1739566" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="748534" vmpeak="809437" vmrss="143514" vmhwm="143514" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="943758" vmpeak="943758" vmrss="606392" vmhwm="606392" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1015783" vmpeak="1015783" vmrss="147118" vmhwm="147118" />
- <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="835073" vmpeak="920270" vmrss="357146" vmhwm="357146" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="834953" vmpeak="887541" vmrss="164626" vmhwm="217001" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="1034649" vmpeak="1064835" vmrss="696592" vmhwm="726694" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="921081" vmpeak="1006278" vmrss="167502" vmhwm="215597" />
- <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="911310" vmpeak="996507" vmrss="433617" vmhwm="464682" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="971453" vmpeak="1081683" vmrss="305390" vmhwm="415204" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="1332598" vmpeak="1413375" vmrss="995165" vmhwm="1075859" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1148685" vmpeak="1233882" vmrss="314220" vmhwm="414882" />
- <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1167634" vmpeak="1252830" vmrss="689416" vmhwm="769002" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="1189630" vmpeak="1393740" vmrss="511908" vmhwm="715540" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1867418" vmpeak="2007080" vmrss="1529990" vmhwm="1668929" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1367256" vmpeak="1452453" vmrss="523946" vmhwm="715577" />
- <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1611350" vmpeak="1696546" vmrss="1133615" vmhwm="1270427" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="2715268" vmpeak="3061650" vmrss="776375" vmhwm="1122695" />
- <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="4160156" vmpeak="4971210" vmrss="3823164" vmhwm="4634151" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="701350" vmpeak="776562" vmrss="42281" vmhwm="42281" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="717771" vmpeak="717771" vmrss="379501" vmhwm="379501" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="786552" vmpeak="786552" vmrss="42406" vmhwm="42406" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="656084" vmpeak="741280" vmrss="177543" vmhwm="177543" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="705936" vmpeak="781149" vmrss="55619" vmhwm="55619" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="724765" vmpeak="724765" vmrss="386458" vmhwm="386458" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="791554" vmpeak="791554" vmrss="55582" vmhwm="55582" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="670987" vmpeak="756184" vmrss="193029" vmhwm="193029" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="720673" vmpeak="720673" vmrss="99512" vmhwm="99512" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="771253" vmpeak="771253" vmrss="433087" vmhwm="433087" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="987828" vmpeak="1073025" vmrss="104005" vmhwm="104005" />
- <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="726986" vmpeak="812182" vmrss="248450" vmhwm="248450" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="726554" vmpeak="793447" vmrss="91452" vmhwm="91452" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="857027" vmpeak="857027" vmrss="519630" vmhwm="519630" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="812619" vmpeak="897816" vmrss="100895" vmhwm="100895" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="764800" vmpeak="849997" vmrss="287019" vmhwm="287019" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="739960" vmpeak="739960" vmrss="134924" vmhwm="134924" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="905439" vmpeak="905439" vmrss="567876" vmhwm="567876" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="825988" vmpeak="891722" vmrss="144684" vmhwm="144684" />
- <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="821251" vmpeak="906448" vmrss="343085" vmhwm="343085" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="1026407" vmpeak="1026407" vmrss="351535" vmhwm="351535" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="1104485" vmpeak="1149496" vmrss="766740" vmhwm="811642" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1209280" vmpeak="1209280" vmrss="362325" vmhwm="362325" />
- <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1105275" vmpeak="1190472" vmrss="627822" vmhwm="671450" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="988072" vmpeak="1114146" vmrss="304798" vmhwm="430279" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="1171383" vmpeak="1282325" vmrss="833705" vmhwm="944476" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1164982" vmpeak="1250178" vmrss="319394" vmhwm="429904" />
- <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1090481" vmpeak="1115056" vmrss="613485" vmhwm="722176" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="1185163" vmpeak="1406329" vmrss="511669" vmhwm="732674" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1646897" vmpeak="1857653" vmrss="1308538" vmhwm="1518940" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1361906" vmpeak="1447102" vmrss="515138" vmhwm="731073" />
- <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1486612" vmpeak="1612171" vmrss="1008602" vmhwm="1218973" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="1361328" vmpeak="1659262" vmrss="685287" vmhwm="983091" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="2053204" vmpeak="2340951" vmrss="1714788" vmhwm="2002072" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1628504" vmpeak="1713701" vmrss="690892" vmhwm="983257" />
- <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1817290" vmpeak="2019841" vmrss="1338792" vmhwm="1625405" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="980148" vmpeak="1106211" vmrss="304340" vmhwm="430242" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="1177410" vmpeak="1291040" vmrss="839217" vmhwm="952868" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1060997" vmpeak="1146194" vmrss="308906" vmhwm="429811" />
- <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1094189" vmpeak="1123038" vmrss="616548" vmhwm="730298" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="1217086" vmpeak="1438262" vmrss="515611" vmhwm="736502" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1721532" vmpeak="1922648" vmrss="1383304" vmhwm="1584195" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1394296" vmpeak="1479493" vmrss="530197" vmhwm="735883" />
- <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1533625" vmpeak="1649492" vmrss="1055813" vmhwm="1256236" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="1664005" vmpeak="1929070" vmrss="791611" vmhwm="988280" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="2054062" vmpeak="2324472" vmrss="1715776" vmhwm="1985344" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1750642" vmpeak="1750642" vmrss="806811" vmhwm="988041" />
- <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1905020" vmpeak="2088814" vmrss="1426682" vmhwm="1694347" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="994541" vmpeak="1120615" vmrss="307034" vmhwm="432806" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="1212042" vmpeak="1312194" vmrss="874780" vmhwm="974438" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1081334" vmpeak="1166531" vmrss="322436" vmhwm="432702" />
- <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1116720" vmpeak="1132315" vmrss="638097" vmhwm="738348" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="1467762" vmpeak="1671108" vmrss="691412" vmhwm="894509" />
- <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="2625381" vmpeak="2732168" vmrss="2288915" vmhwm="2392494" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="713590" vmpeak="788138" vmrss="53216" vmhwm="53216" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="724427" vmpeak="724427" vmrss="386354" vmhwm="386354" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="799604" vmpeak="799604" vmrss="59534" vmhwm="59534" />
- <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="685677" vmpeak="770874" vmrss="206845" vmhwm="206845" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="832010" vmpeak="832010" vmrss="144367" vmhwm="144367" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="920249" vmpeak="920249" vmrss="582769" vmhwm="582769" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="1009200" vmpeak="1094397" vmrss="156052" vmhwm="156052" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="851666" vmpeak="936863" vmrss="374660" vmhwm="374660" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="1357855" vmpeak="1537842" vmrss="428038" vmhwm="602841" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1748255" vmpeak="1748255" vmrss="1410474" vmhwm="1410474" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1539933" vmpeak="1625130" vmrss="506157" vmhwm="602326" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597762" vmpeak="1597762" vmrss="1125956" vmhwm="1125956" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="1508566" vmpeak="1688554" vmrss="427086" vmhwm="602414" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1694071" vmpeak="1694071" vmrss="1356300" vmhwm="1356300" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1418346" vmpeak="1507495" vmrss="498206" vmhwm="602238" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1533370" vmpeak="1618567" vmrss="1062006" vmhwm="1062006" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="912147" vmpeak="990698" vmrss="224068" vmhwm="302484" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="1144707" vmpeak="1222395" vmrss="807570" vmhwm="885076" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="998842" vmpeak="1048663" vmrss="239059" vmhwm="302291" />
- <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1054336" vmpeak="1139533" vmrss="577106" vmhwm="651913" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="1046905" vmpeak="1206301" vmrss="351400" vmhwm="510603" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="1199005" vmpeak="1333363" vmrss="861400" vmhwm="995815" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1132003" vmpeak="1217200" vmrss="380998" vmhwm="509615" />
- <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1174336" vmpeak="1259533" vmrss="696300" vmhwm="857849" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133768" vmpeak="2836366" vmrss="1437966" vmhwm="2140403" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2803710" vmpeak="3934762" vmrss="2464961" vmhwm="3596054" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2400741" vmpeak="2836230" vmrss="1468438" vmhwm="2139410" />
- <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793221" vmpeak="3855737" vmrss="2313766" vmhwm="3461135" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="2188924" vmpeak="2918494" vmrss="1491630" vmhwm="2221008" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="2899624" vmpeak="4031731" vmrss="2561410" vmhwm="3693086" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2274792" vmpeak="2918401" vmrss="1523438" vmhwm="2221039" />
- <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2877160" vmpeak="3966222" vmrss="2398546" vmhwm="3572186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="1252357" vmpeak="1511010" vmrss="552931" vmhwm="811361" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="1481464" vmpeak="1701512" vmrss="1144072" vmhwm="1363939" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1340471" vmpeak="1510438" vmrss="585192" vmhwm="810186" />
- <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1465339" vmpeak="1601189" vmrss="987604" vmhwm="1207902" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="872019" vmpeak="952447" vmrss="192904" vmhwm="272953" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="876340" vmpeak="970054" vmrss="538460" vmhwm="632299" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="959992" vmpeak="1045189" vmrss="207662" vmhwm="273093" />
- <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="883292" vmpeak="968489" vmrss="405891" vmhwm="476907" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="1248988" vmpeak="1505738" vmrss="549031" vmhwm="805745" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="1459816" vmpeak="1681716" vmrss="1121952" vmhwm="1343638" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1337055" vmpeak="1506221" vmrss="582212" vmhwm="806447" />
- <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1456322" vmpeak="1589104" vmrss="977688" vmhwm="1194798" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="1388498" vmpeak="1700405" vmrss="680981" vmhwm="992706" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1904952" vmpeak="2102276" vmrss="1567898" vmhwm="1764921" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1486066" vmpeak="1705636" vmrss="724443" vmhwm="992409" />
- <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1809121" vmpeak="1916995" vmrss="1331512" vmhwm="1523137" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="803400" vmpeak="848244" vmrss="123765" vmhwm="168360" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="795683" vmpeak="825796" vmrss="458718" vmhwm="488498" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="892273" vmpeak="977470" vmrss="139048" vmhwm="168292" />
- <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="789438" vmpeak="874634" vmrss="312400" vmhwm="338832" />
+
</models>
</attributes>
\ No newline at end of file
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>caffe/FP32/caffenet/caffenet.xml</value>
- <value>caffe/FP32/densenet_121/densenet_121.xml</value>
- <value>caffe/FP32/densenet_161/densenet_161.xml</value>
- <value>caffe/FP32/densenet_169/densenet_169.xml</value>
- <value>caffe/FP32/densenet_201/densenet_201.xml</value>
- <value>caffe/FP32/dpn_92/dpn_92.xml</value>
- <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>caffe/FP32/inception_v1/inception_v1.xml</value>
- <value>caffe/FP32/inception_v2/inception_v2.xml</value>
- <value>caffe/FP32/inception_v3/inception_v3.xml</value>
- <value>caffe/FP32/inception_v4/inception_v4.xml</value>
- <value>caffe/FP32/lenet/lenet.xml</value>
- <value>caffe/FP32/mobilenet/mobilenet.xml</value>
- <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>caffe/FP32/resnet_18/resnet_18.xml</value>
- <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
- <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
- <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
- <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
- <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
- <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
- <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
- <value>caffe/FP32/vgg16/vgg16.xml</value>
- <value>caffe/FP32/vgg19/vgg19.xml</value>
- <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
- <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
- <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
- <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
- <value>caffe/FP32/dilation/dilation.xml</value>
- <value>caffe/FP32/dssd/dssd.xml</value>
- <value>caffe/FP32/fcn8/fcn8.xml</value>
- <value>caffe/FP32/fcn32/fcn32.xml</value>
- <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
- <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>caffe/FP32/openpose_face/openpose_face.xml</value>
- <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
- <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
- <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
- <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
- <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
- <value>caffe/FP32/vnect/vnect.xml</value>
- <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
- <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
- <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
- <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
- <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
- <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
- <value>tf/1.14.0/FP32/east/east.xml</value>
- <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
- <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
- <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
- <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
- <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
- <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
- <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
- <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
- <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
- <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
- <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
- <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
- <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
- <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
- <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
- <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
- <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/caffenet/caffenet.xml</value>
- <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
- <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
- <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
- <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
- <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
- <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
- <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
- <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
- <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
- <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
- <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
- <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
- <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
- <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
- <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
- <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
- <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
- <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
- <value>mxnet/FP32/vgg16/vgg16.xml</value>
- <value>mxnet/FP32/vgg19/vgg19.xml</value>
- <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
- <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
- <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
- <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
- <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
- <value>mxnet/FP32/location_net/location_net.xml</value>
- <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
- <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
- <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
- <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
- <value>mxnet/FP32/nin/nin.xml</value>
- <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
- <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
- <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
- <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
- <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
- <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
- <value>onnx/FP32/retina_net/retina_net.xml</value>
- <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
- <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
- <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+ <value>public/mobilenet-v2-1.4-224/FP32/mobilenet-v2-1.4-224.xml</value>
+ <value>public/resnet-101/FP32/resnet-101.xml</value>
+ <value>public/brain-tumor-segmentation-0001/FP32/brain-tumor-segmentation-0001.xml</value>
+ <value>public/octave-resnet-101-0.125/FP32/octave-resnet-101-0.125.xml</value>
+ <value>public/faster_rcnn_inception_resnet_v2_atrous_coco/FP32/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/efficientnet-b7_auto_aug/FP32/efficientnet-b7_auto_aug.xml</value>
+ <value>public/yolo-v2-tf/FP32/yolo-v2-tf.xml</value>
+ <value>public/mobilenet-v2-1.0-224/FP32/mobilenet-v2-1.0-224.xml</value>
+ <value>public/colorization-v2-norebal/FP32/colorization-v2-norebal.xml</value>
+ <value>public/se-inception/FP32/se-inception.xml</value>
+ <value>public/efficientnet-b0/FP32/efficientnet-b0.xml</value>
+ <value>public/mobilenet-v1-1.0-224-tf/FP32/mobilenet-v1-1.0-224-tf.xml</value>
+ <value>public/mask_rcnn_resnet101_atrous_coco/FP32/mask_rcnn_resnet101_atrous_coco.xml</value>
+ <value>public/ssd_mobilenet_v1_coco/FP32/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/se-resnet-152/FP32/se-resnet-152.xml</value>
+ <value>public/octave-resnext-50-0.25/FP32/octave-resnext-50-0.25.xml</value>
+ <value>public/googlenet-v3/FP32/googlenet-v3.xml</value>
+ <value>public/ssd_mobilenet_v2_coco/FP32/ssd_mobilenet_v2_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/license-plate-recognition-barrier-0007/FP32/license-plate-recognition-barrier-0007.xml</value>
+ <value>public/mobilenet-v1-0.50-224/FP32/mobilenet-v1-0.50-224.xml</value>
+ <value>public/ssd_mobilenet_v1_fpn_coco/FP32/ssd_mobilenet_v1_fpn_coco.xml</value>
+ <value>public/vgg16/FP32/vgg16.xml</value>
+ <value>public/face-recognition-resnet34-arcface/FP32/face-recognition-resnet34-arcface.xml</value>
+ <value>public/gmcnn-places2-tf/FP32/gmcnn-places2-tf.xml</value>
+ <value>public/mobilenet-v1-1.0-224/FP32/mobilenet-v1-1.0-224.xml</value>
+ <value>public/se-resnet-101/FP32/se-resnet-101.xml</value>
+ <value>public/face-detection-retail-0044/FP32/face-detection-retail-0044.xml</value>
+ <value>public/face-recognition-mobilefacenet-arcface/FP32/face-recognition-mobilefacenet-arcface.xml</value>
+ <value>public/vehicle-license-plate-detection-barrier-0123/FP32/vehicle-license-plate-detection-barrier-0123.xml</value>
+ <value>public/densenet-161/FP32/densenet-161.xml</value>
+ <value>public/mask_rcnn_inception_resnet_v2_atrous_coco/FP32/mask_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+ <value>public/octave-resnext-101-0.25/FP32/octave-resnext-101-0.25.xml</value>
+ <value>public/face-recognition-resnet50-arcface/FP32/face-recognition-resnet50-arcface.xml</value>
+ <value>public/densenet-161-tf/FP32/densenet-161-tf.xml</value>
+ <value>public/octave-resnet-200-0.125/FP32/octave-resnet-200-0.125.xml</value>
+ <value>public/mtcnn-p/FP32/mtcnn-p.xml</value>
+ <value>public/se-resnext-101/FP32/se-resnext-101.xml</value>
+ <value>public/efficientnet-b5/FP32/efficientnet-b5.xml</value>
+ <value>public/densenet-169-tf/FP32/densenet-169-tf.xml</value>
+ <value>public/densenet-201/FP32/densenet-201.xml</value>
+ <value>public/resnet-50-tf/FP32/resnet-50-tf.xml</value>
+ <value>public/squeezenet1.1/FP32/squeezenet1.1.xml</value>
+ <value>public/squeezenet1.0/FP32/squeezenet1.0.xml</value>
+ <value>public/octave-resnet-26-0.25/FP32/octave-resnet-26-0.25.xml</value>
+ <value>public/googlenet-v4-tf/FP32/googlenet-v4-tf.xml</value>
+ <value>public/ssd300/FP32/ssd300.xml</value>
+ <value>public/rfcn-resnet101-coco-tf/FP32/rfcn-resnet101-coco-tf.xml</value>
+ <value>public/vgg19/FP32/vgg19.xml</value>
+ <value>public/ctdet_coco_dlav0_384/FP32/ctdet_coco_dlav0_384.xml</value>
+ <value>public/efficientnet-b0_auto_aug/FP32/efficientnet-b0_auto_aug.xml</value>
+ <value>public/googlenet-v1/FP32/googlenet-v1.xml</value>
+ <value>public/faster_rcnn_inception_v2_coco/FP32/faster_rcnn_inception_v2_coco.xml</value>
+ <value>public/mask_rcnn_inception_v2_coco/FP32/mask_rcnn_inception_v2_coco.xml</value>
+ <value>public/inception-resnet-v2-tf/FP32/inception-resnet-v2-tf.xml</value>
+ <value>public/deeplabv3/FP32/deeplabv3.xml</value>
+ <value>public/yolo-v3-tf/FP32/yolo-v3-tf.xml</value>
+ <value>public/resnet-152/FP32/resnet-152.xml</value>
+ <value>public/mtcnn-o/FP32/mtcnn-o.xml</value>
+ <value>public/octave-se-resnet-50-0.125/FP32/octave-se-resnet-50-0.125.xml</value>
+ <value>public/yolo-v1-tiny-tf/FP32/yolo-v1-tiny-tf.xml</value>
+ <value>public/resnet-50/FP32/resnet-50.xml</value>
+ <value>public/googlenet-v1-tf/FP32/googlenet-v1-tf.xml</value>
+ <value>public/yolo-v2-tiny-tf/FP32/yolo-v2-tiny-tf.xml</value>
+ <value>public/ssd512/FP32/ssd512.xml</value>
+ <value>public/densenet-169/FP32/densenet-169.xml</value>
+ <value>public/brain-tumor-segmentation-0002/FP32/brain-tumor-segmentation-0002.xml</value>
+ <value>public/Sphereface/FP32/Sphereface.xml</value>
+ <value>public/googlenet-v2/FP32/googlenet-v2.xml</value>
+ <value>public/face-recognition-resnet100-arcface/FP32/face-recognition-resnet100-arcface.xml</value>
+ <value>public/mobilenet-v1-0.25-128/FP32/mobilenet-v1-0.25-128.xml</value>
+ <value>public/ctdet_coco_dlav0_512/FP32/ctdet_coco_dlav0_512.xml</value>
+ <value>public/facenet-20180408-102900/FP32/facenet-20180408-102900.xml</value>
+ <value>public/ctpn/FP32/ctpn.xml</value>
+ <value>public/ssdlite_mobilenet_v2/FP32/ssdlite_mobilenet_v2.xml</value>
+ <value>public/i3d-rgb-tf/FP32/i3d-rgb-tf.xml</value>
+ <value>public/mobilenet-v2/FP32/mobilenet-v2.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/se-resnext-50/FP32/se-resnext-50.xml</value>
+ <value>public/caffenet/FP32/caffenet.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
+ <value>public/faster_rcnn_resnet50_coco/FP32/faster_rcnn_resnet50_coco.xml</value>
+ <value>public/se-resnet-50/FP32/se-resnet-50.xml</value>
+ <value>public/mask_rcnn_resnet50_atrous_coco/FP32/mask_rcnn_resnet50_atrous_coco.xml</value>
+ <value>public/octave-resnet-50-0.125/FP32/octave-resnet-50-0.125.xml</value>
+ <value>public/densenet-121-tf/FP32/densenet-121-tf.xml</value>
+ <value>public/mobilenet-v1-0.50-160/FP32/mobilenet-v1-0.50-160.xml</value>
+ <value>public/densenet-121/FP32/densenet-121.xml</value>
+ <value>public/faster_rcnn_resnet101_coco/FP32/faster_rcnn_resnet101_coco.xml</value>
+ <value>public/octave-densenet-121-0.125/FP32/octave-densenet-121-0.125.xml</value>
+ <value>public/colorization-v2/FP32/colorization-v2.xml</value>
+ <value>public/densenet-121-caffe2/FP32/densenet-121-caffe2.xml</value>
+ <value>public/efficientnet-b0-pytorch/FP32/efficientnet-b0-pytorch.xml</value>
+ <value>public/efficientnet-b5-pytorch/FP32/efficientnet-b5-pytorch.xml</value>
+ <value>public/efficientnet-b7-pytorch/FP32/efficientnet-b7-pytorch.xml</value>
+ <value>public/googlenet-v3-pytorch/FP32/googlenet-v3-pytorch.xml</value>
+ <value>public/human-pose-estimation-3d-0001/FP32/human-pose-estimation-3d-0001.xml</value>
+ <value>public/midasnet/FP32/midasnet.xml</value>
+ <value>public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.xml</value>
+ <value>public/resnet-18-pytorch/FP32/resnet-18-pytorch.xml</value>
+ <value>public/resnet-50-caffe2/FP32/resnet-50-caffe2.xml</value>
+ <value>public/resnet-50-pytorch/FP32/resnet-50-pytorch.xml</value>
+ <value>public/single-human-pose-estimation-0001/FP32/single-human-pose-estimation-0001.xml</value>
+ <value>public/squeezenet1.1-caffe2/FP32/squeezenet1.1-caffe2.xml</value>
+ <value>public/vgg19-caffe2/FP32/vgg19-caffe2.xml</value>
+ <value>intel/facial-landmarks-35-adas-0002/FP32/facial-landmarks-35-adas-0002.xml</value>
+ <value>intel/vehicle-attributes-recognition-barrier-0039/FP32/vehicle-attributes-recognition-barrier-0039.xml</value>
+ <value>intel/person-detection-action-recognition-0006/FP32/person-detection-action-recognition-0006.xml</value>
+ <value>intel/asl-recognition-0004/FP32/asl-recognition-0004.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-30-0001/FP32/yolo-v2-tiny-ava-sparse-30-0001.xml</value>
+ <value>intel/text-detection-0004/FP32/text-detection-0004.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-1016/FP32/person-vehicle-bike-detection-crossroad-1016.xml</value>
+ <value>intel/text-spotting-0002-detector/FP32/text-spotting-0002-detector.xml</value>
+ <value>intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013.xml</value>
+ <value>intel/vehicle-detection-adas-0002/FP32/vehicle-detection-adas-0002.xml</value>
+ <value>intel/image-retrieval-0001/FP32/image-retrieval-0001.xml</value>
+ <value>intel/person-detection-retail-0002/FP32/person-detection-retail-0002.xml</value>
+ <value>intel/person-attributes-recognition-crossroad-0230/FP32/person-attributes-recognition-crossroad-0230.xml</value>
+ <value>intel/face-detection-0100/FP32/face-detection-0100.xml</value>
+ <value>intel/face-detection-0102/FP32/face-detection-0102.xml</value>
+ <value>intel/person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml</value>
+ <value>intel/person-reidentification-retail-0300/FP32/person-reidentification-retail-0300.xml</value>
+ <value>intel/instance-segmentation-security-0010/FP32/instance-segmentation-security-0010.xml</value>
+ <value>intel/instance-segmentation-security-0083/FP32/instance-segmentation-security-0083.xml</value>
+ <value>intel/face-detection-0105/FP32/face-detection-0105.xml</value>
+ <value>intel/face-detection-0104/FP32/face-detection-0104.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-30-0001/FP32/icnet-camvid-ava-sparse-30-0001.xml</value>
+ <value>intel/action-recognition-0001-decoder/FP32/action-recognition-0001-decoder.xml</value>
+ <value>intel/face-detection-0106/FP32/face-detection-0106.xml</value>
+ <value>intel/person-detection-action-recognition-teacher-0002/FP32/person-detection-action-recognition-teacher-0002.xml</value>
+ <value>intel/person-vehicle-bike-detection-crossroad-0078/FP32/person-vehicle-bike-detection-crossroad-0078.xml</value>
+ <value>intel/icnet-camvid-ava-sparse-60-0001/FP32/icnet-camvid-ava-sparse-60-0001.xml</value>
+ <value>intel/face-detection-adas-0001/FP32/face-detection-adas-0001.xml</value>
+ <value>intel/unet-camvid-onnx-0001/FP32/unet-camvid-onnx-0001.xml</value>
+ <value>intel/human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml</value>
+ <value>intel/faster-rcnn-resnet101-coco-sparse-60-0001/FP32/faster-rcnn-resnet101-coco-sparse-60-0001.xml</value>
+ <value>intel/action-recognition-0001-encoder/FP32/action-recognition-0001-encoder.xml</value>
+ <value>intel/yolo-v2-ava-sparse-35-0001/FP32/yolo-v2-ava-sparse-35-0001.xml</value>
+ <value>intel/yolo-v2-ava-sparse-70-0001/FP32/yolo-v2-ava-sparse-70-0001.xml</value>
+ <value>intel/person-reidentification-retail-0248/FP32/person-reidentification-retail-0248.xml</value>
+ <value>intel/person-detection-raisinghand-recognition-0001/FP32/person-detection-raisinghand-recognition-0001.xml</value>
+ <value>intel/person-detection-asl-0001/FP32/person-detection-asl-0001.xml</value>
+ <value>intel/emotions-recognition-retail-0003/FP32/emotions-recognition-retail-0003.xml</value>
+ <value>intel/yolo-v2-tiny-ava-0001/FP32/yolo-v2-tiny-ava-0001.xml</value>
+ <value>intel/license-plate-recognition-barrier-0001/FP32/license-plate-recognition-barrier-0001.xml</value>
+ <value>intel/person-detection-retail-0013/FP32/person-detection-retail-0013.xml</value>
+ <value>intel/instance-segmentation-security-0050/FP32/instance-segmentation-security-0050.xml</value>
+ <value>intel/single-image-super-resolution-1032/FP32/single-image-super-resolution-1032.xml</value>
+ <value>intel/landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-decoder/FP32/driver-action-recognition-adas-0002-decoder.xml</value>
+ <value>intel/person-reidentification-retail-0249/FP32/person-reidentification-retail-0249.xml</value>
+ <value>intel/text-spotting-0002-recognizer-decoder/FP32/text-spotting-0002-recognizer-decoder.xml</value>
+ <value>intel/yolo-v2-ava-0001/FP32/yolo-v2-ava-0001.xml</value>
+ <value>intel/person-detection-action-recognition-0005/FP32/person-detection-action-recognition-0005.xml</value>
+ <value>intel/text-recognition-0012/FP32/text-recognition-0012.xml</value>
+ <value>intel/face-detection-retail-0004/FP32/face-detection-retail-0004.xml</value>
+ <value>intel/product-detection-0001/FP32/product-detection-0001.xml</value>
+ <value>intel/yolo-v2-tiny-ava-sparse-60-0001/FP32/yolo-v2-tiny-ava-sparse-60-0001.xml</value>
+ <value>intel/face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml</value>
+ <value>intel/road-segmentation-adas-0001/FP32/road-segmentation-adas-0001.xml</value>
+ <value>intel/single-image-super-resolution-1033/FP32/single-image-super-resolution-1033.xml</value>
+ <value>intel/face-detection-retail-0005/FP32/face-detection-retail-0005.xml</value>
+ <value>intel/pedestrian-and-vehicle-detector-adas-0001/FP32/pedestrian-and-vehicle-detector-adas-0001.xml</value>
+ <value>intel/handwritten-japanese-recognition-0001/FP32/handwritten-japanese-recognition-0001.xml</value>
+ <value>intel/semantic-segmentation-adas-0001/FP32/semantic-segmentation-adas-0001.xml</value>
+ <value>intel/pedestrian-detection-adas-0002/FP32/pedestrian-detection-adas-0002.xml</value>
+ <value>intel/driver-action-recognition-adas-0002-encoder/FP32/driver-action-recognition-adas-0002-encoder.xml</value>
+ <value>intel/text-detection-0003/FP32/text-detection-0003.xml</value>
+ <value>intel/text-spotting-0002-recognizer-encoder/FP32/text-spotting-0002-recognizer-encoder.xml</value>
+ <value>intel/handwritten-score-recognition-0003/FP32/handwritten-score-recognition-0003.xml</value>
+ <value>intel/icnet-camvid-ava-0001/FP32/icnet-camvid-ava-0001.xml</value>
+ <value>intel/text-image-super-resolution-0001/FP32/text-image-super-resolution-0001.xml</value>
+ <value>intel/gaze-estimation-adas-0002/FP32/gaze-estimation-adas-0002.xml</value>
+ <value>intel/head-pose-estimation-adas-0001/FP32/head-pose-estimation-adas-0001.xml</value>
+ <value>intel/vehicle-license-plate-detection-barrier-0106/FP32/vehicle-license-plate-detection-barrier-0106.xml</value>
+ <value>intel/instance-segmentation-security-1025/FP32/instance-segmentation-security-1025.xml</value>
</models>
</attributes>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <value>caffe/FP32/alexnet/alexnet.xml</value>
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
</models>
</attributes>
--- /dev/null
+<?xml version="1.0"?>
+<attributes>
+ <irs_path>
+ <value>${STRESS_IRS_PATH}</value>
+ </irs_path>
+</attributes>
<value>GPU</value>
</devices>
<models>
- <!--<value>caffe/FP32/alexnet/alexnet.xml</value>-->
- <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
- <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+ <value>public/alexnet/FP32/alexnet.xml</value>
+ <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+ <!--<value>public/mtcnn-r/FP32/mtcnn-r.xml</value>-->
</models>
</attributes>
# Name of virtualenv created by stress_tests/scripts/get_testdata.py
-./.stress_venv
\ No newline at end of file
+./.stress_venv
+
+# File with models names generated by stress_tests/scripts/get_testdata.py to use in OMZ downloader.py
+./scripts/models_list.txt
``` bash
gtest-parallel ./StressMemLeaksTests
```
+The following command-line keys are available:
+1. --test_conf < path > - path to config with description of arguments
+ used to parametrize tests
+2. --env_conf < path > - path to config with definition of environment values
+ (path to models etc.)
+3. --refs_conf < path > (available for MemCheckTests only) - path to config with references used to
+ compare with results of a run
+4. --collect_results_only < bool > (available for MemCheckTests only) - boolean value that disables comparison and
+   provides memory consumption results only
+
MemCheckTests logs can be used to gather reference values based on current
memory consumption:
_env_config.reset(env_config);
}
+const bool & Environment::getCollectResultsOnly() {
+ return _collect_results_only;
+}
+
+void Environment::setCollectResultsOnly(const bool &collect_results_only) {
+ _collect_results_only = collect_results_only;
+}
+
std::vector<TestCase> generateTestsParams(std::initializer_list<std::string> fields) {
std::vector<TestCase> tests_cases;
const pugi::xml_document & test_config = Environment::Instance().getTestConfig();
private:
pugi::xml_document _test_config;
pugi::xml_document _env_config;
+ bool _collect_results_only = false;
+
Environment() = default;
Environment(const Environment&) = delete;
Environment& operator=(const Environment&) = delete;
void setTestConfig(const pugi::xml_document &test_config);
const pugi::xml_document & getEnvConfig();
void setEnvConfig(const pugi::xml_document &env_config);
+ const bool & getCollectResultsOnly();
+ void setCollectResultsOnly(const bool &collect_results_only);
};
std::vector<TestCase> generateTestsParams(std::initializer_list<std::string> items);
/// env_conf is an optional parameter
DEFINE_string(env_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "env_config.xml"}), env_conf_message);
-/// @brief message for env_config argument
+/// @brief message for refs_config argument
static const char refs_conf_message[] = "Optional. Path to a references config with values of memory consumption per test.";
/// @brief Define parameter for set references' configuration <br>
/// refs_conf is an optional parameter
-DEFINE_string(refs_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "references_config.xml"}), refs_conf_message);
\ No newline at end of file
+DEFINE_string(refs_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "references_config.xml"}), refs_conf_message);
+
+/// @brief message for collect_results_only argument
+static const char collect_results_only_message[] = "Optional. Flag to disable comparison with references and collect memory consumption results only.";
+
+/// @brief Define parameter for mode with collecting results only <br>
+/// collect_results_only is an optional parameter
+DEFINE_bool(collect_results_only, false, collect_results_only_message);
<attributes>
<models>
<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="CPU" vmsize="740214" vmpeak="805110" vmrss="129308" vmhwm="129308" />
- <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="739154" vmpeak="739154" vmrss="346522" vmhwm="346522" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="922147" vmpeak="922147" vmrss="587522" vmhwm="587522" />
<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="CPU" vmsize="1007890" vmpeak="1007890" vmrss="138652" vmhwm="138652" />
- <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="824366" vmpeak="909563" vmrss="347167" vmhwm="347167" />
+ <model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="1006439" vmpeak="1091636" vmrss="587241" vmhwm="587241" />
<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="CPU" vmsize="691589" vmpeak="922864" vmrss="31054" vmhwm="31054" />
- <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="520530" vmpeak="522740" vmrss="127706" vmhwm="129630" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="626194" vmpeak="626194" vmrss="290695" vmhwm="290695" />
<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="CPU" vmsize="958240" vmpeak="1043437" vmrss="31366" vmhwm="31366" />
- <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="605727" vmpeak="690924" vmrss="127753" vmhwm="129537" />
+ <model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="708734" vmpeak="793930" vmrss="287877" vmhwm="287877" />
<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="CPU" vmsize="1046988" vmpeak="1179042" vmrss="307990" vmhwm="439457" />
- <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="1108775" vmpeak="1126985" vmrss="716341" vmhwm="734578" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="1267775" vmpeak="1279647" vmrss="932672" vmhwm="944626" />
<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="CPU" vmsize="1321819" vmpeak="1321819" vmrss="374207" vmhwm="439748" />
- <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1199957" vmpeak="1285154" vmrss="728046" vmhwm="734593" />
+ <model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1356565" vmpeak="1441762" vmrss="941418" vmhwm="947060" />
<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="2133814" vmpeak="2836412" vmrss="1438049" vmhwm="2140533" />
- <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2707988" vmpeak="3834209" vmrss="2313022" vmhwm="3439202" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="2801422" vmpeak="3915366" vmrss="2465065" vmhwm="3578811" />
<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2401380" vmpeak="2836412" vmrss="1469832" vmhwm="2140377" />
- <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2793211" vmpeak="3834235" vmrss="2314192" vmhwm="3439550" />
+ <model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2892432" vmpeak="3939166" vmrss="2472017" vmhwm="3602924" />
</models>
</attributes>
return 0; // TODO return correct status
}
+ Environment::Instance().setCollectResultsOnly(FLAGS_collect_results_only);
pugi::xml_document config;
config.load_file(FLAGS_test_conf.c_str());
Environment::Instance().setTestConfig(config);
#include <gtest/gtest.h>
-#define checkRefVmValues() \
- ASSERT_GT(test_refs.ref_vmsize, 0) << "Reference value of VmSize is less than 0. Value: " << test_refs.ref_vmsize; \
- ASSERT_GT(test_refs.ref_vmsize, 0) << "Reference value of VmPeak is less than 0. Value: " << test_refs.ref_vmpeak; \
- ASSERT_GT(test_refs.ref_vmrss, 0) << "Reference value of VmRSS is less than 0. Value: " << test_refs.ref_vmrss; \
- ASSERT_GT(test_refs.ref_vmrss, 0) << "Reference value of VmHWM is less than 0. Value: " << test_refs.ref_vmhwm;
+#define checkRefVmValues() \
+ if (!Environment::Instance().getCollectResultsOnly()) { \
+ ASSERT_GT(test_refs.ref_vmsize, 0) << "Reference value of VmSize is less than 0. Value: " \
+ << test_refs.ref_vmsize; \
+        ASSERT_GT(test_refs.ref_vmpeak, 0) << "Reference value of VmPeak is less than 0. Value: "                     \
+ << test_refs.ref_vmpeak; \
+ ASSERT_GT(test_refs.ref_vmrss, 0) << "Reference value of VmRSS is less than 0. Value: " \
+ << test_refs.ref_vmrss; \
+        ASSERT_GT(test_refs.ref_vmhwm, 0) << "Reference value of VmHWM is less than 0. Value: "                       \
+ << test_refs.ref_vmhwm; \
+ }
class MemCheckTestSuite : public ::testing::TestWithParam<TestCase> {
};
log_info_ref_mem_usage();
log_info_cur_mem_usage();
- if (test_cur_vmhwm > ref_vmhwm)
+ if ((!Environment::Instance().getCollectResultsOnly()) && (test_cur_vmhwm > ref_vmhwm))
return TestResult(TestStatus::TEST_FAILED,
"Test failed: HWM (peak of RSS) virtual memory consumption is greater than reference.\n"
"Reference HWM of memory consumption: " + std::to_string(ref_vmhwm) + " KB.\n" +
getAlignedVmValues(test_cur_vmsize, test_cur_vmpeak, test_cur_vmrss, test_cur_vmhwm,
vmsize_before_test, vmrss_before_test);
- if (test_cur_vmrss > ref_vmrss) {
+ if ((!Environment::Instance().getCollectResultsOnly()) && (test_cur_vmrss > ref_vmrss)) {
log_debug_ref_record_for_test("infer_request_inference");
return TestResult(TestStatus::TEST_FAILED,
"Test failed: RSS virtual memory consumption became greater than reference "
#include <math.h>
-#include <inference_engine.hpp>
#include <algorithm>
#include <array>
+#include <inference_engine.hpp>
#include <string>
using namespace InferenceEngine;
#define MAX_OUTLIERS 5
// Maximum number of measuring pipeline restarts
#define MAX_RETRY 3
+// Maximum values to compute an average for reference
+#define MAX_AVERAGE 100
+// Size of log line string to pre-allocate
+#define LOG_LINE_RESERVE 1024
// A threshold for which memory growth will be considered an error
#define THRESHOLD 0.1
// Measure values
enum MeasureValue { VMRSS = 0, VMHWM, VMSIZE, VMPEAK, MeasureValueMax };
-namespace util {
+namespace util {
template <typename In, typename Out, typename Func>
void transform(const In& in, Out& out, const Func& func) {
std::transform(std::begin(in), std::end(in), std::begin(out), func);
std::array<bool, MeasureValueMax> outlier = {0}; // flag if current does not fit threshold
std::array<int, MeasureValueMax> outlier_count = {0}; // counter for how many times current does not fit threshold
std::array<float, MeasureValueMax> threshold = {0}; // ref * THRESHOLD
+ std::vector<std::array<long, MeasureValueMax>> past; // past measures
std::string progress_str;
- progress_str.reserve(1024);
+ progress_str.reserve(LOG_LINE_RESERVE);
+    past.resize(std::max(1, std::min(n / 2, MAX_AVERAGE)));  // at least 1 element to avoid modulo-by-zero when n < 2
log_info("Warming up for " << WARMUP_STEPS << " iterations");
log_info("i\tVMRSS\tVMHWM\tVMSIZE\tVMPEAK");
int measure_count = n;
- for (int iteration = 0; measure_count > 0; iteration++) {
+ for (size_t iteration = 0; measure_count > 0; iteration++) {
// Warm up to take reference values
test_pipeline();
getVmValues(cur[VMSIZE], cur[VMPEAK], cur[VMRSS], cur[VMHWM]);
+ past[iteration % past.size()] = cur;
progress_str = std::to_string(iteration + 1) + "\t" + std::to_string(cur[VMRSS]) + "\t" +
std::to_string(cur[VMHWM]) + "\t" + std::to_string(cur[VMSIZE]) + "\t" +
std::to_string(cur[VMPEAK]);
retry_count++;
measure_count = n;
outlier_count = {0};
- ref = cur;
+ // set reference as an average of `past` elements
+ ref = {0};
+ size_t past_size = std::min(iteration + 1, past.size()); // count number of past elements
+ for (size_t i = 0; i < past_size; i++) {
+ // ref = ref + past
+ util::transform(ref, past[i], ref, [](long ref_val, long past_val) -> long {
+ return ref_val + past_val;
+ });
+ }
+ // ref = ref / past_size
+ util::transform(ref, ref, [&past_size](long ref_val) -> float {
+ return ref_val / past_size;
+ });
+ // threshold = THRESHOLD * ref
util::transform(ref, threshold, [](long ref_val) -> float {
return THRESHOLD * ref_val;
});
- log_info("Setting thresholds VMRSS=" << ref[VMRSS] << "(+-" << static_cast<int>(threshold[VMRSS])
- << "), VMHWM=" << ref[VMHWM] << "(+-"
- << static_cast<int>(threshold[VMHWM]) << ")");
+ log_info("Setting thresholds to average of "
+ << past_size << " past elements:"
+ << " VMRSS=" << ref[VMRSS] << "(+-" << static_cast<int>(threshold[VMRSS]) << "),"
+ << " VMHWM=" << ref[VMHWM] << "(+-" << static_cast<int>(threshold[VMHWM]) << ")");
}
measure_count--;
+ // diff = cur - ref
util::transform(cur, ref, diff, [](long cur_val, long ref_val) -> long {
- return labs(cur_val - ref_val);
+ // no labs() here - ignore cur smaller than ref
+ return cur_val - ref_val;
});
+ // outlier = diff > threshold
util::transform(diff, threshold, outlier, [](long diff_val, float threshold_val) -> bool {
return diff_val > threshold_val;
});
+ // outlier_count = outlier_count + (outlier ? 1 : 0)
util::transform(outlier, outlier_count, outlier_count,
[](bool outlier_val, long outlier_count_val) -> long {
return outlier_count_val + (outlier_val ? 1 : 0);
return TestResult(TestStatus::TEST_OK, "");
}
-
-TestResult test_load_unload_plugin(const std::string &target_device, const int &n) {
+TestResult test_load_unload_plugin(const std::string& target_device, const int& n) {
log_info("Load/unload plugin for device: " << target_device << " for " << n << " times");
return common_test_pipeline(load_unload_plugin(target_device), n);
}
-TestResult test_read_network(const std::string &model, const int &n) {
+TestResult test_read_network(const std::string& model, const int& n) {
log_info("Read network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(read_network(model), n);
}
-TestResult test_create_cnnnetwork(const std::string &model, const int &n) {
+TestResult test_create_cnnnetwork(const std::string& model, const int& n) {
log_info("Create CNNNetwork from network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(create_cnnnetwork(model), n);
}
-TestResult test_cnnnetwork_reshape_batch_x2(const std::string &model, const int &n) {
+TestResult test_cnnnetwork_reshape_batch_x2(const std::string& model, const int& n) {
log_info("Reshape to batch*=2 of CNNNetwork created from network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(cnnnetwork_reshape_batch_x2(model), n);
}
-TestResult test_set_input_params(const std::string &model, const int &n) {
+TestResult test_set_input_params(const std::string& model, const int& n) {
log_info("Apply preprocessing for CNNNetwork from network: \"" << model << "\" for " << n << " times");
return common_test_pipeline(set_input_params(model), n);
}
-TestResult test_create_exenetwork(const std::string &model, const std::string &target_device, const int &n) {
- log_info("Create ExecutableNetwork from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
- << " times");
+TestResult test_create_exenetwork(const std::string& model, const std::string& target_device, const int& n) {
+ log_info("Create ExecutableNetwork from network: \"" << model << "\" for device: \"" << target_device << "\" for "
+ << n << " times");
return common_test_pipeline(create_exenetwork(model, target_device), n);
}
-TestResult
-test_recreate_exenetwork(InferenceEngine::Core &ie, const std::string &model, const std::string &target_device,
- const int &n) {
- log_info("Recreate ExecutableNetwork from network within existing InferenceEngine::Core: \"" << model
- << "\" for device: \""
- << target_device
- << "\" for " << n
- << " times");
+TestResult test_recreate_exenetwork(InferenceEngine::Core& ie, const std::string& model,
+ const std::string& target_device, const int& n) {
+ log_info("Recreate ExecutableNetwork from network within existing InferenceEngine::Core: \""
+ << model << "\" for device: \"" << target_device << "\" for " << n << " times");
return common_test_pipeline(recreate_exenetwork(ie, model, target_device), n);
}
-TestResult test_create_infer_request(const std::string &model, const std::string &target_device, const int &n) {
- log_info("Create InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
+TestResult test_create_infer_request(const std::string& model, const std::string& target_device, const int& n) {
+ log_info("Create InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for " << n
<< " times");
return common_test_pipeline(create_infer_request(model, target_device), n);
}
-TestResult
-test_recreate_infer_request(ExecutableNetwork &network, const std::string &model, const std::string &target_device,
- const int &n) {
- log_info("Create InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
+TestResult test_recreate_infer_request(ExecutableNetwork& network, const std::string& model,
+ const std::string& target_device, const int& n) {
+ log_info("Create InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for " << n
<< " times");
return common_test_pipeline(recreate_infer_request(network), n);
}
-TestResult
-test_infer_request_inference(const std::string &model, const std::string &target_device, const int &n) {
- log_info("Inference of InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
- << " times");
+TestResult test_infer_request_inference(const std::string& model, const std::string& target_device, const int& n) {
+ log_info("Inference of InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for "
+ << n << " times");
return common_test_pipeline(infer_request_inference(model, target_device), n);
}
-TestResult
-test_reinfer_request_inference(InferenceEngine::InferRequest &infer_request, InferenceEngine::CNNNetwork &cnnNetwork,
- const std::string &model, const std::string &target_device, const int &n) {
- log_info("Inference of InferRequest from network: \"" << model
- << "\" for device: \"" << target_device << "\" for " << n
- << " times");
+TestResult test_reinfer_request_inference(InferenceEngine::InferRequest& infer_request,
+ InferenceEngine::CNNNetwork& cnnNetwork, const std::string& model,
+ const std::string& target_device, const int& n) {
+ log_info("Inference of InferRequest from network: \"" << model << "\" for device: \"" << target_device << "\" for "
+ << n << " times");
return common_test_pipeline(reinfer_request_inference(infer_request, cnnNetwork), n);
}
import sys
from inspect import getsourcefile
from pathlib import Path
+from xml.etree import ElementTree as ET
log.basicConfig(format="{file}: [ %(levelname)s ] %(message)s".format(file=os.path.basename(__file__)),
level=log.INFO, stream=sys.stdout)
# Parameters
OMZ_NUM_ATTEMPTS = 6
-MODEL_NAMES = 'vgg16,mtcnn-r,mobilenet-ssd,ssd300'
+MODEL_NAMES = ['vgg16', 'mtcnn-r', 'mobilenet-ssd', 'ssd300'] # TODO (vurusovs): remove after merge changes in product-configs
def abs_path(relative_path):
description='Acquire test data',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--test_conf', required=False,
+ # TODO (vurusovs): make it required after merge changes in product-configs
+ type=Path,
+ help='Path to a test config .xml file containing models '
+ 'which will be downloaded and converted to IRs via OMZ.')
parser.add_argument('--omz_repo', required=False,
help='Path to Open Model Zoo (OMZ) repository. It will be used to skip cloning step.')
- parser.add_argument('--mo_tool', default='../../model-optimizer/mo.py',
+ parser.add_argument('--mo_tool', type=Path,
+ default=Path('../../../model-optimizer/mo.py').resolve(),
help='Path to Model Optimizer (MO) runner. Required for OMZ converter.py only.')
- parser.add_argument('--omz_models_out_dir', default='../_omz_out/models',
+ parser.add_argument('--omz_models_out_dir', type=Path,
+ default=abs_path('../_omz_out/models'),
help='Directory to put test data into. Required for OMZ downloader.py and converter.py')
- parser.add_argument('--omz_irs_out_dir', default='../_omz_out/irs',
+ parser.add_argument('--omz_irs_out_dir', type=Path,
+ default=abs_path('../_omz_out/irs'),
help='Directory to put test data into. Required for OMZ converter.py only.')
- parser.add_argument('--omz_cache_dir', default='../_omz_out/cache',
+ parser.add_argument('--omz_cache_dir', type=Path,
+ default=abs_path('../_omz_out/cache'),
help='Directory with test data cache. Required for OMZ downloader.py only.')
parser.add_argument('--no_venv', action="store_true",
help='Skip preparation and use of virtual environment to convert models via OMZ converter.py.')
args = parser.parse_args()
- models_out_dir = Path(abs_path(args.omz_models_out_dir))
- irs_out_dir = Path(abs_path(args.omz_irs_out_dir))
- cache_dir = Path(abs_path(args.omz_cache_dir))
- mo_tool = Path(args.mo_tool).resolve()
+
+ # Step 0: prepare models list
+ if not args.test_conf: # TODO (vurusovs): remove after merge changes in product-configs
+ models_names = MODEL_NAMES
+ else:
+ tree = ET.parse(str(args.test_conf))
+ root = tree.getroot()
+ models_names = []
+ for attributes in root:
+ if attributes.tag == "models":
+ models = [child.text for child in attributes]
+ models_names = [Path(model).stem for model in models]
+ break
+
+ models_list_path = Path().resolve() / "models_list.txt"
+ log.info("List of models from {models_list_path} used for downloader.py and converter.py: "
+ "{models_names}".format(models_list_path=models_list_path, models_names=",".join(models_names)))
+ with open(str(models_list_path), "w") as file:
+ file.writelines([name + "\n" for name in models_names])
# Step 1: prepare Open Model Zoo
if args.omz_repo:
# Step 3: prepare models
downloader_path = omz_path / "tools" / "downloader" / "downloader.py"
- cmd = '{downloader_path} --name "{MODEL_NAMES}"' \
+ cmd = '{downloader_path} --list {models_list_path}' \
' --num_attempts {num_attempts}' \
' --output_dir {models_dir}' \
- ' --cache_dir {cache_dir}'.format(downloader_path=downloader_path, MODEL_NAMES=MODEL_NAMES,
+ ' --cache_dir {cache_dir}'.format(downloader_path=downloader_path, models_list_path=models_list_path,
num_attempts=OMZ_NUM_ATTEMPTS,
- models_dir=models_out_dir,
- cache_dir=cache_dir)
+ models_dir=args.omz_models_out_dir,
+ cache_dir=args.omz_cache_dir)
run_in_subprocess(cmd)
# Step 4: prepare virtual environment and install requirements
Venv = VirtualEnv("./.stress_venv")
requirements = [
omz_path / "tools" / "downloader" / "requirements.in",
- mo_tool.parent / "requirements.txt",
- mo_tool.parent / "requirements_dev.txt",
+ args.mo_tool.parent / "requirements.txt",
+ args.mo_tool.parent / "requirements_dev.txt",
# omz_path / "tools" / "downloader" / "requirements-caffe2.in",
# omz_path / "tools" / "downloader" / "requirements-pytorch.in"
]
# Step 5: convert models to IRs
converter_path = omz_path / "tools" / "downloader" / "converter.py"
# NOTE: remove --precision if both precisions (FP32 & FP16) required
- cmd = '{executable} {converter_path} --name "{MODEL_NAMES}"' \
+ cmd = '{executable} {converter_path} --list "{models_list_path}"' \
' -p {executable}' \
' --precision=FP32' \
' --output_dir {irs_dir}' \
' --download_dir {models_dir}' \
' --mo {mo_tool} --jobs {workers_num}'.format(executable=python_executable, converter_path=converter_path,
- MODEL_NAMES=MODEL_NAMES, irs_dir=irs_out_dir,
- models_dir=models_out_dir, mo_tool=mo_tool,
+ models_list_path=models_list_path, irs_dir=args.omz_irs_out_dir,
+ models_dir=args.omz_models_out_dir, mo_tool=args.mo_tool,
workers_num=multiprocessing.cpu_count())
run_in_subprocess(cmd)
--- /dev/null
+#!/usr/bin/env python3
+# Copyright (C) 2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+"""
+Upload metrics gathered by MemCheckTests into Mongo DB
+Usage: ./scripts/memcheck_upload.py https://ci.intel.com/job/memcheck/1234/ \
+ ./gtest-parallel-logs/**/*.log \
+ --artifact_root ./gtest-parallel-logs --dryrun
+"""
+
+import json
+import logging
+from types import SimpleNamespace
+import os
+import re
+import sys
+import argparse
+from glob import glob
+import xml.etree.ElementTree as ET
+import hashlib
+from pymongo import MongoClient
+
+
+# Target MongoDB database name.
+DATABASE = 'memcheck'
+# Matches the "<model .../>" XML line printed into the gtest log.
+RE_GTEST_MODEL_XML = re.compile(r'<model[^>]*>')
+# Header lines of the current/reference memory-consumption tables in the log.
+RE_GTEST_CUR_MEASURE = re.compile(
+    r'Current values of virtual memory consumption')
+RE_GTEST_REF_MEASURE = re.compile(
+    r'Reference values of virtual memory consumption')
+# gtest verdict markers.
+RE_GTEST_PASSED = re.compile(r'\[\s*PASSED\s*\]')
+RE_GTEST_FAILED = re.compile(r'\[\s*FAILED\s*\]')
+# Prefix each measurement table line carries; stripped before parsing.
+GTEST_INFO = '[ INFO ]'
+# NOTE(review): identifier is misspelled ("PRECISSIONS"); kept as-is since it
+# is referenced below in parse_memcheck_log.
+PRECISSIONS = ('FP32', 'FP16', 'INT8')
+# Fields hashed into a record's deterministic `_id` (see create_memcheck_records).
+KEY_FIELDS = ('test_name', 'model', 'device', 'build_url')
+
+
+def globber(paths):
+    """Generator extending paths with wildcards.
+
+    Paths containing glob magic characters are expanded with
+    glob(..., recursive=True) so `**` patterns work; plain paths are
+    yielded unchanged without checking that they exist.
+    """
+    for path in paths:
+        if any(magic in path for magic in ['*', '?', '!', '[', ']']):
+            for resolved in glob(path, recursive=True):
+                yield resolved
+        else:
+            yield path
+
+
+def parse_memcheck_log(log_path):
+ """ Parse memcheck log
+ """
+ with open(log_path, 'r') as log_file:
+ log = log_file.read()
+
+ passed_match = RE_GTEST_PASSED.search(log)
+ failed_match = RE_GTEST_FAILED.search(log)
+ model_match = RE_GTEST_MODEL_XML.search(log)
+ if not model_match:
+ return None
+ model = ET.fromstring(model_match.group(0)).attrib
+
+ log_lines = log.splitlines()
+ for index, line in enumerate(log_lines):
+ if RE_GTEST_REF_MEASURE.search(line):
+ heading = [name.lower() for name in log_lines[index+1]
+ [len(GTEST_INFO):].split()]
+ values = [int(val) for val in log_lines[index+2]
+ [len(GTEST_INFO):].split()]
+ ref_metrics = dict(zip(heading, values))
+ for index in reversed(range(len(log_lines))):
+ if RE_GTEST_CUR_MEASURE.search(log_lines[index]):
+ heading = [name.lower() for name in log_lines[index+1]
+ [len(GTEST_INFO):].split()]
+ values = [int(val) for val in log_lines[index+2]
+ [len(GTEST_INFO):].split()]
+ entry = SimpleNamespace(
+ metrics=dict(zip(heading, values)),
+ test_name=model['test'],
+ model_name=os.path.splitext(
+ os.path.basename(model['path']))[0],
+ precision=next(pr for pr in PRECISSIONS if pr.upper()
+ in model['path'].upper()),
+ model=model['path'],
+ device=model['device'].upper(),
+ status='passed' if passed_match else 'failed' if failed_match else 'started'
+ )
+ if ref_metrics:
+ entry.ref_metrics = ref_metrics
+ return vars(entry)
+ return None
+
+
+def create_memcheck_records(logs, build_url, artifact_root, append=None):
+    """ Parse memcheck logs and create records for MongoDB
+
+    Logs that parse_memcheck_log cannot parse are skipped, so the result
+    may be shorter than `logs`. `append` is an optional dict merged into
+    every record (it may override parsed fields, as it is applied before
+    the `_id` is computed).
+    """
+    records = []
+    for log in logs:
+        data = parse_memcheck_log(log)
+        if not data:
+            continue
+        data['build_url'] = build_url
+        # Store log location relative to the artifact root, not an absolute path.
+        data['log_path'] = os.path.relpath(log, artifact_root)
+        if append:
+            data.update(append)
+
+        # Deterministic id from the key fields makes repeated uploads of the
+        # same run idempotent (upload_memcheck_records upserts by `_id`).
+        data['_id'] = hashlib.sha256(
+            ''.join([str(data[key]) for key in KEY_FIELDS]).encode()).hexdigest()
+        records += [data]
+    return records
+
+
+def upload_memcheck_records(records, db_url, db_collection):
+    """ Upload records created by create_memcheck_records
+
+    Each record is upserted by its `_id`, so re-uploading the same record
+    replaces the previous document instead of duplicating it.
+    """
+    client = MongoClient(db_url)
+    collection = client[DATABASE][db_collection]
+    for record in records:
+        collection.replace_one({'_id': record['_id']}, record, upsert=True)
+
+
+def main():
+ """Main entry point.
+ """
+ parser = argparse.ArgumentParser(
+ description='Upload metrics gathered by memcheck into Mongo DB',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('--dryrun', action="store_true",
+ help='Parse logs, not modify database.')
+ is_dryrun = parser.parse_known_args()[0].dryrun
+ parser.add_argument('build_url', help='A place where memcheck execution logs can be found.')
+ parser.add_argument('log', nargs='+', help='Local path to log. Extended wildcards supported.')
+ parser.add_argument('--db_url', required=not is_dryrun,
+ help='MongoDB URL in a for "mongodb://server:port".')
+ parser.add_argument('--db_collection', required=not is_dryrun,
+ help=f'Collection name in {DATABASE} database to upload')
+ parser.add_argument('--artifact_root', required=True,
+ help=f'A root directory to strip from log path before upload.')
+ parser.add_argument('--append', help='JSON to append to each item.')
+ args = parser.parse_args()
+
+ logging.basicConfig(format="{file}: [ %(levelname)s ] %(message)s".format(
+ file=os.path.basename(__file__)), level=logging.INFO, stream=sys.stdout)
+
+ if args.append:
+ with open(args.append, 'r') as append_file:
+ append = json.load(append_file)
+ else:
+ append = None
+
+ logs = list(globber(args.log))
+ records = create_memcheck_records(
+ logs, args.build_url, args.artifact_root, append=append)
+ logging.info('Prepared %d records', len(records))
+ if len(records) != len(logs):
+ logging.warning(
+ 'Skipped %d logs of %d', len(logs) - len(records), len(logs))
+ if not args.dryrun:
+ upload_memcheck_records(records, args.db_url, args.db_collection)
+ logging.info('Uploaded to %s', args.db_url)
+ else:
+ print(json.dumps(records, sort_keys=True, indent=4))
+
+
+if __name__ == "__main__":
+ main()
--- /dev/null
+pymongo
\ No newline at end of file
-t TIME, --time TIME Optional. Time in seconds to execute topology.
-progress [PROGRESS] Optional. Show progress bar (can affect performance
measurement). Default values is "False".
+ -shape SHAPE Optional. Set shape for input. For example,
+ "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in
+ case of one input size.
-nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS
Optional. Number of streams to use for inference on the CPU/GPU in throughput mode
(for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
from .utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, GPU_DEVICE_NAME, BIN_EXTENSION
from .utils.logging import logger
from .utils.utils import get_duration_seconds
-from .utils.inputs_filling import get_blob_shape
from .utils.statistics_report import StatisticsReport
class Benchmark:
version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
return version_string
- @staticmethod
- def reshape(ie_network: IENetwork, batch_size: int):
- new_shapes = {}
- for input_layer_name, input_layer in ie_network.inputs.items():
- new_shapes[input_layer_name] = get_blob_shape(input_layer, batch_size)
-
- if new_shapes:
- logger.info('Resizing network to batch = {}'.format(batch_size))
- ie_network.reshape(new_shapes)
-
def set_config(self, config = {}):
for device in config.keys():
self.ie.set_config(config[device], device)
return exe_network
+    def import_network(self, path_to_file : str, config = {}):
+        """Import a pre-compiled network blob from `path_to_file` onto the device.
+
+        Requests one infer request for the 'sync' API type, otherwise the
+        configured nireq (0 lets the plugin decide); then records the number
+        of requests actually created back into self.nireq.
+        NOTE(review): the mutable default `config={}` mirrors set_config's
+        signature; it is only passed through here, never mutated.
+        """
+        exe_network = self.ie.import_network(model_file=path_to_file,
+                                             device_name=self.device,
+                                             config=config,
+                                             num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
+        # Number of requests
+        self.nireq = len(exe_network.requests)
+        return exe_network
+
def infer(self, exe_network, batch_size, progress_bar=None):
progress_count = 0
infer_requests = exe_network.requests
from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
-from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION
+from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, \
+ GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION, BLOB_EXTENSION
from openvino.tools.benchmark.utils.inputs_filling import set_inputs
from openvino.tools.benchmark.utils.logging import logger
from openvino.tools.benchmark.utils.progress_bar import ProgressBar
from openvino.tools.benchmark.utils.utils import next_step, config_network_inputs, get_number_iterations, \
process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
- get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, load_config, dump_config
+ get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, update_shapes, \
+ adjust_shapes_batch, load_config, dump_config
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport
+
def main():
# ------------------------------ 1. Parsing and validating input arguments -------------------------------------
next_step()
run(parse_args())
+
def run(args):
statistics = None
try:
if args.load_config:
load_config(args.load_config, config)
+ is_network_compiled = False
+ _, ext = os.path.splitext(args.path_to_model)
+
+ if ext == BLOB_EXTENSION:
+ is_network_compiled = True
+ print("Network is compiled")
+
# ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
next_step(step_id=2)
logger.info(version)
- # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
- next_step()
-
- start_time = datetime.utcnow()
- ie_network = benchmark.read_network(args.path_to_model)
- duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
- logger.info("Read network took {} ms".format(duration_ms))
- if statistics:
- statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
- [
- ('read network time (ms)', duration_ms)
- ])
-
- # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
-
- next_step()
- if args.batch_size and args.batch_size != ie_network.batch_size:
- benchmark.reshape(ie_network, args.batch_size)
- batch_size = ie_network.batch_size
- logger.info('Network batch size: {}'.format(ie_network.batch_size))
-
- # --------------------- 5. Configuring input of the model ------------------------------------------------------
- next_step()
-
- config_network_inputs(ie_network)
-
- # --------------------- 6. Setting device configuration --------------------------------------------------------
+ # --------------------- 3. Setting device configuration --------------------------------------------------------
next_step()
perf_counts = False
perf_counts = perf_counts
benchmark.set_config(config)
+ batch_size = args.batch_size
+ if not is_network_compiled:
+ # --------------------- 4. Read the Intermediate Representation of the network -----------------------------
+ next_step()
+
+ start_time = datetime.utcnow()
+ ie_network = benchmark.read_network(args.path_to_model)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Read network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('read network time (ms)', duration_ms)
+ ])
- # --------------------- 7. Loading the model to the device -----------------------------------------------------
+ # --------------------- 5. Resizing network to match image sizes and given batch ---------------------------
+ next_step()
+
+ shapes = {k: v.shape.copy() for k, v in ie_network.inputs.items()}
+ reshape = False
+ if args.shape:
+ reshape |= update_shapes(shapes, args.shape, ie_network.inputs)
+ if args.batch_size and args.batch_size != ie_network.batch_size:
+ reshape |= adjust_shapes_batch(shapes, args.batch_size, ie_network.inputs)
+
+ if reshape:
+ start_time = datetime.utcnow()
+ logger.info(
+ 'Reshaping network: {}'.format(', '.join("'{}': {}".format(k, v) for k, v in shapes.items())))
+ ie_network.reshape(shapes)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Reshape network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('reshape network time (ms)', duration_ms)
+ ])
+
+ batch_size = ie_network.batch_size
+ logger.info('Network batch size: {}'.format(ie_network.batch_size))
+
+ # --------------------- 6. Configuring input of the model --------------------------------------------------
+ next_step()
+
+ config_network_inputs(ie_network)
+
+ # --------------------- 7. Loading the model to the device -------------------------------------------------
+ next_step()
+
+ start_time = datetime.utcnow()
+ exe_network = benchmark.load_network(ie_network)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Load network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('load network time (ms)', duration_ms)
+ ])
+ else:
+ next_step()
+ print("Skipping the step for compiled network")
+ next_step()
+ print("Skipping the step for compiled network")
+ next_step()
+ print("Skipping the step for compiled network")
+
+ # --------------------- 7. Loading the model to the device -------------------------------------------------
+ next_step()
+
+ start_time = datetime.utcnow()
+ exe_network = benchmark.import_network(args.path_to_model)
+ duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
+ logger.info("Import network took {} ms".format(duration_ms))
+ if statistics:
+ statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
+ [
+ ('import network time (ms)', duration_ms)
+ ])
+ if batch_size == 0:
+ batch_size = 1
+
+ # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
next_step()
- start_time = datetime.utcnow()
- exe_network = benchmark.load_network(ie_network)
- duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
- logger.info("Load network took {} ms".format(duration_ms))
- if statistics:
- statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS,
- [
- ('load network time (ms)', duration_ms)
- ])
- ## Update number of streams
+ # Update number of streams
for device in device_number_streams.keys():
key = device + '_THROUGHPUT_STREAMS'
device_number_streams[device] = benchmark.ie.get_config(device, key)
- # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
- next_step()
-
# Number of requests
infer_requests = exe_network.requests
if args.paths_to_input:
for path in args.paths_to_input:
paths_to_input.append(os.path.abspath(*path) if args.paths_to_input else None)
- set_inputs(paths_to_input, batch_size, ie_network.inputs, infer_requests)
+ set_inputs(paths_to_input, batch_size, exe_network.inputs, infer_requests)
if statistics:
statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
import sys,argparse
from fnmatch import fnmatch
-from openvino.tools.benchmark.utils.constants import XML_EXTENSION_PATTERN
+from openvino.tools.benchmark.utils.constants import XML_EXTENSION_PATTERN, BLOB_EXTENSION_PATTERN
from openvino.tools.benchmark.utils.utils import show_available_devices
def str2bool(v):
raise Exception("Number of iterations should be positive (invalid -niter option value)")
if args.number_infer_requests and args.number_infer_requests < 0:
raise Exception("Number of inference requests should be positive (invalid -nireq option value)")
- if not fnmatch(args.path_to_model, XML_EXTENSION_PATTERN):
- raise Exception('Path {} is not xml file.')
+ if not (fnmatch(args.path_to_model, XML_EXTENSION_PATTERN) or fnmatch(args.path_to_model, BLOB_EXTENSION_PATTERN)):
+ raise Exception('Path {} is not xml or blob file.')
class print_help(argparse.Action):
help='Optional. '
'Path to a folder with images and/or binaries or to specific image or binary file.')
args.add_argument('-m', '--path_to_model', type=str, required=True,
- help='Required. Path to an .xml file with a trained model.')
+ help='Required. Path to an .xml file with a trained model or '
+ 'to a .blob file with a trained compiled model.')
args.add_argument('-d', '--target_device', type=str, required=False, default='CPU',
help='Optional. Specify a target device to infer on (the list of available devices is shown below). '
'Default value is CPU. Use \'-d HETERO:<comma separated devices list>\' format to specify HETERO plugin. '
'If not specified, the number of iterations is calculated depending on a device.')
args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=None,
help='Optional. Number of infer requests. Default value is determined automatically for device.')
- args.add_argument('-b', '--batch_size', type=int, required=False, default=None,
+ args.add_argument('-b', '--batch_size', type=int, required=False, default=0,
help='Optional. ' +
'Batch size value. ' +
'If not specified, the batch size value is determined from Intermediate Representation')
args.add_argument('-progress', type=str2bool, required=False, default=False, nargs='?', const=True,
help='Optional. '
'Show progress bar (can affect performance measurement). Default values is \'False\'.')
+ args.add_argument('-shape', type=str, required=False, default='',
+ help='Optional. '
+ 'Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.')
args.add_argument('-nstreams', '--number_streams', type=str, required=False, default=None,
help='Optional. Number of streams to use for inference on the CPU/GPU in throughput mode '
'(for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> '
XML_EXTENSION = '.xml'
BIN_EXTENSION = '.bin'
+BLOB_EXTENSION = '.blob'
XML_EXTENSION_PATTERN = '*' + XML_EXTENSION
+BLOB_EXTENSION_PATTERN = '*' + BLOB_EXTENSION
IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP']
BINARY_EXTENSIONS = ['BIN']
from .constants import IMAGE_EXTENSIONS, BINARY_EXTENSIONS
from .logging import logger
-
-def get_blob_shape(layer, batch_size: int):
- shape = layer.shape.copy()
- layout = layer.layout
-
- try:
- batch_index = layout.index('N')
- except ValueError:
- batch_index = 1 if layout == 'C' else -1
-
- if batch_index != -1 and shape[batch_index] != batch_size:
- shape[batch_index] = batch_size
-
- return shape
-
-
def is_image(blob):
if blob.layout != "NCHW":
return False
from .logging import logger
import json
+import re
def static_vars(**kwargs):
def decorate(func):
step_names = {
1: "Parsing and validating input arguments",
2: "Loading Inference Engine",
- 3: "Reading the Intermediate Representation network",
- 4: "Resizing network to match image sizes and given batch",
- 5: "Configuring input of the model",
- 6: "Setting device configuration",
+ 3: "Setting device configuration",
+ 4: "Reading the Intermediate Representation network",
+ 5: "Resizing network to match image sizes and given batch",
+ 6: "Configuring input of the model",
7: "Loading the model to the device",
8: "Setting optimal runtime parameters",
9: "Creating infer requests and filling input blobs with images",
parameters.append((arg_name, arg_value))
return parameters
+def update_shapes(shapes, shapes_string: str, inputs_info):
+    """Update the `shapes` dict in place from a -shape CLI string.
+
+    Accepts "name1[d1,d2,...],name2[...]" to set named inputs, or a bare
+    "[d1,d2,...]" to apply one shape to every input (processing stops at
+    the first anonymous entry). Returns True when at least one shape was
+    changed; raises Exception when the string yields no matches at all.
+    NOTE(review): `inputs_info` is currently unused, and named entries are
+    inserted into `shapes` without validating the name — confirm callers
+    pass valid input names.
+    """
+    updated = False
+    matches = re.findall(r'(.*?)\[(.*?)\],?', shapes_string)
+    if matches:
+        for match in matches:
+            input_name = match[0]
+            parsed_shape = [int(dim) for dim in match[1].split(',')]
+            if input_name != '':
+                shapes[input_name] = parsed_shape
+                updated = True
+            else:
+                # Anonymous "[...]" form: same shape for every input.
+                shapes.update({ k:parsed_shape for k in shapes.keys() })
+                updated = True
+                break
+    else:
+        raise Exception("Can't parse `shape` parameter: {}".format(shapes_string))
+    return updated
+
+def adjust_shapes_batch(shapes, batch_size: int, inputs_info):
+    """Force the batch dimension of each input shape to `batch_size`.
+
+    The batch axis is the position of 'N' in the input's layout; inputs
+    whose layout has no 'N' are left untouched. Mutates `shapes` in place
+    and returns True when any dimension was actually changed.
+    """
+    updated = False
+    for name, data in inputs_info.items():
+        layout = data.layout
+        batch_index = layout.index('N') if 'N' in layout else -1
+        if batch_index != -1 and shapes[name][batch_index] != batch_size:
+            shapes[name][batch_index] = batch_size
+            updated = True
+    return updated
+
def show_available_devices():
ie = IECore()
print("\nAvailable target devices: ", (" ".join(ie.available_devices)))