publish master branch snapshot, revision cdcab9d7ab48ffb0ee5629fabbfa06cb45debd9b
author Alexey Suhov <alexey.suhov@intel.com>
Wed, 15 Apr 2020 16:01:57 +0000 (19:01 +0300)
committer Alexey Suhov <alexey.suhov@intel.com>
Wed, 15 Apr 2020 16:01:57 +0000 (19:01 +0300)
971 files changed:
.gitignore
.gitmodules
CMakeLists.txt
Jenkinsfile [new file with mode: 0644]
cmake/developer_package.cmake
inference-engine/cmake/add_ie_target.cmake
inference-engine/cmake/clang_format.cmake
inference-engine/cmake/dependencies.cmake
inference-engine/cmake/developer_package_config.cmake.in
inference-engine/cmake/features_ie.cmake
inference-engine/cmake/models.cmake [new file with mode: 0644]
inference-engine/cmake/plugins/plugins.cmake
inference-engine/cmake/vpu_dependencies.cmake
inference-engine/ie_bridges/c/samples/hello_nv12_input_classification/main.c
inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/main.c
inference-engine/ie_bridges/c/src/CMakeLists.txt
inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/object_detection_sample_ssd.py
inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx
inference-engine/include/cpp/ie_cnn_net_reader.h
inference-engine/include/cpp/ie_cnn_network.h
inference-engine/include/details/ie_so_pointer.hpp
inference-engine/include/ie_icnn_net_reader.h
inference-engine/include/ie_icnn_network.hpp
inference-engine/include/ie_layers.h
inference-engine/include/ie_parameter.hpp
inference-engine/include/ie_plugin_config.hpp
inference-engine/include/ie_precision.hpp
inference-engine/samples/benchmark_app/README.md
inference-engine/samples/benchmark_app/benchmark_app.hpp
inference-engine/samples/benchmark_app/main.cpp
inference-engine/samples/benchmark_app/utils.cpp
inference-engine/samples/benchmark_app/utils.hpp
inference-engine/samples/hello_query_device/main.cpp
inference-engine/samples/speech_sample/main.cpp
inference-engine/scripts/run_code_checks.sh [new file with mode: 0644]
inference-engine/src/CMakeLists.txt
inference-engine/src/cldnn_engine/cldnn_config.cpp
inference-engine/src/cldnn_engine/cldnn_program.cpp
inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp
inference-engine/src/gna_plugin/gna_executable_network.hpp
inference-engine/src/gna_plugin/gna_graph_compiler.cpp
inference-engine/src/gna_plugin/gna_graph_compiler.hpp
inference-engine/src/gna_plugin/gna_model_serial.cpp
inference-engine/src/gna_plugin/gna_plugin.cpp
inference-engine/src/gna_plugin/gna_plugin.hpp
inference-engine/src/gna_plugin/gna_plugin_config.cpp [new file with mode: 0644]
inference-engine/src/gna_plugin/gna_plugin_config.hpp [new file with mode: 0644]
inference-engine/src/gna_plugin/gna_plugin_internal.hpp
inference-engine/src/gna_plugin/gna_plugin_policy.hpp
inference-engine/src/gna_plugin/gna_plugin_query_api.cpp
inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
inference-engine/src/hetero_plugin/hetero_async_infer_request.cpp
inference-engine/src/hetero_plugin/hetero_infer_request.cpp
inference-engine/src/hetero_plugin/hetero_infer_request.hpp
inference-engine/src/inference_engine/CMakeLists.txt
inference-engine/src/inference_engine/blob_factory.cpp
inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp
inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp [moved from inference-engine/src/plugin_api/cnn_network_ngraph_impl.hpp with 73% similarity]
inference-engine/src/inference_engine/generic_ie.cpp
inference-engine/src/inference_engine/ie_core.cpp
inference-engine/src/inference_engine/ie_rtti.cpp
inference-engine/src/inference_engine/ie_system_conf.cpp
inference-engine/src/inference_engine/os/lin/lin_system_conf.cpp
inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp
inference-engine/src/inference_engine/threading/ie_executor_manager.cpp
inference-engine/src/inference_engine/threading/ie_thread_affinity.cpp
inference-engine/src/ir_readers/CMakeLists.txt [new file with mode: 0644]
inference-engine/src/ir_readers/ie_blob_proxy.hpp [moved from inference-engine/src/inference_engine/ie_blob_proxy.hpp with 100% similarity]
inference-engine/src/ir_readers/ie_cnn_net_reader_impl.cpp [moved from inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp with 92% similarity]
inference-engine/src/ir_readers/ie_cnn_net_reader_impl.h [moved from inference-engine/src/inference_engine/ie_cnn_net_reader_impl.h with 89% similarity]
inference-engine/src/ir_readers/ie_format_parser.cpp [moved from inference-engine/src/inference_engine/ie_format_parser.cpp with 99% similarity]
inference-engine/src/ir_readers/ie_format_parser.h [moved from inference-engine/src/inference_engine/ie_format_parser.h with 97% similarity]
inference-engine/src/ir_readers/ie_ir_parser.cpp [moved from inference-engine/src/inference_engine/ie_ir_parser.cpp with 99% similarity]
inference-engine/src/ir_readers/ie_ir_parser.hpp [moved from inference-engine/src/inference_engine/ie_ir_parser.hpp with 99% similarity]
inference-engine/src/ir_readers/ie_ir_reader.cpp [moved from inference-engine/src/inference_engine/ie_ir_reader.cpp with 100% similarity]
inference-engine/src/ir_readers/ie_ir_reader.hpp [moved from inference-engine/src/inference_engine/ie_ir_reader.hpp with 97% similarity]
inference-engine/src/ir_readers/ie_layer_parsers.cpp [moved from inference-engine/src/inference_engine/ie_layer_parsers.cpp with 100% similarity]
inference-engine/src/ir_readers/ie_layer_parsers.h [moved from inference-engine/src/inference_engine/ie_layer_parsers.h with 100% similarity]
inference-engine/src/ir_readers/parsers.h [moved from inference-engine/src/inference_engine/parsers.h with 100% similarity]
inference-engine/src/legacy_api/CMakeLists.txt
inference-engine/src/legacy_api/include/cnn_network_impl.hpp
inference-engine/src/legacy_api/include/convert_function_to_cnn_network.hpp [moved from inference-engine/src/plugin_api/convert_function_to_cnn_network.hpp with 68% similarity]
inference-engine/src/legacy_api/include/graph_transformer.h
inference-engine/src/legacy_api/include/ie_ngraph_utils.hpp [moved from inference-engine/src/inference_engine/ie_ngraph_utils.hpp with 94% similarity]
inference-engine/src/legacy_api/include/ie_util_internal.hpp
inference-engine/src/legacy_api/include/layer_transform.hpp
inference-engine/src/legacy_api/src/cnn_network_int8_normalizer.cpp
inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp [moved from inference-engine/src/inference_engine/convert_function_to_cnn_network.cpp with 59% similarity]
inference-engine/src/legacy_api/src/graph_transformer.cpp
inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp [moved from inference-engine/src/inference_engine/ie_cnn_layer_builder_ngraph.cpp with 90% similarity]
inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.h [moved from inference-engine/src/inference_engine/ie_cnn_layer_builder_ngraph.h with 96% similarity]
inference-engine/src/legacy_api/src/ie_layer_validators.cpp
inference-engine/src/legacy_api/src/ie_layer_validators.hpp
inference-engine/src/legacy_api/src/ie_layers.cpp
inference-engine/src/legacy_api/src/ie_util_internal.cpp
inference-engine/src/legacy_api/src/layer_transform.cpp
inference-engine/src/legacy_api/src/network_serializer.cpp
inference-engine/src/legacy_api/src/shape_infer/built-in/ie_built_in_holder.cpp
inference-engine/src/legacy_api/src/shape_infer/built-in/ie_scatter_shape_infer.hpp
inference-engine/src/low_precision_transformations/CMakeLists.txt
inference-engine/src/mkldnn_plugin/bf16transformer.cpp [new file with mode: 0644]
inference-engine/src/mkldnn_plugin/bf16transformer.h [new file with mode: 0644]
inference-engine/src/mkldnn_plugin/config.cpp
inference-engine/src/mkldnn_plugin/config.h
inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
inference-engine/src/mkldnn_plugin/nodes/base.hpp
inference-engine/src/mkldnn_plugin/nodes/interp.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h
inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp
inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
inference-engine/src/mkldnn_plugin/nodes/normalize.cpp
inference-engine/src/mkldnn_plugin/nodes/topk.cpp
inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
inference-engine/src/plugin_api/blob_factory.hpp
inference-engine/src/plugin_api/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp
inference-engine/src/plugin_api/ie_system_conf.h
inference-engine/src/plugin_api/threading/ie_cpu_streams_executor.hpp
inference-engine/src/plugin_api/threading/ie_executor_manager.hpp
inference-engine/src/plugin_api/threading/ie_istreams_executor.hpp
inference-engine/src/plugin_api/threading/ie_thread_affinity.hpp
inference-engine/src/plugin_api/threading/ie_thread_local.hpp
inference-engine/src/preprocessing/CMakeLists.txt
inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp
inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp
inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_simd_impl.hpp
inference-engine/src/transformations/CMakeLists.txt
inference-engine/src/transformations/src/transformations/pull_transpose_through_fq.cpp
inference-engine/src/transformations/src/transformations/utils/utils.cpp
inference-engine/src/vpu/common/CMakeLists.txt
inference-engine/src/vpu/common/include/vpu/ngraph/operations/dynamic_shape_resolver.hpp [new file with mode: 0644]
inference-engine/src/vpu/common/include/vpu/ngraph/operations/static_shape_nonzero.hpp [new file with mode: 0644]
inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape.hpp [new file with mode: 0644]
inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp [new file with mode: 0644]
inference-engine/src/vpu/common/include/vpu/utils/error.hpp
inference-engine/src/vpu/common/include/vpu/utils/ie_helpers.hpp
inference-engine/src/vpu/common/src/ngraph/operations/dynamic_shape_resolver.cpp [new file with mode: 0644]
inference-engine/src/vpu/common/src/ngraph/operations/static_shape_nonzero.cpp [new file with mode: 0644]
inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape.cpp [new file with mode: 0644]
inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_nonzero.cpp [new file with mode: 0644]
inference-engine/src/vpu/common/src/utils/ie_helpers.cpp
inference-engine/src/vpu/custom_kernels/grn.cl
inference-engine/src/vpu/custom_kernels/mvn.cl
inference-engine/src/vpu/graph_transformer/include/vpu/backend/backend.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/frontend/frontend.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/allocator.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/middleend/hw/tiling.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/middleend/hw/utility.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/middleend/sw/utility.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/model/data.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/batch_norm_contents.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/calculated_data_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/conv_weights_contents.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/data_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/deconvolution_contents.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/default_sw_weights_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/hw_const_data_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/hw_weights_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/ie_blob_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/kernel_binary_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/mean_contents.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/merge_fc_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/mtcnn_blob_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/prelu_blob_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/priorbox_contents.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/replicated_data_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/scaled_content.hpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/include/vpu/model/data_desc.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/model/stage.hpp
inference-engine/src/vpu/graph_transformer/include/vpu/stage_builder.hpp
inference-engine/src/vpu/graph_transformer/src/backend/dump_to_dot.cpp
inference-engine/src/vpu/graph_transformer/src/backend/serialize.cpp
inference-engine/src/vpu/graph_transformer/src/blob_reader.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/detect_network_batch.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/parse_data.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/pre_process.cpp
inference-engine/src/vpu/graph_transformer/src/frontend/remove_const_layers.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/allocator/allocator.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/hw/conv_tiling/hw_stage_tiler.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/hw/utility.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/adjust_data_batch.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/adjust_data_location.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/allocate_resources.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/eliminate_const_concat.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/final_check.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/hw_extra_split.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/hw_fc_tiling.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/merge_parallel_fc.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/replace_deconv_by_conv.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/replace_gemm_by_conv.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/replace_priorbox_with_const.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/replace_with_reduce_mean.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/split_conv3d_into_2d.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/split_grouped_conv.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/split_hw_conv_and_pool.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/split_hw_depth_convolution.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/split_pool3d_into_2d.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/sw_conv_adaptation.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/sw_deconv_adaptation.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/sw_fc_adaptation.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp
inference-engine/src/vpu/graph_transformer/src/middleend/sw/utility.cpp
inference-engine/src/vpu/graph_transformer/src/model/data.cpp
inference-engine/src/vpu/graph_transformer/src/model/data_contents/batch_norm_contents.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/calculated_data_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/conv_weights_contents.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/data_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/deconvolution_contents.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/default_sw_weights_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/hw_const_data_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/hw_weights_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/ie_blob_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/kernel_binary_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/mean_contents.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/merge_fc_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/mtcnn_blob_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/prelu_blob_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/priorbox_contents.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/replicated_data_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/data_contents/scaled_content.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/model/model.cpp
inference-engine/src/vpu/graph_transformer/src/stages/batch_norm.cpp
inference-engine/src/vpu/graph_transformer/src/stages/custom.cpp
inference-engine/src/vpu/graph_transformer/src/stages/eltwise.cpp
inference-engine/src/vpu/graph_transformer/src/stages/exp_generateproposals.cpp
inference-engine/src/vpu/graph_transformer/src/stages/exp_priorgridgenerator.cpp
inference-engine/src/vpu/graph_transformer/src/stages/exp_topkrois.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/gather.cpp
inference-engine/src/vpu/graph_transformer/src/stages/mtcnn.cpp
inference-engine/src/vpu/graph_transformer/src/stages/mx_stage.cpp
inference-engine/src/vpu/graph_transformer/src/stages/nonzero.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/normalize.cpp
inference-engine/src/vpu/graph_transformer/src/stages/prelu.cpp
inference-engine/src/vpu/graph_transformer/src/stages/reduce.cpp
inference-engine/src/vpu/graph_transformer/src/stages/rnn.cpp
inference-engine/src/vpu/graph_transformer/src/stages/roi_align.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/scatter_update.cpp [new file with mode: 0644]
inference-engine/src/vpu/graph_transformer/src/stages/screlu.cpp
inference-engine/src/vpu/graph_transformer/src/stages/tensor_iterator.cpp
inference-engine/src/vpu/myriad_plugin/CMakeLists.txt
inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp
inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp
inference-engine/tests/functional/inference_engine/extension_lib/include/extension.hpp
inference-engine/tests/functional/inference_engine/extension_lib/src/extension.cpp
inference-engine/tests/functional/inference_engine/ie_extension_test.cpp
inference-engine/tests/functional/inference_engine/ie_irelease_test.cpp
inference-engine/tests/functional/inference_engine/net_reader_test.cpp
inference-engine/tests/functional/inference_engine/network_serializer_test.cpp
inference-engine/tests/functional/inference_engine/ngraph_reader/ngraph_reader_tests.hpp
inference-engine/tests/functional/inference_engine/ngraph_reshape_tests.cpp
inference-engine/tests/functional/plugin/cpu/bfloat16/bf16_network_restoring.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/conv_conv.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/conv_dwconv_relu.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/conv_relu_pool_conv_relu_pool.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/faster_100_5_1_1_conv.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/mobilenet_ssd_with_branching.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_conv.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_relu_conv.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_scaleshift.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_elu_conv.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_relu.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_concat_relu.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_eltwise.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed1_eltwise.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed2_eltwise.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x3_eltwise.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x2_conv_x2_eltwise.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x3_conv_eltwise_relu.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/tail_fp32_optimization.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/bfloat16/topk_inputs_i32.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/configuration_tests/configuration_tests.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/execution_graph_tests/unique_node_names.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/activation.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/batch_to_space.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/concat.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/pooling.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/reshape.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/softmax.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/space_to_batch.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/split.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/split_conv_concat.cpp
inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/activation.cpp
inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/concat.cpp
inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution.cpp
inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/pooling.cpp
inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/split.cpp
inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp
inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/split_conv_concat.cpp
inference-engine/tests/functional/plugin/gpu/CMakeLists.txt
inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/activation.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/concat.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reshape.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/split.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/strided_slice.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/split_conv_concat.cpp
inference-engine/tests/functional/plugin/myriad/CMakeLists.txt
inference-engine/tests/functional/plugin/myriad/ngraph/conversions/dynamic_shape_resolver.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/myriad/ngraph/operations/dynamic_shape_resolver.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/myriad/ngraph/operations/static_shape_nonzero.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/myriad/ngraph/transformations/dynamic_to_static_shape_nonzero.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/myriad/ngraph/utils/ngraph_utils.h [new file with mode: 0644]
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/activation.cpp
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/concat.cpp
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/convolution.cpp
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/nonzero.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/pooling.cpp
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/split.cpp
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp
inference-engine/tests/functional/plugin/myriad/shared_tests_instances/subgraph_tests/split_conv_concat.cpp
inference-engine/tests/functional/plugin/shared/include/configuration_tests/configuration_tests.hpp
inference-engine/tests/functional/plugin/shared/include/execution_graph_tests/unique_node_names.hpp
inference-engine/tests/functional/plugin/shared/include/ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/activation.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/batch_to_space.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/concat.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/nonzero.hpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/pooling.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/reshape.hpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/softmax.hpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/space_to_batch.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/split.hpp
inference-engine/tests/functional/plugin/shared/include/single_layer_tests/strided_slice.hpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/include/subgraph_tests/split_conv_concat.hpp
inference-engine/tests/functional/plugin/shared/src/configuration_tests/configuration_tests.cpp
inference-engine/tests/functional/plugin/shared/src/execution_graph_tests/unique_node_names.cpp
inference-engine/tests/functional/plugin/shared/src/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/activation.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/batch_to_space.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/concat.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/convolution.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/nonzero.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/pooling.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/reshape.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/softmax.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/space_to_batch.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/split.cpp
inference-engine/tests/functional/plugin/shared/src/single_layer_tests/strided_slice.cpp [new file with mode: 0644]
inference-engine/tests/functional/plugin/shared/src/subgraph_tests/split_conv_concat.cpp
inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.cpp
inference-engine/tests/ie_test_utils/common_test_utils/common_layers_params.hpp
inference-engine/tests/ie_test_utils/common_test_utils/common_utils.hpp
inference-engine/tests/ie_test_utils/common_test_utils/data_utils.hpp
inference-engine/tests/ie_test_utils/common_test_utils/file_utils.hpp
inference-engine/tests/ie_test_utils/common_test_utils/test_common.cpp
inference-engine/tests/ie_test_utils/common_test_utils/test_common.hpp
inference-engine/tests/ie_test_utils/common_test_utils/test_constants.hpp
inference-engine/tests/ie_test_utils/common_test_utils/unicode_utils.hpp
inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/ir_net.cpp
inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_father.hpp
inference-engine/tests/ie_test_utils/common_test_utils/xml_net_builder/xml_filler.hpp
inference-engine/tests/ie_test_utils/functional_test_utils/CMakeLists.txt
inference-engine/tests/ie_test_utils/functional_test_utils/blob_utils.hpp
inference-engine/tests/ie_test_utils/functional_test_utils/layer_test_utils.cpp [new file with mode: 0644]
inference-engine/tests/ie_test_utils/functional_test_utils/layer_test_utils.hpp
inference-engine/tests/ie_test_utils/functional_test_utils/network_utils.cpp
inference-engine/tests/ie_test_utils/functional_test_utils/network_utils.hpp
inference-engine/tests/ie_test_utils/functional_test_utils/precision_utils.hpp
inference-engine/tests/ie_test_utils/functional_test_utils/skip_tests_config.hpp
inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.cpp
inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.hpp
inference-engine/tests/ie_test_utils/unit_test_utils/CMakeLists.txt
inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_ie_imemory_state.hpp
inference-engine/tests/ie_test_utils/unit_test_utils/mocks/mock_not_empty_icnn_network.hpp
inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp
inference-engine/tests/ngraph_functions/include/ngraph_functions/pass/convert_prc.hpp
inference-engine/tests/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp [new file with mode: 0644]
inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp
inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp
inference-engine/tests/ngraph_functions/src/activation.cpp
inference-engine/tests/ngraph_functions/src/batch_to_space.cpp
inference-engine/tests/ngraph_functions/src/constant.cpp
inference-engine/tests/ngraph_functions/src/convolution.cpp
inference-engine/tests/ngraph_functions/src/params_vector.cpp
inference-engine/tests/ngraph_functions/src/space_to_batch.cpp
inference-engine/tests/ngraph_functions/src/split.cpp
inference-engine/tests/ngraph_functions/src/strided_slice.cpp [new file with mode: 0644]
inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp
inference-engine/tests/unit/CMakeLists.txt
inference-engine/tests/unit/gna/gna_api_stub.cpp
inference-engine/tests/unit/gna/gna_mock_api.hpp
inference-engine/tests/unit/gna/gna_plugin_config_test.cpp [new file with mode: 0644]
inference-engine/tests/unit/inference_engine/ie_exception_test.cpp
inference-engine/tests/unit/inference_engine/system_allocator_test.cpp
inference-engine/tests/unit/vpu/heap_test.cpp
inference-engine/tests_deprecated/helpers/CMakeLists.txt
inference-engine/tests_deprecated/helpers/single_layer_common.cpp
inference-engine/tests_deprecated/helpers/single_layer_common.hpp
inference-engine/tests_deprecated/helpers/test_model_path.cpp [moved from inference-engine/tests_deprecated/helpers/test_models_path.cpp with 90% similarity]
inference-engine/tests_deprecated/helpers/test_model_repo.hpp [new file with mode: 0644]
inference-engine/tests_deprecated/helpers/test_model_repo.hpp.in [deleted file]
inference-engine/tests_deprecated/helpers/tests_common.cpp
inference-engine/tests_deprecated/helpers/tests_common.hpp
inference-engine/tests_deprecated/unit/CMakeLists.txt
inference-engine/tests_deprecated/unit/cnn_network/v2_format_parser_test.cpp
inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.cpp
inference-engine/tests_deprecated/unit/engines/gna/gna_matcher.hpp
inference-engine/tests_deprecated/unit/engines/gna/layers/gna_align_filter2_tests.cpp [new file with mode: 0644]
inference-engine/tests_deprecated/unit/engines/gna/matchers/copy_matcher.hpp
inference-engine/tests_deprecated/unit/engines/gna/matchers/weights_matcher.hpp
inference-engine/tests_deprecated/unit/engines/mkldnn/dump_test.cpp
inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp
inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp
inference-engine/tests_deprecated/unit/engines/vpu/adjust_data_location_tests.cpp
inference-engine/tests_deprecated/unit/engines/vpu/eliminate_const_concat_tests.cpp
inference-engine/tests_deprecated/unit/engines/vpu/merge_parallel_fc.cpp
inference-engine/tests_deprecated/unit/engines/vpu/replace_with_screlu_tests.cpp
inference-engine/tests_deprecated/unit/inference_engine_tests/cnn_ngraph_impl_tests.cpp
inference-engine/tests_deprecated/unit/inference_engine_tests/convert_ngraph_to_cnn_network_tests.cpp
inference-engine/tests_deprecated/unit/inference_engine_tests/network_serializer_tests.cpp
inference-engine/tests_deprecated/unit/inference_engine_tests/pointer_test.cpp [deleted file]
inference-engine/tests_deprecated/unit/topology_verification_tests/v2_topology_verification_test.cpp
inference-engine/thirdparty/clDNN/api/layout.hpp
inference-engine/thirdparty/clDNN/api/tensor.hpp
inference-engine/thirdparty/clDNN/api_extension/fused_conv_eltwise.hpp
inference-engine/thirdparty/clDNN/common/khronos_ocl_clhpp/cl2_ext.hpp
inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp
inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_simple_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.h [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.h
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.hpp [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_selector.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/resample/resample_kernel_ref.h
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/concatenation_gpu_simple_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_1x1.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv_16_32_imad_dw.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_iyxo.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_to_fs_byx_fsv32.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_b_fs_yx_fsv32_dw.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/depth_to_space_block2_opt.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_iyxo.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_imad.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/data_types.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/fetch.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/mmad.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad_accumulate.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad_reduce.cl [new file with mode: 0644]
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_int8_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_weights.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/resample_ref.cl
inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/common/primitive_db_gen.py
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.cpp
inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h
inference-engine/thirdparty/clDNN/src/error_handler.cpp
inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp
inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp
inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_replace_deconv.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/remove_redundant_reorders.cpp
inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h
inference-engine/thirdparty/clDNN/src/include/pass_manager.h
inference-engine/thirdparty/clDNN/src/include/program_helpers.h
inference-engine/thirdparty/clDNN/src/include/to_string_utils.h
inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp
inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
inference-engine/thirdparty/clDNN/src/program.cpp
inference-engine/thirdparty/clDNN/src/program_helpers.cpp
inference-engine/thirdparty/clDNN/src/reorder.cpp
inference-engine/thirdparty/clDNN/src/strided_slice.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/concatenation_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp
inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp
inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_engine.cpp
inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_bf16_convolution.cpp
inference-engine/thirdparty/movidius/XLink/pc/Win/include/win_synchapi.h [new file with mode: 0644]
inference-engine/thirdparty/movidius/XLink/pc/Win/src/win_synchapi.c [new file with mode: 0644]
inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp
inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp
inference-engine/tools/benchmark_tool/README.md
inference-engine/tools/compile_tool/CMakeLists.txt
inference-engine/tools/vpu/vpu_compile/CMakeLists.txt
inference-engine/tools/vpu/vpu_perfcheck/CMakeLists.txt
model-optimizer/automation/create_package.py [new file with mode: 0644]
model-optimizer/automation/package_BOM.txt [new file with mode: 0644]
model-optimizer/automation/utils.py [new file with mode: 0644]
model-optimizer/extensions/analysis/boolean_input.py
model-optimizer/extensions/analysis/json_print.py
model-optimizer/extensions/analysis/tf_od_api.py
model-optimizer/extensions/back/CutMemory_test.py
model-optimizer/extensions/back/I64ToI32.py
model-optimizer/extensions/back/LSTMCellNormalizer.py
model-optimizer/extensions/back/OptimizeTransposeReshapeSequence.py
model-optimizer/extensions/back/ProposalMutation.py
model-optimizer/extensions/back/ReduceToPooling_test.py
model-optimizer/extensions/back/ReduceTransposeDimensions.py
model-optimizer/extensions/back/Reshape0DToSqueeze.py
model-optimizer/extensions/back/ShapeOfToShape.py
model-optimizer/extensions/back/ShufflenetReLUReorder_test.py
model-optimizer/extensions/back/SpecialNodesFinalization.py
model-optimizer/extensions/back/SpecialNodesFinalization_test.py
model-optimizer/extensions/back/StridedSliceMasksNormalizer.py
model-optimizer/extensions/back/TileNormalizer_test.py
model-optimizer/extensions/back/compress_quantized_weights_test.py
model-optimizer/extensions/back/disable_unsupported_ND_operations.py
model-optimizer/extensions/back/insert_compatibility_l2normalization.py
model-optimizer/extensions/front/ChangeCastOutputType.py
model-optimizer/extensions/front/ExpandDimsToUnsqueeze.py
model-optimizer/extensions/front/GeLUMerger_Erf.py
model-optimizer/extensions/front/LRNReplacer.py
model-optimizer/extensions/front/LRNReplacer_test.py
model-optimizer/extensions/front/Log1p_test.py
model-optimizer/extensions/front/LogSoftmax.py [moved from model-optimizer/extensions/front/tf/LogSoftmax.py with 69% similarity]
model-optimizer/extensions/front/LogSoftmax_test.py [new file with mode: 0644]
model-optimizer/extensions/front/MatMul_normalizer.py
model-optimizer/extensions/front/Pack_test.py
model-optimizer/extensions/front/SqueezeNormalize.py
model-optimizer/extensions/front/TopKNormalize.py
model-optimizer/extensions/front/binary_quantize_normalization.py
model-optimizer/extensions/front/binary_quantize_normalization_test.py
model-optimizer/extensions/front/caffe/accum_ext_test.py
model-optimizer/extensions/front/caffe/argmax_ext_test.py
model-optimizer/extensions/front/caffe/axpy.py
model-optimizer/extensions/front/caffe/bn_test.py
model-optimizer/extensions/front/caffe/correlation_ext_test.py
model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext_test.py
model-optimizer/extensions/front/caffe/data_augmentation_ext_test.py
model-optimizer/extensions/front/caffe/elu.py
model-optimizer/extensions/front/caffe/elu_test.py
model-optimizer/extensions/front/caffe/grn_ext_test.py
model-optimizer/extensions/front/caffe/normalize_ext_test.py
model-optimizer/extensions/front/caffe/power_file_ext_test.py
model-optimizer/extensions/front/caffe/prelu_ext_test.py
model-optimizer/extensions/front/caffe/priorbox_clustered_ext_test.py
model-optimizer/extensions/front/caffe/priorbox_ext_test.py
model-optimizer/extensions/front/caffe/proposal_ext_test.py
model-optimizer/extensions/front/caffe/proposal_python_ext_test.py
model-optimizer/extensions/front/caffe/regionyolo_ext_test.py
model-optimizer/extensions/front/caffe/relu6.py
model-optimizer/extensions/front/caffe/reorgyolo_ext_test.py
model-optimizer/extensions/front/caffe/simplernms_ext_test.py
model-optimizer/extensions/front/caffe/spatial_transformer_ext_test.py
model-optimizer/extensions/front/caffe/tanh.py
model-optimizer/extensions/front/eltwise_n.py
model-optimizer/extensions/front/eltwise_n_test.py
model-optimizer/extensions/front/global_pooling_to_reduce.py
model-optimizer/extensions/front/image_scaler.py
model-optimizer/extensions/front/image_scaler_test.py
model-optimizer/extensions/front/instance_normalization.py
model-optimizer/extensions/front/instance_normalization_test.py
model-optimizer/extensions/front/kaldi/apply_counts_test.py
model-optimizer/extensions/front/kaldi/logsoftmax.py [deleted file]
model-optimizer/extensions/front/kaldi/logsoftmax_component_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/kaldi/replace_eltwise_nin1.py
model-optimizer/extensions/front/kaldi/sigmoid_ext_test.py
model-optimizer/extensions/front/kaldi/tanh_ext_test.py
model-optimizer/extensions/front/mxnet/add_input_data_to_prior_boxes_test.py
model-optimizer/extensions/front/mxnet/check_softmax_node_inputs_test.py
model-optimizer/extensions/front/mxnet/conv_ext.py
model-optimizer/extensions/front/mxnet/custom_test.py
model-optimizer/extensions/front/mxnet/elementwise_ext.py
model-optimizer/extensions/front/mxnet/gather_test.py
model-optimizer/extensions/front/mxnet/instance_norm_ext.py
model-optimizer/extensions/front/mxnet/multibox_detection_ext.py
model-optimizer/extensions/front/mxnet/mx_reshape_reverse.py
model-optimizer/extensions/front/mxnet/null_ext.py
model-optimizer/extensions/front/mxnet/pad_ext.py
model-optimizer/extensions/front/mxnet/slice_like_ext.py
model-optimizer/extensions/front/mxnet/softmax.py
model-optimizer/extensions/front/mxnet/softmax_activation_ext.py
model-optimizer/extensions/front/mxnet/softmax_ext.py
model-optimizer/extensions/front/mxnet/softmax_output_ext.py
model-optimizer/extensions/front/mxnet/squeeze_ext.py
model-optimizer/extensions/front/mxnet/ssd_detection_output_replacer.py
model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation_test.py
model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten_test.py
model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape_test.py
model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose.py
model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose_test.py
model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs.py
model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs_test.py
model-optimizer/extensions/front/mxnet/stack_ext.py
model-optimizer/extensions/front/mxnet/zeros_ext.py
model-optimizer/extensions/front/no_op_eraser.py
model-optimizer/extensions/front/onnx/affine_ext_test.py
model-optimizer/extensions/front/onnx/constant_fill_ext.py
model-optimizer/extensions/front/onnx/conv_ext.py
model-optimizer/extensions/front/onnx/conv_ext_test.py
model-optimizer/extensions/front/onnx/crop_ext_test.py
model-optimizer/extensions/front/onnx/deformable_conv_ext.py
model-optimizer/extensions/front/onnx/detection_output_test.py
model-optimizer/extensions/front/onnx/detectionoutput_ext.py
model-optimizer/extensions/front/onnx/dropout_ext.py
model-optimizer/extensions/front/onnx/image_scaler_ext.py
model-optimizer/extensions/front/onnx/mask_rcnn_conversion.py
model-optimizer/extensions/front/onnx/non_zero_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/person_detection_crossroad.json [new file with mode: 0644]
model-optimizer/extensions/front/onnx/person_detection_crossroad_conversion.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/pooling_ext.py
model-optimizer/extensions/front/onnx/priorbox_clustered_ext_test.py
model-optimizer/extensions/front/onnx/priorbox_ext_test.py
model-optimizer/extensions/front/onnx/quantize_ext.py
model-optimizer/extensions/front/onnx/reverse_sequence_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/roialign_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/scatter_ext.py [new file with mode: 0644]
model-optimizer/extensions/front/onnx/softmax_ext.py
model-optimizer/extensions/front/onnx/squeeze_ext.py
model-optimizer/extensions/front/onnx/transpose_ext_test.py
model-optimizer/extensions/front/reciprocal_test.py
model-optimizer/extensions/front/scatter_normalizer.py [new file with mode: 0644]
model-optimizer/extensions/front/scatter_normalizer_test.py [new file with mode: 0644]
model-optimizer/extensions/front/softsign_replacer_test.py
model-optimizer/extensions/front/squared_difference_test.py
model-optimizer/extensions/front/standalone_const_eraser.py
model-optimizer/extensions/front/tf/BlockLSTM.py
model-optimizer/extensions/front/tf/LoopCond_ext.py
model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py
model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate.py
model-optimizer/extensions/front/tf/SplitConcatPairToInterpolate_test.py
model-optimizer/extensions/front/tf/SwitchMergeOptimization.py
model-optimizer/extensions/front/tf/SwitchMergeOptimization_test.py
model-optimizer/extensions/front/tf/TensorArrayGatherV3.py
model-optimizer/extensions/front/tf/assign_elimination.py
model-optimizer/extensions/front/tf/bucketize.py
model-optimizer/extensions/front/tf/conv_ext.py
model-optimizer/extensions/front/tf/deconv_ext.py
model-optimizer/extensions/front/tf/fake_const_ext.py
model-optimizer/extensions/front/tf/mvn_unrolled_test.py
model-optimizer/extensions/front/tf/pooling_ext.py
model-optimizer/extensions/front/tf/softmax_ext.py
model-optimizer/extensions/front/tf/space_to_batch.py
model-optimizer/extensions/front/tf/space_to_depth_ext.py
model-optimizer/extensions/front/tf/sparse_fill_empty_rows_ext.py
model-optimizer/extensions/front/tf/sparse_segment_mean_ext.py
model-optimizer/extensions/front/tf/sparse_segment_sqrtn_ext.py
model-optimizer/extensions/front/tf/sparse_segment_sum_ext.py
model-optimizer/extensions/front/tf/sparse_to_dense_ext.py
model-optimizer/extensions/front/tf/sparse_weighted_sum.py
model-optimizer/extensions/front/tf/sparse_weighted_sum_test.py
model-optimizer/extensions/front/tf/swish_test.py
model-optimizer/extensions/front/tf/topk_ext.py
model-optimizer/extensions/front/tf/unique_ext.py
model-optimizer/extensions/middle/ApplyPermutations.py
model-optimizer/extensions/middle/BiasAddBroadcasting.py
model-optimizer/extensions/middle/ConstSwitchResolver.py
model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py
model-optimizer/extensions/middle/CutInputHavingZeroDimFromConcat_test.py
model-optimizer/extensions/middle/Deconvolution3rdInputNormalization.py
model-optimizer/extensions/middle/GRURNNSequenceToTensorIterator.py
model-optimizer/extensions/middle/GroupNorm.py
model-optimizer/extensions/middle/InsertLayoutPropagationTransposes.py
model-optimizer/extensions/middle/InsertSelect_test.py
model-optimizer/extensions/middle/InterpolateSequenceToInterpolate.py
model-optimizer/extensions/middle/InterpolateSequenceToInterpolate_test.py
model-optimizer/extensions/middle/L2NormToNorm_test.py
model-optimizer/extensions/middle/LSTMRNNSequenceToTensorIterator.py
model-optimizer/extensions/middle/MXNetSplitMultiLayers.py
model-optimizer/extensions/middle/MXTileReplacer.py
model-optimizer/extensions/middle/MXTileReplacer_test.py [new file with mode: 0644]
model-optimizer/extensions/middle/MinumumMiddleReplacer_test.py
model-optimizer/extensions/middle/ReluQuantizeFuse.py
model-optimizer/extensions/middle/ReluQuantizeFuse_test.py
model-optimizer/extensions/middle/RemoveDuplicationMemory_test.py
model-optimizer/extensions/middle/RemoveUselessConcatSplit_test.py
model-optimizer/extensions/middle/RemoveUselessCrops_test.py
model-optimizer/extensions/middle/ReplaceMemoryOffsetWithSplice_test.py
model-optimizer/extensions/middle/ReplacePNormNodePattern_test.py
model-optimizer/extensions/middle/ReplaceSpliceNodePattern_test.py
model-optimizer/extensions/middle/ScaleInput_test.py
model-optimizer/extensions/middle/SharedWeightsDuplication_test.py
model-optimizer/extensions/middle/SliceConverter.py
model-optimizer/extensions/middle/TensorIteratorBackEdge.py
model-optimizer/extensions/middle/TensorIteratorBackEdge_test.py
model-optimizer/extensions/middle/TensorIteratorCondition_test.py
model-optimizer/extensions/middle/TensorIteratorInput_test.py
model-optimizer/extensions/middle/TensorIteratorLSTMToLSTMSequence.py
model-optimizer/extensions/middle/TensorIteratorMerge.py
model-optimizer/extensions/middle/TensorIteratorOutput_test.py
model-optimizer/extensions/middle/quantize_fuses_test.py
model-optimizer/extensions/middle/sparse_reshape.py
model-optimizer/extensions/middle/sparse_reshape_test.py
model-optimizer/extensions/middle/weights_permute_normalizer_test.py
model-optimizer/extensions/ops/BlockLSTM.py
model-optimizer/extensions/ops/Enter.py
model-optimizer/extensions/ops/Exit.py
model-optimizer/extensions/ops/GRU.py
model-optimizer/extensions/ops/MatMul_value_propagation_test.py
model-optimizer/extensions/ops/TensorArrayGather.py
model-optimizer/extensions/ops/TensorArrayRead.py
model-optimizer/extensions/ops/TensorIterator_ops.py
model-optimizer/extensions/ops/adaptive_avg_pooling.py
model-optimizer/extensions/ops/argmax.py
model-optimizer/extensions/ops/assert_op.py
model-optimizer/extensions/ops/aten.py
model-optimizer/extensions/ops/bucketize.py
model-optimizer/extensions/ops/bucketize_test.py
model-optimizer/extensions/ops/data_augmentation.py
model-optimizer/extensions/ops/depth_to_space.py
model-optimizer/extensions/ops/depth_to_space_test.py
model-optimizer/extensions/ops/div_value_propagation_test.py
model-optimizer/extensions/ops/exp.py
model-optimizer/extensions/ops/grn.py
model-optimizer/extensions/ops/instance_normalization.py
model-optimizer/extensions/ops/instance_normalization_test.py
model-optimizer/extensions/ops/lstm_cell.py
model-optimizer/extensions/ops/merge_test.py
model-optimizer/extensions/ops/mxslice.py
model-optimizer/extensions/ops/non_max_suppression.py
model-optimizer/extensions/ops/non_zero.py
model-optimizer/extensions/ops/normalize.py
model-optimizer/extensions/ops/pack.py
model-optimizer/extensions/ops/power_file.py
model-optimizer/extensions/ops/prediction_heatmap.py
model-optimizer/extensions/ops/prelu.py
model-optimizer/extensions/ops/proposal_python_example.py
model-optimizer/extensions/ops/reorgyolo.py
model-optimizer/extensions/ops/roialign.py [new file with mode: 0644]
model-optimizer/extensions/ops/scatter.py [new file with mode: 0644]
model-optimizer/extensions/ops/select_test.py
model-optimizer/extensions/ops/simplernms.py
model-optimizer/extensions/ops/space_to_depth.py
model-optimizer/extensions/ops/space_to_depth_test.py [new file with mode: 0644]
model-optimizer/extensions/ops/sparse_fill_empty_rows.py
model-optimizer/extensions/ops/sparse_fill_empty_rows_test.py
model-optimizer/extensions/ops/sparse_reshape_test.py
model-optimizer/extensions/ops/sparse_segment_mean.py
model-optimizer/extensions/ops/sparse_segment_sqrtn.py
model-optimizer/extensions/ops/sparse_segment_sum.py
model-optimizer/extensions/ops/sparse_to_dense_test.py
model-optimizer/extensions/ops/sparse_weighted_sum.py
model-optimizer/extensions/ops/sparse_weighted_sum_test.py
model-optimizer/extensions/ops/split.py
model-optimizer/extensions/ops/split_test.py
model-optimizer/extensions/ops/stop_gradient.py
model-optimizer/extensions/ops/switch_test.py
model-optimizer/extensions/ops/transpose_test.py
model-optimizer/extensions/ops/unique.py
model-optimizer/extensions/ops/unique_test.py
model-optimizer/mo/back/ie_ir_ver_2/emitter.py
model-optimizer/mo/front/caffe/custom_layers_mapping.py
model-optimizer/mo/front/common/layout.py
model-optimizer/mo/front/common/partial_infer/caffe_fallback.py
model-optimizer/mo/front/common/partial_infer/concat.py
model-optimizer/mo/front/common/partial_infer/roipooling.py
model-optimizer/mo/front/common/partial_infer/utils.py
model-optimizer/mo/front/common/replacement.py
model-optimizer/mo/front/extractor_test.py
model-optimizer/mo/front/kaldi/extractors/add_ext.py
model-optimizer/mo/front/kaldi/extractors/batchnorm_component_ext.py
model-optimizer/mo/front/kaldi/extractors/clip_ext.py
model-optimizer/mo/front/kaldi/extractors/copy_ext.py
model-optimizer/mo/front/kaldi/extractors/elementwise_component_ext.py
model-optimizer/mo/front/kaldi/extractors/linear_component_ext.py
model-optimizer/mo/front/kaldi/extractors/lstm_nonlinearity_ext.py
model-optimizer/mo/front/kaldi/extractors/max_pooling_ext.py
model-optimizer/mo/front/kaldi/extractors/normalize_component_ext.py
model-optimizer/mo/front/kaldi/extractors/normalize_component_ext_test.py
model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext.py
model-optimizer/mo/front/kaldi/extractors/pnorm_component_ext_test.py
model-optimizer/mo/front/kaldi/extractors/rectified_linear_component_ext.py
model-optimizer/mo/front/kaldi/loader/loader.py
model-optimizer/mo/front/kaldi/loader/loader_test.py
model-optimizer/mo/front/kaldi/loader/utils.py
model-optimizer/mo/front/kaldi/utils.py
model-optimizer/mo/front/mxnet/extractors/multibox_prior.py
model-optimizer/mo/front/mxnet/extractors/utils.py
model-optimizer/mo/front/mxnet/loader.py
model-optimizer/mo/front/mxnet/nd_to_params.py
model-optimizer/mo/front/onnx/loader.py
model-optimizer/mo/front/tf/extractors/utils_test.py
model-optimizer/mo/graph/graph.py
model-optimizer/mo/graph/graph_test.py
model-optimizer/mo/main.py
model-optimizer/mo/middle/passes/conv_test.py
model-optimizer/mo/middle/passes/eliminate.py
model-optimizer/mo/middle/passes/eliminate_test.py
model-optimizer/mo/middle/passes/fusing/decomposition_test.py
model-optimizer/mo/middle/passes/fusing/fuse_linear_ops_test.py
model-optimizer/mo/middle/passes/fusing/fuse_linear_seq.py
model-optimizer/mo/middle/passes/fusing/fuse_linear_seq_test.py
model-optimizer/mo/middle/passes/fusing/resnet_optimization_test.py
model-optimizer/mo/middle/passes/infer.py
model-optimizer/mo/middle/passes/leaky_relu.py
model-optimizer/mo/middle/passes/mean_scale_values_test.py
model-optimizer/mo/middle/passes/tensor_names.py
model-optimizer/mo/ops/convolution.py
model-optimizer/mo/ops/shape.py
model-optimizer/mo/ops/softmax.py
model-optimizer/mo/ops/space_to_batch.py
model-optimizer/mo/ops/squeeze_test.py
model-optimizer/mo/ops/strided_slice.py
model-optimizer/mo/ops/strided_slice_test.py
model-optimizer/mo/ops/unsqueeze_test.py
model-optimizer/mo/utils/class_registration.py
model-optimizer/mo/utils/custom_replacement_config.py
model-optimizer/mo/utils/graph.py
model-optimizer/mo/utils/graph_test.py
model-optimizer/mo/utils/ir_reader/extender.py
model-optimizer/mo/utils/ir_reader/extenders/GRUCell_extender.py [new file with mode: 0644]
model-optimizer/mo/utils/ir_reader/extenders/LSTMCell_extender.py
model-optimizer/mo/utils/ir_reader/extenders/RNNCell_extender.py [new file with mode: 0644]
model-optimizer/mo/utils/ir_reader/extenders/binary_convolution_extender.py
model-optimizer/mo/utils/ir_reader/extenders/conv_extender.py
model-optimizer/mo/utils/ir_reader/extenders/deconvolution_extender.py
model-optimizer/mo/utils/ir_reader/extenders/deformable_convolution_extender.py
model-optimizer/mo/utils/ir_reader/extenders/experimental_extender.py
model-optimizer/mo/utils/ir_reader/extenders/fakequantize_extender.py
model-optimizer/mo/utils/ir_reader/extenders/interpolate_extender.py
model-optimizer/mo/utils/ir_reader/extenders/non_zero_extender.py [new file with mode: 0644]
model-optimizer/mo/utils/ir_reader/extenders/pad_extender.py
model-optimizer/mo/utils/ir_reader/extenders/parameter_extender.py
model-optimizer/mo/utils/ir_reader/extenders/pooling_extender.py
model-optimizer/mo/utils/ir_reader/extenders/priorbox_clustered_extender.py
model-optimizer/mo/utils/ir_reader/extenders/priorbox_extender.py
model-optimizer/mo/utils/ir_reader/extenders/reorg_yolo_extender.py
model-optimizer/mo/utils/ir_reader/extenders/strided_slice_extender.py
model-optimizer/mo/utils/ir_reader/extenders/tensoriterator_extender.py
model-optimizer/mo/utils/ir_reader/extenders/topk_extender.py
model-optimizer/mo/utils/ir_reader/extenders/variadic_split_extender.py
model-optimizer/mo/utils/ir_reader/layer_to_class.py
model-optimizer/mo/utils/pipeline_config.py
model-optimizer/mo/utils/replacement_pattern.py
model-optimizer/mo/utils/unittest/graph.py
model-optimizer/mo/utils/unsupported_ops.py
model-optimizer/mo/utils/utils_test.py
model-optimizer/mo/utils/versions_checker_test.py
model-optimizer/requirements_dev.txt
ngraph
scripts/demo/README.txt [new file with mode: 0644]
scripts/demo/car.png [new file with mode: 0644]
scripts/demo/car_1.bmp [new file with mode: 0644]
scripts/demo/demo_benchmark_app.bat [new file with mode: 0644]
scripts/demo/demo_benchmark_app.sh [new file with mode: 0644]
scripts/demo/demo_security_barrier_camera.bat [new file with mode: 0644]
scripts/demo/demo_security_barrier_camera.conf [new file with mode: 0644]
scripts/demo/demo_security_barrier_camera.sh [new file with mode: 0644]
scripts/demo/demo_squeezenet_download_convert_run.bat [new file with mode: 0644]
scripts/demo/demo_squeezenet_download_convert_run.sh [new file with mode: 0644]
scripts/demo/squeezenet1.1.labels [new file with mode: 0644]
scripts/demo/utils.sh [new file with mode: 0644]
scripts/install_dependencies/install_4_14_kernel.sh [new file with mode: 0644]
scripts/install_dependencies/install_GST_dependencies.sh [new file with mode: 0644]
scripts/install_dependencies/install_NCS_udev_rules.sh [new file with mode: 0644]
scripts/install_dependencies/install_NEO_OCL_driver.sh [new file with mode: 0644]
scripts/install_dependencies/install_guide.html [new file with mode: 0644]
scripts/install_dependencies/install_openvino_dependencies.sh [new file with mode: 0644]
scripts/setupvars/setupvars.bat [new file with mode: 0644]
scripts/setupvars/setupvars.sh [new file with mode: 0644]
scripts/utils/create_package.py [new file with mode: 0644]
scripts/utils/utils.py [new file with mode: 0644]
tests/stress_tests/.automation/env_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_references_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_test_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/memcheck_tests/weekly_configs/desktop_references_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/memcheck_tests/weekly_configs/desktop_test_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/memleaks_tests/nightly_configs/desktop_test_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/memleaks_tests/weekly_configs/desktop_test_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/unittests/nightly_configs/desktop_test_config.xml [new file with mode: 0644]
tests/stress_tests/.automation/unittests/weekly_configs/desktop_test_config.xml [new file with mode: 0644]
tests/stress_tests/CMakeLists.txt [new file with mode: 0644]
tests/stress_tests/README.md [new file with mode: 0644]
tests/stress_tests/common/ie_pipelines/pipelines.cpp [new file with mode: 0644]
tests/stress_tests/common/ie_pipelines/pipelines.h [new file with mode: 0644]
tests/stress_tests/common/managers/task_manager.h [new file with mode: 0644]
tests/stress_tests/common/managers/thread_manager.h [new file with mode: 0644]
tests/stress_tests/common/tests_utils.cpp [new file with mode: 0644]
tests/stress_tests/common/tests_utils.h [new file with mode: 0644]
tests/stress_tests/common/utils.cpp [new file with mode: 0644]
tests/stress_tests/common/utils.h [new file with mode: 0644]
tests/stress_tests/memcheck_tests/CMakeLists.txt [new file with mode: 0644]
tests/stress_tests/memcheck_tests/flags.h [new file with mode: 0644]
tests/stress_tests/memcheck_tests/local_configs/env_config.xml [new file with mode: 0644]
tests/stress_tests/memcheck_tests/local_configs/references_config.xml [new file with mode: 0644]
tests/stress_tests/memcheck_tests/local_configs/test_config.xml [new file with mode: 0644]
tests/stress_tests/memcheck_tests/main.cpp [new file with mode: 0644]
tests/stress_tests/memcheck_tests/tests.cpp [new file with mode: 0644]
tests/stress_tests/memcheck_tests/tests_pipelines/tests_pipelines.cpp [new file with mode: 0644]
tests/stress_tests/memcheck_tests/tests_pipelines/tests_pipelines.h [new file with mode: 0644]
tests/stress_tests/memcheck_tests/tests_utils.h [new file with mode: 0644]
tests/stress_tests/memleaks_tests/CMakeLists.txt [new file with mode: 0644]
tests/stress_tests/memleaks_tests/flags.h [new file with mode: 0644]
tests/stress_tests/memleaks_tests/local_configs/env_config.xml [new file with mode: 0644]
tests/stress_tests/memleaks_tests/local_configs/test_config.xml [new file with mode: 0644]
tests/stress_tests/memleaks_tests/main.cpp [new file with mode: 0644]
tests/stress_tests/memleaks_tests/tests.cpp [new file with mode: 0644]
tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp [new file with mode: 0644]
tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.h [new file with mode: 0644]
tests/stress_tests/scripts/get_testdata.py [new file with mode: 0644]
tests/stress_tests/unittests/CMakeLists.txt [new file with mode: 0644]
tests/stress_tests/unittests/flags.h [new file with mode: 0644]
tests/stress_tests/unittests/local_configs/env_config.xml [new file with mode: 0644]
tests/stress_tests/unittests/local_configs/test_config.xml [new file with mode: 0644]
tests/stress_tests/unittests/main.cpp [new file with mode: 0644]
tests/stress_tests/unittests/tests.cpp [new file with mode: 0644]
tests/stress_tests/unittests/tests_pipelines/tests_pipelines.cpp [new file with mode: 0644]
tests/stress_tests/unittests/tests_pipelines/tests_pipelines.h [new file with mode: 0644]
tests/stress_tests/unittests/tests_pipelines/tests_pipelines_full_pipeline.cpp [new file with mode: 0644]
tools/benchmark/README.md
tools/benchmark/benchmark.py
tools/benchmark/main.py
tools/benchmark/parameters.py
tools/benchmark/utils/utils.py

index 31184fc..547ae62 100644 (file)
-## Ignore Visual Studio temporary files, build results, and
-## files generated by popular Visual Studio add-ons.
-
-# User-specific files
-*.suo
-*.user
-*.userosscache
-*.sln.docstates
-
-# User-specific files (MonoDevelop/Xamarin Studio)
-*.userprefs
-
-# Build results
-[Dd]ebug/
-[Dd]ebugPublic/
-[Rr]elease/
-[Rr]eleases/
-[Xx]64/
-[Xx]86/
-[Bb]uild/
-bld/
-[Bb]in/
-[Oo]bj/
-
-# PY.TEST
-*.pyc
-tests/integration/report.html
-tests/integration/report.xml
-tests/integration/assets/
-tests/integration/__pycache__/
-
-# Visual Studio 2015 cache/options directory
-.vs/
-# Uncomment if you have tasks that create the project's static files in wwwroot
-#wwwroot/
-
-# MSTest test Results
-[Tt]est[Rr]esult*/
-[Bb]uild[Ll]og.*
-
-# NUNIT
-*.VisualState.xml
-TestResult.xml
-
-# Build Results of an ATL Project
-[Dd]ebugPS/
-[Rr]eleasePS/
-dlldata.c
-
-# DNX
-project.lock.json
-artifacts/
-
-*_i.c
-*_p.c
-*_i.h
-*.ilk
-*.meta
-*.obj
-*.pch
-*.pdb
-*.pgc
-*.pgd
-*.rsp
-*.sbr
-*.tlb
-*.tli
-*.tlh
-*.tmp
-*.tmp_proj
-*.log
-*.vspscc
-*.vssscc
-.builds
-*.pidb
-*.svclog
-*.scc
-
-# Chutzpah Test files
-_Chutzpah*
-
-# Visual C++ cache files
-ipch/
-*.aps
-*.ncb
-*.opendb
-*.opensdf
-*.sdf
-*.cachefile
-*.VC.db
-
-# Visual Studio profiler
-*.psess
-*.vsp
-*.vspx
-*.sap
-
-# TFS 2012 Local Workspace
-$tf/
-
-# Guidance Automation Toolkit
-*.gpState
-
-# ReSharper is a .NET coding add-in
-_ReSharper*/
-*.[Rr]e[Ss]harper
-*.DotSettings.user
-
-# JustCode is a .NET coding add-in
-.JustCode
-
-# TeamCity is a build add-in
-_TeamCity*
-
-# DotCover is a Code Coverage Tool
-*.dotCover
-
-# NCrunch
-_NCrunch_*
-.*crunch*.local.xml
-nCrunchTemp_*
-
-# MightyMoose
-*.mm.*
-AutoTest.Net/
-
-# Web workbench (sass)
-.sass-cache/
-
-# Installshield output folder
-[Ee]xpress/
-
-# DocProject is a documentation generator add-in
-DocProject/buildhelp/
-DocProject/Help/*.HxT
-DocProject/Help/*.HxC
-DocProject/Help/*.hhc
-DocProject/Help/*.hhk
-DocProject/Help/*.hhp
-DocProject/Help/Html2
-DocProject/Help/html
-
-# Click-Once directory
-publish/
-
-# Publish Web Output
-*.[Pp]ublish.xml
-*.azurePubxml
-
-# TODO: Un-comment the next line if you do not want to checkin
-# your web deploy settings because they may include unencrypted
-# passwords
-#*.pubxml
-*.publishproj
-
-# NuGet Packages
-*.nupkg
-# The packages folder can be ignored because of Package Restore
-**/packages/*
-# except build/, which is used as an MSBuild target.
-!**/packages/build/
-# Uncomment if necessary however generally it will be regenerated when needed
-#!**/packages/repositories.config
-# NuGet v3's project.json files produces more ignoreable files
-*.nuget.props
-*.nuget.targets
-
-# Microsoft Azure Build Output
-csx/
-*.build.csdef
-
-# Microsoft Azure Emulator
-ecf/
-rcf/
-
-# Microsoft Azure ApplicationInsights config file
-ApplicationInsights.config
-
-# Windows Store app package directory
-AppPackages/
-BundleArtifacts/
-
-# Visual Studio cache files
-# files ending in .cache can be ignored
-*.[Cc]ache
-# but keep track of directories ending in .cache
-!*.[Cc]ache/
-
-# Others
-ClientBin/
-[Ss]tyle[Cc]op.*
-~$*
-*~
-*.dbmdl
-*.dbproj.schemaview
-*.pfx
-*.publishsettings
-node_modules/
-orleans.codegen.cs
-
-# RIA/Silverlight projects
-Generated_Code/
-
-# Backup & report files from converting an old project file
-# to a newer Visual Studio version. Backup files are not needed,
-# because we have git ;-)
-_UpgradeReport_Files/
-Backup*/
-UpgradeLog*.XML
-UpgradeLog*.htm
-
-# SQL Server files
-*.mdf
-*.ldf
-
-# Business Intelligence projects
-*.rdl.data
-*.bim.layout
-*.bim_*.settings
-
-# Microsoft Fakes
-FakesAssemblies/
-
-# GhostDoc plugin setting file
-*.GhostDoc.xml
-
-# Target VS files:
-vsx64
-
-# Node.js Tools for Visual Studio
-.ntvs_analysis.dat
-
-# Visual Studio 6 build log
-*.plg
-
-# Visual Studio 6 workspace options file
-*.opt
-
-# Visual Studio LightSwitch build output
-**/*.HTMLClient/GeneratedArtifacts
-**/*.DesktopClient/GeneratedArtifacts
-**/*.DesktopClient/ModelManifest.xml
-**/*.Server/GeneratedArtifacts
-**/*.Server/ModelManifest.xml
-_Pvt_Extensions
-
-# LightSwitch generated files
-GeneratedArtifacts/
-ModelManifest.xml
-
-# Paket dependency manager
-.paket/paket.exe
-
-# FAKE - F# Make
-.fake/
-*.filters
-/External
-/Output
-/InferenceEngineMain/models
-/Test
-/HTTPClient/*.a
-/InferenceEngineMain/newModels
+# build/artifact dirs
+_*
+# but ensure we don't skip __init__.py
+!__init__.py
+# developer tools
+.idea
+.vscode
+cmake-build-debug
+cmake-build-release
 .DS_Store
-
-# For IDEA
-.idea/
-VS/
-Xcode/
-temp/
-report/
-.kdev4/
-*.kdev4
-*.kate-swp
-
-/lin-build
-/win-build
-/CMakeFiles
-*.stamp
-*.depend
-*.vcxproj
-*.sln
-/CMakeCache.txt
-.vimprj/
-build_IA32/
-.dir-locals.el
-GTAGS
-GPATH
-GRTAGS
-GSYMS
+**/tags
 compile_commands.json
-service/dot-net-service/Output
-**/sublime_build
-/.project
-.vscode/
-/vsx32
-/service/dot-net-service/.klocwork/DotNetService
-cmake-build-*/
-/lin64
-
-.gdb_history
+bin/
+build/
 .local_vimrc
-.ycm_extra_conf.py
-tags
-
-
-# from Model Optimizer repo
-.idea
-.project
-.cproject
-.pydevproject
-.settings
-/bin/
-/gen/
-__pycache__
-*.swp
-/config.xml
-
-# Python-specific
-.env3
-*.pyc
-
-# Tests-specific
-.coverage
-htmlcov
-pylint_report.txt
-pylint_report_comments.txt
-
-# Documentation-generated
-docs/build
-docs/source/_static
-docs/source/_templates
-docs/source/generated/
-
-# Artifacts
-/*.bin
-/*.xml
-/*.json
-/*.so
-/*.txt
-/*.mapping
-/*.dat
-/*.svg
+.gdb_history
+.vimspector.json
+doc/
+docs/build_documentation/work_dir/
+inference-engine/plugins/
+.repo/
+docs/template_plugin/html/
+CMakeLists.txt.user
+docs/IE_PLUGIN_DG/html/
index 1aaf7fa..aaedb15 100644 (file)
@@ -5,4 +5,4 @@
 [submodule "ngraph"]
        path = ngraph
        url = https://github.com/NervanaSystems/ngraph.git
-       ignore = dirty
+       ignore = dirty
\ No newline at end of file
index edf8233..5c54d32 100644 (file)
@@ -77,13 +77,13 @@ function(build_ngraph)
 
     if (NOT ANDROID)
         ngraph_set(NGRAPH_UNIT_TEST_ENABLE TRUE)
-        ngraph_set(NGRAPH_UNIT_TEST_OPENVINO_ENABLE TRUE)
+        ngraph_set(NGRAPH_IE_ENABLE TRUE)
         # ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE TRUE)
         set(NGRAPH_ONNX_IMPORT_ENABLE TRUE CACHE BOOL "" FORCE)
     else()
         ngraph_set(NGRAPH_UNIT_TEST_ENABLE FALSE)
         ngraph_set(NGRAPH_TEST_UTIL_ENABLE FALSE)
-        ngraph_set(NGRAPH_UNIT_TEST_OPENVINO_ENABLE FALSE)
+        ngraph_set(NGRAPH_IE_ENABLE FALSE)
         ngraph_set(NGRAPH_ONNX_IMPORT_ENABLE FALSE)
     endif()
 
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644 (file)
index 0000000..c473d2b
--- /dev/null
@@ -0,0 +1,3 @@
+#!groovy
+
+dldtPipelineEntrypoint(this)
index bed7350..a074ecd 100644 (file)
@@ -37,8 +37,12 @@ function(ie_cpack_set_library_dir)
 
     if(WIN32)
         set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${CMAKE_BUILD_TYPE}/${ARCH} PARENT_SCOPE)
+        set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/bin/${CMAKE_BUILD_TYPE}/${ARCH} PARENT_SCOPE)
+        set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${CMAKE_BUILD_TYPE}/${ARCH} PARENT_SCOPE)
     else()
         set(IE_CPACK_LIBRARY_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
+        set(IE_CPACK_RUNTIME_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
+        set(IE_CPACK_ARCHIVE_PATH ${IE_CPACK_IE_DIR}/lib/${ARCH} PARENT_SCOPE)
     endif()
 endfunction()
 
@@ -59,8 +63,10 @@ macro(ie_cpack)
     set(CPACK_GENERATOR "TGZ")
     if(WIN32)
         set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
+        string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
     else()
         set(CPACK_PACKAGE_NAME inference-engine)
+        string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
     endif()
     set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
     set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
index ae4161d..0dfc1ce 100644 (file)
@@ -118,7 +118,6 @@ function(addIeTarget)
     if (ARG_ADD_CPPLINT)
         # code style
         add_cpplint_target(${ARG_NAME}_cpplint FOR_TARGETS ${ARG_NAME})
-        add_clang_format_target(${ARG_NAME}_clang_format FOR_TARGETS ${ARG_NAME})
     endif()
     if (ARG_DEVELOPER_PACKAGE)
         # developer package
index 595d139..ded2740 100644 (file)
@@ -35,10 +35,6 @@ function(add_clang_format_target TARGET_NAME)
     set(multiValueArgs "FOR_TARGETS" "FOR_SOURCES" "EXCLUDE_PATTERNS")
     cmake_parse_arguments(CLANG_FORMAT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
 
-    if(CLANG_FORMAT_ALL)
-        set(all ALL)
-    endif()
-
     foreach(target IN LISTS CLANG_FORMAT_FOR_TARGETS)
         get_target_property(target_sources "${target}" SOURCES)
         list(APPEND CLANG_FORMAT_FOR_SOURCES ${target_sources})
@@ -95,7 +91,6 @@ function(add_clang_format_target TARGET_NAME)
         "All clang-format output files")
 
     add_custom_target(${TARGET_NAME}
-        ${all}
         DEPENDS ${all_output_files}
         COMMENT "[clang-format] ${TARGET_NAME}")
 
index 2740691..cd5a520 100644 (file)
@@ -4,6 +4,8 @@
 
 cmake_policy(SET CMP0054 NEW)
 
+include(models)
+
 #we have number of dependencies stored on ftp
 include(dependency_solver)
 
@@ -13,6 +15,23 @@ endif()
 
 include(ExternalProject)
 
+if (ENABLE_SAME_BRANCH_FOR_MODELS)
+    branchName(MODELS_BRANCH)
+else()
+    set(MODELS_BRANCH "master")
+endif()
+
+
+if (ENABLE_DATA)
+    add_models_repo(${ENABLE_DATA} "data:inference-engine/open-source-data.git")
+    set(MODELS_PATH "${TEMP}/data/src/data")
+    set(DATA_PATH "${MODELS_PATH}")
+endif()
+
+message(STATUS "MODELS_PATH=" ${MODELS_PATH})
+
+fetch_models_and_validation_set()
+
 include(linux_name)
 if(COMMAND get_linux_name)
     get_linux_name(LINUX_OS_NAME)
index 966feef..26fe61c 100644 (file)
@@ -11,7 +11,11 @@ file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path)
 
 set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE;CMAKE_SKIP_RPATH")
 
-load_cache("${cache_path}" READ_WITH_PREFIX "" ${ie_options})
+foreach(option IN LISTS ie_options)
+    if(NOT DEFINED "${option}")
+        load_cache("${cache_path}" READ_WITH_PREFIX "" ${option})
+     endif()
+endforeach()
 
 message(STATUS "The following CMake options are exported from Inference Engine Developer package")
 message("")
index 112371c..9749c21 100644 (file)
@@ -78,7 +78,9 @@ ie_dependent_option (GAPI_TEST_PERF "if GAPI unit tests should examine performan
 
 ie_dependent_option (ENABLE_MYRIAD_MVNC_TESTS "functional and behavior tests for mvnc api" OFF "ENABLE_TESTS;ENABLE_MYRIAD" OFF)
 
-ie_dependent_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON "NOT MINGW" OFF)
+ie_dependent_option (ENABLE_DATA "fetch models from open-source-data repo" ON "ENABLE_FUNCTIONAL_TESTS;NOT ANDROID" OFF)
+
+ie_dependent_option (ENABLE_SAME_BRANCH_FOR_MODELS "uses same branch for models and for inference engine, if not enabled models are taken from master" OFF "ENABLE_TESTS" OFF)
 
 ie_dependent_option (ENABLE_BEH_TESTS "tests oriented to check inference engine API corecteness" ON "ENABLE_TESTS" OFF)
 
diff --git a/inference-engine/cmake/models.cmake b/inference-engine/cmake/models.cmake
new file mode 100644 (file)
index 0000000..3203068
--- /dev/null
@@ -0,0 +1,80 @@
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+if(ENABLE_DOCKER)
+    cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
+else()
+    cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
+endif()
+
+cmake_policy(SET CMP0054 NEW)
+
+find_package(Git REQUIRED)
+
+set(MODELS_LST "")
+set(MODELS_LST_TO_FETCH "")
+
+function (add_models_repo add_to_fetcher model_name)
+    list(LENGTH ARGV add_models_args)
+    if (add_models_args EQUAL 3)
+        list(GET ARGV 2 branch_name)
+    else()
+        set(branch_name ${MODELS_BRANCH})
+    endif()
+    if (add_to_fetcher)
+        set(model_name "${model_name}:${branch_name}")
+        list(APPEND MODELS_LST_TO_FETCH ${model_name})
+    endif()
+
+    list(APPEND MODELS_LST ${model_name})
+
+    set(MODELS_LST_TO_FETCH ${MODELS_LST_TO_FETCH} PARENT_SCOPE)
+    set(MODELS_LST ${MODELS_LST} PARENT_SCOPE)
+endfunction()
+
+function(add_lfs_repo name prefix url tag)
+    ExternalProject_Add(${name}
+        PREFIX ${prefix}
+        GIT_REPOSITORY ${url}
+        GIT_TAG ${tag}
+        GIT_CONFIG "http.sslverify=false"
+        GIT_PROGRESS 1
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND ""
+        INSTALL_COMMAND ""
+        LOG_DOWNLOAD ON)
+
+    execute_process(
+        COMMAND ${GIT_EXECUTABLE} lfs install --local --force
+        WORKING_DIRECTORY ${prefix}/src/${name}
+        OUTPUT_VARIABLE lfs_output
+        RESULT_VARIABLE lfs_var)
+    if(lfs_var)
+        message(FATAL_ERROR [=[
+            Failed to setup Git LFS: ${lfs_output}
+            Git lfs must be installed in order to fetch models
+            Please install it from https://git-lfs.github.com/
+        ]=])
+    endif()
+endfunction()
+
+function (fetch_models_and_validation_set)
+    foreach(loop_var ${MODELS_LST_TO_FETCH})
+        string(REPLACE ":" ";" MODEL_CONFIG_LST ${loop_var})
+
+        list(GET MODEL_CONFIG_LST 0 folder_name)
+        list(GET MODEL_CONFIG_LST 1 repo_name)
+        list(GET MODEL_CONFIG_LST 2 branch_name)
+
+        string(FIND ${folder_name} "model" IS_MODEL)
+        if(${folder_name} MATCHES "model*")
+            set(FOLDER_NAME "/models/src")
+        endif()
+        add_lfs_repo(
+            "${folder_name}"
+            ${TEMP}${FOLDER_NAME}/${folder_name}
+            "git@gitlab-icv.inn.intel.com:${repo_name}"
+            "${branch_name}")
+    endforeach(loop_var)
+endfunction()
index c4a0354..75ecd3c 100644 (file)
@@ -90,8 +90,8 @@ function(ie_add_plugin)
         ie_cpack_add_component(${install_component} REQUIRED DEPENDS core)
 
         install(TARGETS ${IE_PLUGIN_NAME}
-            RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT ${install_component}
-            ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT ${install_component}
+            RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT ${install_component}
+            ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT ${install_component}
             LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT ${install_component})
     endif()
 endfunction()
index 79f7391..d0b5e20 100644 (file)
@@ -104,4 +104,4 @@ if(ANDROID)
     set(LIBUSB_LIBRARY "${LIBUSB}/libs/${ANDROID_ABI}/libusb1.0.so")
 
     log_rpath_from_dir(LIBUSB "${LIBUSB}/libs/${ANDROID_ABI}")
-endif()
\ No newline at end of file
+endif()
index b82618a..2684412 100644 (file)
@@ -1,5 +1,5 @@
 // Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+// SPDX-License-Identifier : Apache-2.0
 //
 
 #include <stdlib.h>
index 15a2b53..bf599e6 100644 (file)
@@ -1,5 +1,5 @@
 // Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+// SPDX-License-Identifier : Apache-2.0
 //
 
 #include <stdlib.h>
index ca7d92e..ef8527a 100644 (file)
@@ -28,8 +28,8 @@ export(TARGETS ${TARGET_NAME} NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/ta
 # install
 
 install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
-        ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
+        ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
         LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
 
 install(DIRECTORY ${InferenceEngine_C_API_SOURCE_DIR}/include/
index 676bfae..96af9de 100644 (file)
@@ -29,15 +29,18 @@ def build_argparser():
     args = parser.add_argument_group("Options")
     args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
     args.add_argument("-m", "--model", help="Required. Path to an .xml file with a trained model.",
-        required=True, type=str)
+                      required=True, type=str)
     args.add_argument("-i", "--input", help="Required. Path to image file.",
-        required=True, type=str, nargs="+")
+                      required=True, type=str, nargs="+")
     args.add_argument("-l", "--cpu_extension",
-        help="Optional. Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.",
-        type=str, default=None)
+                      help="Optional. Required for CPU custom layers. "
+                           "Absolute path to a shared library with the kernels implementations.",
+                      type=str, default=None)
     args.add_argument("-d", "--device",
-        help="Optional. Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified (CPU by default)",
-        default="CPU", type=str)
+                      help="Optional. Specify the target device to infer on; "
+                           "CPU, GPU, FPGA or MYRIAD is acceptable. "
+                           "Sample will look for a suitable plugin for device specified (CPU by default)",
+                      default="CPU", type=str)
     args.add_argument("--labels", help="Optional. Labels mapping file", default=None, type=str)
     args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int)
 
@@ -59,9 +62,10 @@ def main():
     # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
     log.info("Device info:")
     versions = ie.get_versions(args.device)
-    print("{}{}".format(" "*8, args.device))
-    print("{}MKLDNNPlugin version ......... {}.{}".format(" "*8, versions[args.device].major, versions[args.device].minor))
-    print("{}Build ........... {}".format(" "*8, versions[args.device].build_number))
+    print("{}{}".format(" " * 8, args.device))
+    print("{}MKLDNNPlugin version ......... {}.{}".format(" " * 8, versions[args.device].major,
+                                                          versions[args.device].minor))
+    print("{}Build ........... {}".format(" " * 8, versions[args.device].build_number))
 
     if args.cpu_extension and "CPU" in args.device:
         ie.add_extension(args.cpu_extension, "CPU")
@@ -79,8 +83,15 @@ def main():
     # -----------------------------------------------------------------------------------------------------
 
     # --------------------------- 3. Read and preprocess input --------------------------------------------
-    input_blob = next(iter(net.inputs))
-    n, c, h, w = net.inputs[input_blob].shape
+
+    print("inputs number: " + str(len(net.inputs.keys())))
+
+    for input_key in net.inputs:
+        print("input shape: " + str(net.inputs[input_key].shape))
+        print("input key: " + input_key)
+        if len(net.inputs[input_key].layout) == 4:
+            n, c, h, w = net.inputs[input_key].shape
+
     images = np.ndarray(shape=(n, c, h, w))
     images_hw = []
     for i in range(n):
@@ -94,13 +105,14 @@ def main():
             log.warning("Image {} is resized from {} to {}".format(args.input[i], image.shape[:-1], (h, w)))
         image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
         images[i] = image
+
     # -----------------------------------------------------------------------------------------------------
 
     # --------------------------- 4. Configure input & output ---------------------------------------------
     # --------------------------- Prepare input blobs -----------------------------------------------------
     log.info("Preparing input blobs")
-    assert (len(net.inputs.keys()) == 1 or len(net.inputs.keys()) == 2), "Sample supports topologies only with 1 or 2 inputs"
-    input_blob = next(iter(net.inputs))
+    assert (len(net.inputs.keys()) == 1 or len(
+        net.inputs.keys()) == 2), "Sample supports topologies only with 1 or 2 inputs"
     out_blob = next(iter(net.outputs))
     input_name, input_info_name = "", ""
 
@@ -112,9 +124,21 @@ def main():
         elif len(net.inputs[input_key].layout) == 2:
             input_info_name = input_key
             net.inputs[input_key].precision = 'FP32'
-            if net.inputs[input_key].shape[1] != 3 and net.inputs[input_key].shape[1] != 6 or net.inputs[input_key].shape[0] != 1:
+            if net.inputs[input_key].shape[1] != 3 and net.inputs[input_key].shape[1] != 6 or \
+                net.inputs[input_key].shape[0] != 1:
                 log.error('Invalid input info. Should be 3 or 6 values length.')
 
+    data = {}
+    data[input_name] = images
+
+    if input_info_name != "":
+        infos = np.ndarray(shape=(n, c), dtype=float)
+        for i in range(n):
+            infos[i, 0] = h
+            infos[i, 1] = w
+            infos[i, 2] = 1.0
+        data[input_info_name] = infos
+
     # --------------------------- Prepare output blobs ----------------------------------------------------
     log.info('Preparing output blobs')
 
@@ -141,7 +165,7 @@ def main():
     log.info("Loading model to the device")
     exec_net = ie.load_network(network=net, device_name=args.device)
     log.info("Creating infer request and starting inference")
-    res = exec_net.infer(inputs={input_blob: images})
+    res = exec_net.infer(inputs=data)
     # -----------------------------------------------------------------------------------------------------
 
     # --------------------------- Read and postprocess output ---------------------------------------------
@@ -159,8 +183,8 @@ def main():
             ymin = np.int(ih * proposal[4])
             xmax = np.int(iw * proposal[5])
             ymax = np.int(ih * proposal[6])
-            print("[{},{}] element, prob = {:.6}    ({},{})-({},{}) batch id : {}"\
-                .format(number, label, confidence, xmin, ymin, xmax, ymax, imid), end="")
+            print("[{},{}] element, prob = {:.6}    ({},{})-({},{}) batch id : {}" \
+                  .format(number, label, confidence, xmin, ymin, xmax, ymax, imid), end="")
             if proposal[2] > 0.5:
                 print(" WILL BE PRINTED!")
                 if not imid in boxes.keys():
@@ -181,7 +205,8 @@ def main():
     # -----------------------------------------------------------------------------------------------------
 
     log.info("Execution successful\n")
-    log.info("This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool")
+    log.info(
+        "This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool")
 
 
 if __name__ == '__main__':
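
The hunk above reworks the SSD sample so it discovers its inputs by rank instead of assuming a single input blob: the 4-dimensional input receives the image batch, and an optional 2-dimensional "image info" input receives height, width and scale. The following is a minimal sketch of that pattern only, not the full sample; the model paths, the device name and the zero-filled batch are placeholders, not values taken from this commit.

```python
import numpy as np
from openvino.inference_engine import IECore

ie = IECore()
# hypothetical IR paths, for illustration only
net = ie.read_network(model="model.xml", weights="model.bin")

image_blob, info_blob = None, None
for name, info in net.inputs.items():
    if len(info.shape) == 4:        # N, C, H, W -> image input
        image_blob = name
        n, c, h, w = info.shape
    elif len(info.shape) == 2:      # 1 x 3 (or 1 x 6) -> image info input
        info_blob = name

# dummy batch in place of the resized, HWC->CHW-transposed images
data = {image_blob: np.zeros((n, c, h, w), dtype=np.float32)}
if info_blob:
    data[info_blob] = np.array([[h, w, 1.0]], dtype=np.float32)

exec_net = ie.load_network(network=net, device_name="CPU")
res = exec_net.infer(inputs=data)
```

Feeding a dict keyed by input name (rather than a single `input_blob`) is what lets the same call serve both one-input and two-input detection topologies.
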
index 50b2756..042091d 100644 (file)
@@ -39,6 +39,16 @@ add_custom_command(TARGET ${TARGET_NAME}
     COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../__init__.py
 )
 
+# creates a folder in openvino directory and a symlink to benchmark
+# inside bin directory for developers for running python benchmark_app
+if(UNIX)
+    add_custom_command(TARGET ${TARGET_NAME}
+        POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../tools
+)
+    file(COPY ${OpenVINO_MAIN_SOURCE_DIR}/tools/benchmark DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../tools/)
+endif()
+
 # install
 
 install(TARGETS ${TARGET_NAME}
index d9c7c56..648fa6a 100644 (file)
@@ -171,9 +171,9 @@ cdef class IECore:
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
     #  ie = IECore()
-    #  exec_net = ie.load_network(network=net, device_name="CPU", num_requsts=2)
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
+    #  exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
     #  ```
     cpdef ExecutableNetwork load_network(self, IENetwork network, str device_name, config=None, int num_requests=1):
         cdef ExecutableNetwork exec_net = ExecutableNetwork()
@@ -197,8 +197,8 @@ cdef class IECore:
     #  @return An `ExecutableNetwork` object
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
     #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  exec_net = ie.load_network(network=net, device_name="MYRIAD", num_requsts=2)
     #  # export executable network
     #  exec_net.export(path_to_file_to_save)
@@ -226,8 +226,8 @@ cdef class IECore:
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
     #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  layers_map = ie.query_network(network=net, device_name="HETERO:GPU,CPU")
     #  ```
     def query_network(self, IENetwork network, str device_name, config=None):
@@ -238,12 +238,19 @@ cdef class IECore:
         return c_map_to_dict(res)
 
     ## Sets a configuration for a plugin
-    #  NOTE: When specifying a key value of a config, the "KEY_" prefix is omitted.
+    #
+    #  \note When specifying a key value of a config, the "KEY_" prefix is omitted.
+    #
     #  @param config: a dictionary of configuration parameters as keys and their values
     #  @param device_name: a device name of a target plugin
     #  @return None
     #
-    #  Usage examples: See the `set_affinity` method of the `IENetwork` class
+    #  Usage examples:\n
+    #  ```python
+    #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
+    #  ie.set_config({"DYN_BATCH_ENABLED": "YES"})
+    #  ```
     def set_config(self, config: dict, device_name: str):
         cdef map[string, string] c_config = dict_to_c_map(config)
         self.impl.setConfig(c_config, device_name.encode())
@@ -316,7 +323,9 @@ cdef class IECore:
 
     ## Gets a configuration dedicated to device behavior. The method targets to extract information
     #  which can be set via set_config method.
-    #  NOTE: When specifying a key value of a config, the "KEY_" prefix is omitted.
+    #
+    #  \note When specifying a key value of a config, the "KEY_" prefix is omitted.
+    #
     #  @param device_name: A name of a device to get a config value.
     #  @param config_name: A config name to request.
     #  @return A config value corresponding to a config key.
@@ -452,8 +461,8 @@ cdef class ExecutableNetwork:
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
     #  ie_core = IECore()
+    #  net = ie_core.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  exec_net = ie_core.load_network(net, device, num_requests=2)
     #  res = exec_net.infer({'data': img})
     #  res
@@ -531,8 +540,8 @@ cdef class ExecutableNetwork:
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
     #  ie_core = IECore()
+    #  net = ie_core.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  exec_net = ie_core.load_network(net, device, num_requsts=2)
     #  exec_graph = exec_net.get_exec_graph_info()
     #  ```
@@ -549,7 +558,7 @@ cdef class ExecutableNetwork:
     #  Usage example:\n
     #  ```python
     #  ie = IECore()
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  exec_net = ie.load_network(net, "CPU")
     #  exec_net.get_metric("NETWORK_NAME")
     #  ```
@@ -564,7 +573,7 @@ cdef class ExecutableNetwork:
     #  Usage example:\n
     #  ```python
     #  ie = IECore()
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  exec_net = ie.load_network(net, "CPU")
     #  exec_net.get_metric("DEVICE_ID")
     #  ```
@@ -576,8 +585,8 @@ cdef class ExecutableNetwork:
     #  @return None
     #
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
     #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  exec_net = ie.load_network(network=net, device_name="MYRIAD", num_requsts=2)
     #  exec_net.export(path_to_file_to_save)
     #  ```
@@ -632,8 +641,8 @@ cdef class InferRequest:
     #  Usage example:\n
     #  ```python
     #  callback = lambda status, py_data: print("Request with id {} finished with status {}".format(py_data, status))
-    #  net = IENetwork("./model.xml", "./model.bin")
     #  ie = IECore()
+    #  net = ie.read_network(model="./model.xml", weights="./model.bin")
     #  exec_net = ie.load_network(net, "CPU", num_requests=4)
     #  for id, req in enumerate(exec_net.requests):
     #      req.set_completion_callback(py_callback=callback, py_data=id)
@@ -662,7 +671,7 @@ cdef class InferRequest:
     #
     #  Usage example:\n
     #  ```python
-    #  exec_net = plugin.load(network=net, num_requests=2)
+    #  exec_net = ie_core.load_network(network=net, num_requests=2)
     #  exec_net.requests[0].infer({input_blob: image})
     #  res = exec_net.requests[0].outputs['prob']
     #  np.flip(np.sort(np.squeeze(res)),0)
@@ -683,7 +692,7 @@ cdef class InferRequest:
     #
     #  Usage example:\n
     #  ```python
-    #  exec_net = plugin.load(network=net, num_requests=2)
+    #  exec_net = ie_core.load_network(network=net, num_requests=2)
     #  exec_net.requests[0].async_infer({input_blob: image})
     #  request_status = exec_net.requests[0].wait()
     #  res = exec_net.requests[0].outputs['prob']
@@ -697,7 +706,8 @@ cdef class InferRequest:
 
     ## Waits for the result to become available. Blocks until specified timeout elapses or the result
     #  becomes available, whichever comes first.
-    #  NOTE: There are special values of the timeout parameter:
+    #
+    #  \note There are special values of the timeout parameter:
     #  * 0 - Immediately returns the inference status. It does not block or interrupt execution.
     #        To find statuses meaning, please refer to InferenceEngine::StatusCode in Inference Engine C++ documentation
     #  * -1 - Waits until inference result becomes available (default value)
@@ -724,12 +734,14 @@ cdef class InferRequest:
         return deref(self.impl).wait(<int64_t> timeout)
 
     ## Queries performance measures per layer to get feedback of what is the most time consuming layer.
-    #  NOTE: Performance counters data and format depends on the plugin
+    #
+    #  \note Performance counters data and format depends on the plugin
+    #
     #  @return Dictionary containing per-layer execution information.
     #
     #  Usage example:
     #  ```python
-    #  exec_net = plugin.load(network=net, num_requests=2)
+    #  exec_net = ie_core.load_network(network=net, num_requests=2)
     #  exec_net.requests[0].infer({input_blob: image})
     #  exec_net.requests[0].get_perf_counts()
     #  {'Conv2D': {'exec_type': 'jit_avx2_1x1',
@@ -780,18 +792,20 @@ cdef class InferRequest:
 
     ## Sets new batch size for certain infer request when dynamic batching is enabled in executable network
     #  that created this request.
-    #  NOTE: Support of dynamic batch size depends on the target plugin.
+    #
+    #  \note Support of dynamic batch size depends on the target plugin.
     #
     #  @param size: New batch size to be used by all the following inference calls for this request
     #  @return None
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
+    #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  # Set max batch size
     #  net.batch = 10
-    #  plugin.set_config({"DYN_BATCH_ENABLED": "YES"})
-    #  exec_net = plugin.load(network=net)
+    #  ie.set_config({"DYN_BATCH_ENABLED": "YES"})
+    #  exec_net = ie.load_network(network=net)
     #  # Set batch size for certain network.
     #  # NOTE: Input data shape will not be changed, but will be used partially in inference which increases performance
     #  exec_net.requests[0].set_batch(2)
@@ -855,7 +869,11 @@ cdef class IENetLayer:
     def type(self):
         return deref(self._ptr).type.decode()
 
-    ## Layer base operating precision. Provides getter and setter interfaces.
+    ## \note This property is deprecated.
+    #  Please, use out_data property to access DataPtr objects for all output ports, which contains full
+    #  information about layer's output data including precision.
+    #
+    #  Layer base operating precision. Provides getter and setter interfaces.
     @property
     def precision(self):
         warnings.filterwarnings("always", category=DeprecationWarning)
@@ -874,8 +892,8 @@ cdef class IENetLayer:
     #  The affinity attribute provides getter and setter interfaces, so the layer affinity can be modified directly.
     #  For example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
     #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  layers_map = ie.query_network(network=net, device_name="HETERO:GPU,CPU")
     #  layers = net.layers
     #  for layer, device in layers_map.items():
@@ -922,8 +940,10 @@ cdef class IENetLayer:
                 input_to_list.append(deref(layer.second).name.decode())
         return input_to_list
 
-    ## Deprecated: use out_data property to access DataPtr objects for all output ports, which contains full
+    ## \note This property is deprecated.
+    # Please, use out_data property to access DataPtr objects for all output ports, which contains full
     # information about layer's output data including layout
+    #
     # Returns the layout of the layer output data on 1st port
     @property
     def layout(self):
@@ -936,8 +956,10 @@ cdef class IENetLayer:
         cdef C.DataPtr c_input = deref(self._ptr).outData[0]
         return layout_int_to_str_map[deref(c_input).getLayout()]
 
-    ## Deprecated: use out_data property to access DataPtr objects for all output ports, which contains full
+    ## \note This property is deprecated.
+    # Please, use out_data property to access DataPtr objects for all output ports, which contains full
     # information about layer's output data including shape
+    #
     # Return the list of dimension of the layer output data on 1st port
     @property
     def shape(self):
@@ -988,7 +1010,10 @@ cdef class IENetLayer:
             weights_buffer.reset(blob.second)
             blobs_map[blob.first.decode()] = weights_buffer.to_numpy()
         return blobs_map
-    ## Dictionary with layer weights, biases or custom blobs if any
+    ## \note This property is deprecated.
+    #  Please use blobs property instead.
+    #
+    #  Dictionary with layer weights, biases or custom blobs if any
     @property
     def weights(self):
         warnings.filterwarnings("always", category=DeprecationWarning)
@@ -1003,6 +1028,9 @@ cdef class IENetLayer:
 cdef class IENetwork:
     ## Class constructor
     #
+    #  \note Reading networks using IENetwork constructor is deprecated.
+    #  Please, use IECore.read_network() method instead.
+    #
     #  @param model: A `.xml` file of the IR or PyCapsule containing smart pointer to nGraph function.
     #                In case of passing a `.xml` file  attribute value can be a string path or bytes with file content
     #                depending on `init_from_buffer` attribute value
@@ -1100,8 +1128,9 @@ cdef class IENetwork:
     ## Batch size of the network. Provides getter and setter interfaces to get and modify the
     #  network batch size. For example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
-    #  print(et.batch_size)
+    #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
+    #  print(net.batch_size)
     #  net.batch_size = 4
     #  print(net.batch_size)
     #  print(net.inputs['data'].shape)
@@ -1109,7 +1138,9 @@ cdef class IENetwork:
     @property
     def batch_size(self):
         return self.impl.getBatch()
-    ## Deprecated: network precision does not make sence, use precision on egdes.
+    ## \note This property is deprecated:
+    #  network precision does not make sense, use precision on edges.
+    #
     #  Precision of the network
     @property
     def precision(self):
@@ -1139,13 +1170,16 @@ cdef class IENetwork:
             layers[deref(l).name.decode()] = net_l
         return layers
 
-    ## Deprecated: new Calibration Tool doesn't generate statistics
+    ## \note This property is deprecated.
+    #  New Calibration Tool doesn't generate statistics
+    #
     #  Returns `LayersStatsMap` object containing dictionary that maps network layer names to calibration statistics
     #  represented by `LayerStats`  objects.
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
+    #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  net.stats.update({"conv1_2d" : LayserStats(min=(-25, -1, 0), max=(63, 124, 70)),
     #                    "conv2_2d" : LayserStats(min=(-5, -1, 0, 1, -7, 2), max=(63, 124, 70, 174, 99, 106))
     #                   })
@@ -1163,26 +1197,6 @@ cdef class IENetwork:
                                                          max=tuple(it.second["max".encode()]))
         return py_stats_map
 
-    ## NOTE: The function is deprecated. Please use the `IENetwork()` class constructor
-    #        to create valid instance of `IENetwork`.
-    #
-    #  Reads the model from the `.xml` and `.bin` files of the IR.
-    #
-    #  @param model: Path to `.xml` file  of the IR
-    #  @param weights: Path to `.bin` file  of the IR
-    #  @return An instance of the `IENetwork` class
-    @classmethod
-    def from_ir(cls, model: str, weights: str):
-        warnings.filterwarnings("always", category=DeprecationWarning)
-        warnings.warn("from_ir() method of IENetwork is deprecated. "
-                      "Please use IENetwork class constructor to create valid IENetwork instance",
-                      DeprecationWarning)
-        if not os.path.isfile(model):
-            raise Exception("Path to the model {} doesn't exists or it's a directory".format(model))
-        if not os.path.isfile(weights):
-            raise Exception("Path to the weights {} doesn't exists or it's a directory".format(weights))
-        cdef IENetwork net = IENetwork(model, weights)
-        return net
 
     ## Marks any intermediate layer as output layer to retrieve the inference results from the specified layers.
     #  @param outputs: List of layers to be set as model outputs. The list can contain strings with layer names to be set
@@ -1192,7 +1206,8 @@ cdef class IENetwork:
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
+    #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  net.add_outputs(["conv5_1', conv2_1', (split_2, 1)])]
     #  ```
     def add_outputs(self, outputs):
@@ -1216,14 +1231,16 @@ cdef class IENetwork:
     #
     #  Usage example:
     #  ```python
-    #  net = IENetwork(model=path_to_model, weights=path_to_weights)
+    #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml, weights=path_to_bin)
     #  net.serialize(path_to_xml, path_to_bin)
     #  ```
     def serialize(self, path_to_xml, path_to_bin: str = ""):
         self.impl.serialize(path_to_xml.encode(), path_to_bin.encode())
 
     ## Reshapes the network to change spatial dimensions, batch size, or any dimension.
-    #  NOTE: Before using this method, make sure that the target shape is applicable for the network.
+    #
+    #  \note Before using this method, make sure that the target shape is applicable for the network.
     #        Changing the network shape to an arbitrary value may lead to unpredictable behaviour.
     #
     #  @param input_shapes: A dictionary that maps input layer names to tuples with the target shape
@@ -1231,7 +1248,8 @@ cdef class IENetwork:
     #
     #  Usage example:\n
     #  ```python
-    #  net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
+    #  ie = IECore()
+    #  net = ie.read_network(model=path_to_xml_file, weights=path_to_bin_file)
     #  input_layer = next(iter(net.inputs))
     #  n, c, h, w = net.inputs[input_layer].shape
     #  net.reshape({input_layer: (n, c, h*2, w*2)})
@@ -1255,9 +1273,11 @@ cdef class IENetwork:
     #     return self.impl.getFunction()
 
 ## This class is the main plugin interface and serves to initialize and configure the plugin.
+#
+# \note This class is deprecated: Use IECore instead
+#
 cdef class IEPlugin:
-    ## Deprecated: Use IECore instead
-    #  Class constructor
+    ##  Class constructor
     #
     #  @param device: Target device name. Supported devices: CPU, GPU, FPGA, MYRIAD, HETERO, MULTI
     #  @param plugin_dirs: List of paths to plugin directories
index 5731cb7..40f6264 100644 (file)
@@ -40,7 +40,7 @@ public:
     /**
      * @brief A default constructor
      */
-    CNNNetReader(): actual(shared_from_irelease(InferenceEngine::CreateCNNNetReader())) {
+    CNNNetReader(): actual(InferenceEngine::CreateCNNNetReaderPtr()) {
         if (actual == nullptr) {
             THROW_IE_EXCEPTION << "CNNNetReader was not initialized.";
         }
@@ -182,7 +182,7 @@ public:
     }
 
 private:
-    std::shared_ptr<ICNNNetReader> actual;
+    CNNNetReaderPtr actual;
     std::shared_ptr<CNNNetwork> network;
 };
 IE_SUPPRESS_DEPRECATED_END
index db6dde4..16fb7cd 100644 (file)
@@ -66,8 +66,11 @@ public:
      * @param reader Pointer to the ICNNNetReader object
      */
     IE_SUPPRESS_DEPRECATED_START
-    explicit CNNNetwork(std::shared_ptr<ICNNNetReader> reader): reader(reader), actual(reader->getNetwork(nullptr)) {
-        if (actual == nullptr) {
+    explicit CNNNetwork(CNNNetReaderPtr reader_): reader(reader_) {
+        if (reader == nullptr) {
+            THROW_IE_EXCEPTION << "ICNNNetReader was not initialized.";
+        }
+        if ((actual = reader->getNetwork(nullptr)) == nullptr) {
             THROW_IE_EXCEPTION << "CNNNetwork was not initialized.";
         }
     }
@@ -161,6 +164,15 @@ public:
     }
 
     /**
+     * @brief An overloaded operator cast to get pointer on current network
+     *
+     * @return A shared pointer of the current network
+     */
+    operator std::shared_ptr<ICNNNetwork>() {
+        return network;
+    }
+
+    /**
      * @brief An overloaded operator & to get current network
      *
      * @return An instance of the current network
@@ -183,6 +195,15 @@ public:
      *
      * @return constant nGraph function
      */
+    std::shared_ptr<ngraph::Function> getFunction() noexcept {
+        return actual->getFunction();
+    }
+
+    /**
+     * @brief Returns constant nGraph function
+     *
+     * @return constant nGraph function
+     */
     std::shared_ptr<const ngraph::Function> getFunction() const noexcept {
         return actual->getFunction();
     }
@@ -297,7 +318,7 @@ protected:
      * @brief Reader extra reference, might be nullptr
      */
     IE_SUPPRESS_DEPRECATED_START
-    std::shared_ptr<ICNNNetReader> reader;
+    CNNNetReaderPtr reader;
     IE_SUPPRESS_DEPRECATED_END
     /**
      * @brief Network extra interface, might be nullptr
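A minimal usage sketch for the new non-const `getFunction()` overload (hedged: it assumes an IR v10 model read through `Core::ReadNetwork`, so an nGraph function is attached; the model path is illustrative):

```cpp
#include <iostream>
#include <memory>

#include <ie_core.hpp>
#include <ngraph/function.hpp>

void print_ops() {
    InferenceEngine::Core ie;
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml", "model.bin");

    // Non-const overload added in this change; returns nullptr for non-nGraph (IR v7) networks.
    std::shared_ptr<ngraph::Function> function = network.getFunction();
    if (function) {
        for (const auto& op : function->get_ops())
            std::cout << op->get_friendly_name() << std::endl;
    }
}
```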
index 8d07706..df219cd 100644 (file)
@@ -58,6 +58,7 @@ public:
         IE_SUPPRESS_DEPRECATED_END
     }
 
+private:
     /**
      * @brief Loads function from the library and returns a pointer to it
      * @param functionName Name of function to load
@@ -127,6 +128,15 @@ public:
     }
 
     /**
+     * @brief Constructs an object with existing loader
+     * @param so_loader Existing pointer to a library loader
+     */
+    explicit SOPointer(std::shared_ptr<Loader> so_loader)
+        : _so_loader(so_loader),
+          _pointedObj(details::shared_from_irelease(
+              SymbolLoader<Loader>(_so_loader).template instantiateSymbol<T>(SOCreatorTrait<T>::name))) {}
+
+    /**
      * @brief The copy-like constructor, can create So Pointer that dereferenced into child type if T is derived of U
      * @param that copied SOPointer object
      */
@@ -183,6 +193,7 @@ protected:
      * @brief Gets a smart pointer to the DLL
      */
     std::shared_ptr<Loader> _so_loader;
+
     /**
      * @brief Gets a smart pointer to the custom object
      */
index af79c19..21b611f 100644 (file)
 
 #include <map>
 #include <string>
+#include <vector>
 
 #include "details/ie_no_copy.hpp"
+#include "details/ie_so_pointer.hpp"
 #include "ie_api.h"
 #include "ie_blob.h"
 #include "ie_common.h"
@@ -118,14 +120,45 @@ public:
      * @return IR version number: 1 or 2
      */
     virtual int getVersion(ResponseDesc* resp) noexcept = 0;
+
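+    /**
+     * @brief Registers the given extensions in the reader so they can be used while parsing the network
+     * @param ext Vector of extension pointers
+     */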
+    virtual void addExtensions(const std::vector<InferenceEngine::IExtensionPtr>& ext) = 0;
+
+    /**
+     * @brief A virtual destructor.
+     */
+    ~ICNNNetReader() override = default;
+};
+
+IE_SUPPRESS_DEPRECATED_START
+
+namespace details {
+
+/**
+ * @brief This class defines the name of the factory for creating an ICNNNetReader object in DLL
+ */
+template<>
+class SOCreatorTrait<ICNNNetReader> {
+public:
+    /**
+     * @brief A name of the factory for creating an ICNNNetReader object in DLL
+     */
+    static constexpr auto name = "CreateICNNNetReader";
 };
 
+}  // namespace details
+
+/**
+ * @brief A C++ helper to work with objects created by the IR readers plugin.
+ * Implements different interfaces.
+ */
+using CNNNetReaderPtr = InferenceEngine::details::SOPointer<ICNNNetReader, InferenceEngine::details::SharedObjectLoader>;
+
 /**
  * @brief Creates a CNNNetReader instance
- *
  * @return An object that implements the ICNNNetReader interface
  */
-IE_SUPPRESS_DEPRECATED_START
-INFERENCE_ENGINE_API(ICNNNetReader*) CreateCNNNetReader() noexcept;
+INFERENCE_ENGINE_API_CPP(CNNNetReaderPtr) CreateCNNNetReaderPtr() noexcept;
+
 IE_SUPPRESS_DEPRECATED_END
+
 }  // namespace InferenceEngine
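A hedged sketch of driving the new helper directly; the `ReadNetwork`/`ReadWeights`/`getNetwork` calls are the deprecated `ICNNNetReader` interface, and the file names are illustrative (most code should keep using the `CNNNetReader` or `Core` wrappers):

```cpp
#include <ie_icnn_net_reader.h>

void read_ir_with_reader_ptr() {
    // The returned SOPointer keeps the IR readers library loaded while the object is alive.
    InferenceEngine::CNNNetReaderPtr reader = InferenceEngine::CreateCNNNetReaderPtr();

    InferenceEngine::ResponseDesc resp;
    reader->ReadNetwork("model.xml", &resp);   // parse the topology
    reader->ReadWeights("model.bin", &resp);   // attach the weights
    InferenceEngine::ICNNNetwork* network = reader->getNetwork(&resp);
    (void)network;
}
```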
index 68b67ca..c7eca27 100644 (file)
@@ -48,6 +48,12 @@ public:
     using Ptr = std::shared_ptr<ICNNNetwork>;
 
     /**
+     * @brief Returns nGraph function
+     * @return nGraph function
+     */
+    virtual std::shared_ptr<ngraph::Function> getFunction() noexcept = 0;
+
+    /**
      * @brief Returns constant nGraph function
      * @return constant nGraph function
      */
index 0bc591d..fa40cdd 100644 (file)
@@ -2290,23 +2290,20 @@ public:
 
 /**
 * @deprecated Migrate to IR v10 and work with ngraph::Function directly. This class will be removed in 2020.3
- * @brief This class represents a standard Scatter layer
+ * @brief This class represents a standard ScatterUpdate layer
  */
-class INFERENCE_ENGINE_INTERNAL_CNNLAYER_CLASS(ScatterLayer): public CNNLayer {
+class INFERENCE_ENGINE_INTERNAL_CNNLAYER_CLASS(ScatterUpdateLayer): public CNNLayer {
 public:
     /**
-     * @brief The axis in Dictionary to scatter Indexes from
-     */
-    int axis = 0;
-    /**
-     * @brief Creates a new ScatterLayer instance.
+     * @brief Creates a new ScatterUpdateLayer instance.
      */
     using CNNLayer::CNNLayer;
 
-    ~ScatterLayer() override;
+    ~ScatterUpdateLayer() override;
 };
 
 /**
+ * @deprecated Migrate to IR v10 and work with ngraph::Function directly. This class will be removed in 2020.3
  * @brief This class represents an onnx ExperimentalDetectronPriorGridGenerator Layer
  */
 class INFERENCE_ENGINE_INTERNAL_CNNLAYER_CLASS(ExperimentalDetectronPriorGridGeneratorLayer): public CNNLayer {
@@ -2341,6 +2338,23 @@ public:
 };
 
 /**
+ * @brief This class represents a standard ExperimentalDetectronTopKROIs layer
+ */
+class INFERENCE_ENGINE_INTERNAL_CNNLAYER_CLASS(ExperimentalDetectronTopKROIs): public CNNLayer {
+public:
+    /**
+     * @brief The maximum number of output rois
+     */
+    int max_rois = 0;
+    /**
+     * @brief Creates a new ExperimentalDetectronTopKROIs instance.
+     */
+    using CNNLayer::CNNLayer;
+
+    virtual ~ExperimentalDetectronTopKROIs();
+};
+
+/**
  * @brief This class represents an onnx ExperimentalDetectronGenerateProposalsSingleImage Layer
  */
 class INFERENCE_ENGINE_INTERNAL_CNNLAYER_CLASS(ExperimentalDetectronGenerateProposalsSingleImageLayer): public CNNLayer {
index f9ad11d..bb904ef 100644 (file)
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include "ie_api.h"
+#include "ie_blob.h"
 
 namespace ngraph {
 
@@ -325,6 +326,7 @@ private:
 };
 
 #ifdef __clang__
+extern template struct INFERENCE_ENGINE_API_CLASS(InferenceEngine::Parameter::RealData<InferenceEngine::Blob::Ptr>);
 extern template struct INFERENCE_ENGINE_API_CLASS(InferenceEngine::Parameter::RealData<int>);
 extern template struct INFERENCE_ENGINE_API_CLASS(InferenceEngine::Parameter::RealData<bool>);
 extern template struct INFERENCE_ENGINE_API_CLASS(InferenceEngine::Parameter::RealData<float>);
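A hedged illustration of why the `Blob::Ptr` specialization is now exported: a `Parameter` can carry a blob across the plugin API boundary (the blob shape and precision below are arbitrary):

```cpp
#include <ie_blob.h>
#include <ie_parameter.hpp>

void parameter_blob_example() {
    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3}, InferenceEngine::Layout::NC);
    InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob<float>(desc);
    blob->allocate();

    InferenceEngine::Parameter param = blob;                  // stored through RealData<Blob::Ptr>
    auto restored = param.as<InferenceEngine::Blob::Ptr>();   // retrieves the same shared blob
    (void)restored;
}
```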
index 317a911..49a6212 100644 (file)
@@ -348,5 +348,17 @@ DECLARE_CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS);
  */
 DECLARE_CONFIG_KEY(DUMP_EXEC_GRAPH_AS_DOT);
 
+
+/**
+ * @brief The name for setting to execute in bfloat16 precision whenever it is possible
+ *
+ * This option lets the plugin know to downscale the precision where it sees performance benefits from
+ * bfloat16 execution.
+ * This option does not guarantee accuracy of the network; the accuracy in this mode should be
+ * verified separately by the user, and based on the performance and accuracy results it is the
+ * user's decision whether to use this option.
+ */
+DECLARE_CONFIG_KEY(ENFORCE_BF16);
+
 }  // namespace PluginConfigParams
 }  // namespace InferenceEngine
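A minimal sketch of enabling the new key through the standard `Core::SetConfig` path (whether the CPU plugin actually switches to bfloat16 depends on hardware support, and accuracy should be re-verified as the comment above says):

```cpp
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

void enable_bf16() {
    InferenceEngine::Core ie;
    // Allow the CPU plugin to downscale precision to bfloat16 where it expects a speedup.
    ie.SetConfig({{ CONFIG_KEY(ENFORCE_BF16), CONFIG_VALUE(YES) }}, "CPU");
}
```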
index 4252503..f0a05b5 100644 (file)
@@ -26,7 +26,8 @@ public:
         UNSPECIFIED = 255, /**< Unspecified value. Used by default */
         MIXED = 0,         /**< Mixed value. Can be received from network. No applicable for tensors */
         FP32 = 10,         /**< 32bit floating point value */
-        FP16 = 11,         /**< 16bit floating point value */
+        FP16 = 11,         /**< 16bit floating point value, 5 bit for exponent, 10 bit for mantissa */
+        BF16 = 12,         /**< 16bit floating point value, 8 bit for exponent, 7 bit for mantissa */
         Q78 = 20,          /**< 16bit specific signed fixed point precision */
         I16 = 30,          /**< 16bit signed integer value */
         U8 = 40,           /**< 8bit unsigned integer value */
@@ -106,6 +107,7 @@ public:
             switch (precisionInfo.value) {
                 CASE(FP32, float);
                 CASE2(FP16, int16_t, uint16_t);
+                CASE2(BF16, int16_t, uint16_t);
                 CASE(I16, int16_t);
                 CASE(I32, int32_t);
                 CASE(I64, int64_t);
@@ -181,9 +183,9 @@ public:
         static std::unordered_map<std::string, ePrecision> names = {
 #define PRECISION_NAME(s) {#s, s}
             PRECISION_NAME(Q78),  PRECISION_NAME(U8),    PRECISION_NAME(I8),    PRECISION_NAME(I16),
-            PRECISION_NAME(I32),  PRECISION_NAME(I64),   PRECISION_NAME(U64),   PRECISION_NAME(U16),
+            PRECISION_NAME(I32),  PRECISION_NAME(I64),   PRECISION_NAME(U64),    PRECISION_NAME(U16),
             PRECISION_NAME(FP32), PRECISION_NAME(FP16),  PRECISION_NAME(MIXED), PRECISION_NAME(BIN),
-            PRECISION_NAME(BOOL),
+            PRECISION_NAME(BOOL), PRECISION_NAME(BF16),
 #undef PRECISION_NAME
         };
         auto i = names.find(str);
@@ -260,6 +262,7 @@ protected:
         switch (v) {
             CASE(FP32);
             CASE(FP16);
+            CASE(BF16);
             CASE(I16);
             CASE(I32);
             CASE(I64);
@@ -295,6 +298,10 @@ struct PrecisionTrait<Precision::FP16> {
     using value_type = int16_t;
 };
 template <>
+struct PrecisionTrait<Precision::BF16> {
+    using value_type = int16_t;
+};
+template<>
 struct PrecisionTrait<Precision::Q78> {
     using value_type = uint16_t;
 };
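For reference, a small hedged check of the new precision through the existing `Precision` helpers:

```cpp
#include <iostream>

#include <ie_precision.hpp>

void bf16_precision_info() {
    InferenceEngine::Precision p(InferenceEngine::Precision::BF16);
    // Both FP16 and BF16 occupy two bytes; they differ only in the exponent/mantissa split.
    std::cout << p.name() << " occupies " << p.size() << " bytes" << std::endl;
    std::cout << InferenceEngine::Precision::FromStr("BF16").name() << std::endl;  // round-trip via the name table
}
```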
index ecc5ee6..7f675a7 100644 (file)
@@ -92,14 +92,17 @@ Options:
                               Please note that although the automatic selection usually provides a reasonable performance, 
                               it still may be non-optimal for some cases, especially for very small networks.
     -nthreads "<integer>"     Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
-    -pin "YES"/"NUMA"/"NO"    Optional. Enable threads->cores ("YES", default), threads->(NUMA)nodes ("NUMA") or completely disable ("NO") 
-                              CPU threads pinning for CPU-involved inference.
+    -enforcebf16              Optional. Enforce execution of floating point operations in bfloat16 precision where it is acceptable.
+    -pin "YES"/"NO"/"NUMA"    Optional. Enable threads->cores ("YES", default), threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU threads pinning for CPU-involved inference.
+
 
   Statistics dumping options:
     -report_type "<type>"     Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
     -report_folder            Optional. Path to a folder where statistics report is stored.
     -exec_graph_path          Optional. Path to a file where to store executable graph information serialized.
     -pc                       Optional. Report performance counters.
+    -dump_config              Optional. Path to XML/YAML/JSON file to dump IE parameters set by the application.
+    -load_config              Optional. Path to XML/YAML/JSON file to load custom IE parameters. Please note, command line parameters have higher priority than parameters from the configuration file.
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
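For example (the model path and configuration file name below are illustrative), the new bfloat16 and configuration-file options can be combined in a single run:

```sh
./benchmark_app -m <path_to_model>/model.xml -d CPU -enforcebf16 -load_config bench_config.json -report_type no_counters
```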
index 6b01db6..cefff68 100644 (file)
@@ -48,6 +48,9 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to
                                                 "usually provides a reasonable performance, it still may be non - optimal for some cases, especially for "
                                                 "very small networks. See sample's README for more details.";
 
+/// @brief message for enforcing BF16 execution where it is possible
+static const char enforce_bf16_message[] = "Optional. Enforce execution of floating point operations in bfloat16 precision where it is acceptable.";
+
 /// @brief message for user library argument
 static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.";
 
@@ -85,6 +88,15 @@ static const char progress_message[] = "Optional. Show progress bar (can affect
 // @brief message for performance counters option
 static const char pc_message[] = "Optional. Report performance counters.";
 
+#ifdef USE_OPENCV
+// @brief message for load config option
+static const char load_config_message[] = "Optional. Path to XML/YAML/JSON file to load custom IE parameters."
+                                          " Please note, command line parameters have higher priority than parameters from the configuration file.";
+
+// @brief message for dump config option
+static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file to dump IE parameters set by the application.";
+#endif
+
 /// @brief Define flag for showing help message <br>
 DEFINE_bool(h, false, help_message);
 
@@ -130,6 +142,9 @@ DEFINE_uint32(nthreads, 0, infer_num_threads_message);
 /// @brief Number of streams to use for inference on the CPU (also affects Hetero cases)
 DEFINE_string(nstreams, "", infer_num_streams_message);
 
+/// @brief Enforces execution in bfloat16 precision on systems having this capability
+DEFINE_bool(enforcebf16, false, enforce_bf16_message);
+
 /// @brief Define parameter for batch size <br>
 /// Default is 0 (that means don't specify)
 DEFINE_uint32(b, 0, batch_size_message);
@@ -155,6 +170,14 @@ DEFINE_bool(progress, false, progress_message);
 /// @brief Define flag for showing performance counters <br>
 DEFINE_bool(pc, false, pc_message);
 
+#ifdef USE_OPENCV
+/// @brief Define flag for loading configuration file <br>
+DEFINE_string(load_config, "", load_config_message);
+
+/// @brief Define flag for dumping configuration file <br>
+DEFINE_string(dump_config, "", dump_config_message);
+#endif
+
 /**
 * @brief This function show a help message
 */
@@ -180,10 +203,15 @@ static void showUsage() {
     std::cout << std::endl << "  device-specific performance options:" << std::endl;
     std::cout << "    -nstreams \"<integer>\"     " << infer_num_streams_message << std::endl;
     std::cout << "    -nthreads \"<integer>\"     " << infer_num_threads_message << std::endl;
-    std::cout << "    -pin \"YES\"/\"NO\"           " << infer_threads_pinning_message << std::endl;
+    std::cout << "    -enforcebf16              " << enforce_bf16_message << std::endl;
+    std::cout << "    -pin \"YES\"/\"NO\"/\"NUMA\"    " << infer_threads_pinning_message << std::endl;
     std::cout << std::endl << "  Statistics dumping options:" << std::endl;
     std::cout << "    -report_type \"<type>\"     " << report_type_message << std::endl;
     std::cout << "    -report_folder            " << report_folder_message << std::endl;
     std::cout << "    -exec_graph_path          " << exec_graph_path_message << std::endl;
     std::cout << "    -pc                       " << pc_message << std::endl;
+#ifdef USE_OPENCV
+    std::cout << "    -dump_config              " << dump_config_message << std::endl;
+    std::cout << "    -load_config              " << load_config_message << std::endl;
+#endif
 }
index 34b80d0..be7325b 100644 (file)
@@ -55,9 +55,9 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
     }
 
     if (!FLAGS_report_type.empty() &&
-         FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport && FLAGS_report_type != detailedCntReport) {
+        FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport && FLAGS_report_type != detailedCntReport) {
         std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" + std::string(detailedCntReport) +
-                " report types are supported (invalid -report_type option value)";
+                          " report types are supported (invalid -report_type option value)";
         throw std::logic_error(err);
     }
 
@@ -71,17 +71,17 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
 static void next_step(const std::string additional_info = "") {
     static size_t step_id = 0;
     static const std::map<size_t, std::string> step_names = {
-      { 1, "Parsing and validating input arguments" },
-      { 2, "Loading Inference Engine" },
-      { 3, "Setting device configuration" },
-      { 4, "Reading the Intermediate Representation network" },
-      { 5, "Resizing network to match image sizes and given batch" },
-      { 6, "Configuring input of the model" },
-      { 7, "Loading the model to the device" },
-      { 8, "Setting optimal runtime parameters" },
-      { 9, "Creating infer requests and filling input blobs with images" },
-      { 10, "Measuring performance" },
-      { 11, "Dumping statistics report" }
+            { 1, "Parsing and validating input arguments" },
+            { 2, "Loading Inference Engine" },
+            { 3, "Setting device configuration" },
+            { 4, "Reading the Intermediate Representation network" },
+            { 5, "Resizing network to match image sizes and given batch" },
+            { 6, "Configuring input of the model" },
+            { 7, "Loading the model to the device" },
+            { 8, "Setting optimal runtime parameters" },
+            { 9, "Creating infer requests and filling input blobs with images" },
+            { 10, "Measuring performance" },
+            { 11, "Dumping statistics report" }
     };
 
     step_id++;
@@ -121,38 +121,46 @@ int main(int argc, char *argv[]) {
             slog::info << "Network is compiled" << slog::endl;
         }
 
-        if (!FLAGS_report_type.empty()) {
-            std::vector<gflags::CommandLineFlagInfo> flags;
-            StatisticsReport::Parameters command_line_arguments;
-            gflags::GetAllFlags(&flags);
-
-            for (auto &flag : flags) {
-                if (!flag.is_default) {
-                    command_line_arguments.push_back({ flag.name, flag.current_value });
-                }
+        std::vector<gflags::CommandLineFlagInfo> flags;
+        StatisticsReport::Parameters command_line_arguments;
+        gflags::GetAllFlags(&flags);
+        for (auto &flag : flags) {
+            if (!flag.is_default) {
+                command_line_arguments.push_back({ flag.name, flag.current_value });
             }
+        }
+        if (!FLAGS_report_type.empty()) {
             statistics = std::make_shared<StatisticsReport>(StatisticsReport::Config{FLAGS_report_type, FLAGS_report_folder});
             statistics->addParameters(StatisticsReport::Category::COMMAND_LINE_PARAMETERS, command_line_arguments);
         }
+        auto isFlagSetInCommandLine = [&command_line_arguments] (const std::string& name) {
+           return (std::find_if(command_line_arguments.begin(), command_line_arguments.end(),
+           [ name ] (const std::pair<std::string, std::string>& p) { return p.first == name;}) != command_line_arguments.end());
+        };
+
+        std::string device_name = FLAGS_d;
+
+        // Parse devices
+        auto devices = parseDevices(device_name);
 
+        // Parse nstreams per device
+        std::map<std::string, std::string> device_nstreams = parseNStreamsValuePerDevice(devices, FLAGS_nstreams);
+
+        // Load device config file if specified
+        std::map<std::string, std::map<std::string, std::string>> config;
+#ifdef USE_OPENCV
+        if (!FLAGS_load_config.empty()) {
+            load_config(FLAGS_load_config, config);
+        }
+#endif
         /** This vector stores paths to the processed images **/
         std::vector<std::string> inputFiles;
         parseInputFilesArguments(inputFiles);
 
-        if (FLAGS_nstreams.empty()) {
-            slog::warn << "-nstreams default value is determined automatically for a device. "
-                "Although the automatic selection usually provides a reasonable performance,"
-                "but it still may be non-optimal for some cases, for more information look at README." << slog::endl<< slog::endl;
-        }
-
         // ----------------- 2. Loading the Inference Engine -----------------------------------------------------------
         next_step();
 
-        // Get optimal runtime parameters for device
-        std::string device_name = FLAGS_d;
-
         Core ie;
-
         if (FLAGS_d.find("CPU") != std::string::npos && !FLAGS_l.empty()) {
             // CPU (MKLDNN) extensions is loaded as a shared library and passed as a pointer to base extension
             const auto extension_ptr = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(FLAGS_l);
@@ -160,10 +168,17 @@ int main(int argc, char *argv[]) {
             slog::info << "CPU (MKLDNN) extensions is loaded " << FLAGS_l << slog::endl;
         }
 
+        // Load clDNN Extensions
         if ((FLAGS_d.find("GPU") != std::string::npos) && !FLAGS_c.empty()) {
-            // Load clDNN Extensions
-            ie.SetConfig({ {CONFIG_KEY(CONFIG_FILE), FLAGS_c} });
-            slog::info << "GPU extensions is loaded " << FLAGS_c << slog::endl;
+            // Override config if command line parameter is specified
+            if (!config.count("GPU"))
+                config["GPU"] = {};
+            config["GPU"][CONFIG_KEY(CONFIG_FILE)] = FLAGS_c;
+        }
+        if (config.count("GPU") && config.at("GPU").count(CONFIG_KEY(CONFIG_FILE))) {
+            auto ext = config.at("GPU").at(CONFIG_KEY(CONFIG_FILE));
+            ie.SetConfig({{ CONFIG_KEY(CONFIG_FILE), ext }}, "GPU");
+            slog::info << "GPU extensions is loaded " << ext << slog::endl;
         }
 
         slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl;
@@ -173,70 +188,108 @@ int main(int argc, char *argv[]) {
         // ----------------- 3. Setting device configuration -----------------------------------------------------------
         next_step();
 
-        bool perf_counts = (FLAGS_report_type == detailedCntReport ||
-                            FLAGS_report_type == averageCntReport ||
-                            FLAGS_pc ||
-                            !FLAGS_exec_graph_path.empty());
-
-        auto devices = parseDevices(device_name);
-        std::map<std::string, uint32_t> device_nstreams = parseNStreamsValuePerDevice(devices, FLAGS_nstreams);
-        for (auto& pair : device_nstreams) {
-            auto key = std::string(pair.first + "_THROUGHPUT_STREAMS");
-            std::vector<std::string> supported_config_keys = ie.GetMetric(pair.first, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
-            if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) == supported_config_keys.end()) {
-                 throw std::logic_error("Device " + pair.first + " doesn't support config key '" + key + "'! " +
-                                        "Please specify -nstreams for correct devices in format  <dev1>:<nstreams1>,<dev2>:<nstreams2>");
+        bool perf_counts = false;
+        // Update config per device according to command line parameters
+        for (auto& device : devices) {
+            if (!config.count(device)) config[device] = {};
+            std::map<std::string, std::string>& device_config = config.at(device);
+
+            // Set performance counter
+            if (isFlagSetInCommandLine("pc")) {
+                // set to user defined value
+                device_config[CONFIG_KEY(PERF_COUNT)] = FLAGS_pc ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
+            } else if (device_config.count(CONFIG_KEY(PERF_COUNT)) &&
+                      (device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) {
+                slog::warn << "Performance counters for " << device <<
+                              " device are turned on. To print results use -pc option." << slog::endl;
+            } else if (FLAGS_report_type == detailedCntReport || FLAGS_report_type == averageCntReport) {
+                slog::warn << "Turn on performance counters for " << device <<
+                              " device since report type is " << FLAGS_report_type << "." << slog::endl;
+                device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
+            } else if (!FLAGS_exec_graph_path.empty()) {
+                slog::warn << "Turn on performance counters for " << device <<
+                              " device due to execution graph dumping." << slog::endl;
+                device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES);
+            } else {
+                // set to default value
+                device_config[CONFIG_KEY(PERF_COUNT)] = FLAGS_pc ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
             }
-        }
+            perf_counts = (device_config.at(CONFIG_KEY(PERF_COUNT)) == CONFIG_VALUE(YES)) ? true : perf_counts;
+
+            auto setThroughputStreams = [&] () {
+                const std::string key = device + "_THROUGHPUT_STREAMS";
+                if (device_nstreams.count(device)) {
+                    // set to user defined value
+                    std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+                    if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) == supported_config_keys.end()) {
+                        throw std::logic_error("Device " + device + " doesn't support config key '" + key + "'! " +
+                                               "Please specify -nstreams for correct devices in format  <dev1>:<nstreams1>,<dev2>:<nstreams2>" +
+                                               " or via configuration file.");
+                    }
+                    device_config[key] = device_nstreams.at(device);
+                } else if (!device_config.count(key) && (FLAGS_api == "async")) {
+                    slog::warn << "-nstreams default value is determined automatically for " << device << " device. "
+                          "Although the automatic selection usually provides a reasonable performance, "
+                          "it still may be non-optimal for some cases; for more information look at the README." << slog::endl;
+                    device_config[key] = std::string(device + "_THROUGHPUT_AUTO");
+                }
+                if (device_config.count(key))
+                    device_nstreams[device] = device_config.at(key);
+            };
 
-        for (auto& device : devices) {
             if (device == "CPU") {  // CPU supports few special performance-oriented keys
                 // limit threading for CPU portion of inference
-                if (FLAGS_nthreads != 0)
-                    ie.SetConfig({{ CONFIG_KEY(CPU_THREADS_NUM), std::to_string(FLAGS_nthreads) }}, device);
-
-                if ((device_name.find("MULTI") != std::string::npos) &&
-                    (device_name.find("GPU") != std::string::npos)) {
-                    ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), CONFIG_VALUE(NO) }}, device);
-                } else {
-                    // pin threads for CPU portion of inference
-                    ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), FLAGS_pin }}, device);
+                if (isFlagSetInCommandLine("nthreads"))
+                    device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
+
+                if (isFlagSetInCommandLine("enforcebf16"))
+                    device_config[CONFIG_KEY(ENFORCE_BF16)] = FLAGS_enforcebf16 ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
+
+                if (isFlagSetInCommandLine("pin")) {
+                    // set to user defined value
+                    device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
+                } else if (!device_config.count(CONFIG_KEY(CPU_BIND_THREAD))) {
+                    if ((device_name.find("MULTI") != std::string::npos) &&
+                        (device_name.find("GPU") != std::string::npos)) {
+                         slog::warn << "Turn off threads pinning for " << device <<
+                                       " device since multi-scenario with GPU device is used." << slog::endl;
+                        device_config[CONFIG_KEY(CPU_BIND_THREAD)] = CONFIG_VALUE(NO);
+                    } else {
+                        // set to default value
+                        device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
+                    }
                 }
 
                 // for CPU execution, more throughput-oriented execution via streams
-                if (FLAGS_api == "async")
-                    ie.SetConfig({{ CONFIG_KEY(CPU_THROUGHPUT_STREAMS),
-                                    (device_nstreams.count(device) > 0 ? std::to_string(device_nstreams.at(device)) :
-                                                                         "CPU_THROUGHPUT_AUTO") }}, device);
-                device_nstreams[device] = std::stoi(ie.GetConfig(device, CONFIG_KEY(CPU_THROUGHPUT_STREAMS)).as<std::string>());
+                setThroughputStreams();
             } else if (device == ("GPU")) {
-                if (FLAGS_api == "async")
-                    ie.SetConfig({{ CONFIG_KEY(GPU_THROUGHPUT_STREAMS),
-                                    (device_nstreams.count(device) > 0 ? std::to_string(device_nstreams.at(device)) :
-                                                                         "GPU_THROUGHPUT_AUTO") }}, device);
-                device_nstreams[device] = std::stoi(ie.GetConfig(device, CONFIG_KEY(GPU_THROUGHPUT_STREAMS)).as<std::string>());
+                // for GPU execution, more throughput-oriented execution via streams
+                setThroughputStreams();
 
                 if ((device_name.find("MULTI") != std::string::npos) &&
                     (device_name.find("CPU") != std::string::npos)) {
-                    // multi-device execution with the CPU + GPU performs best with GPU trottling hint,
-                    // which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
-                    ie.SetConfig({{ CLDNN_CONFIG_KEY(PLUGIN_THROTTLE), "1" }}, "GPU");
+                    slog::warn << "Turn on GPU throttling. Multi-device execution with the CPU + GPU performs best with GPU throttling hint, " <<
+                                  "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)" << slog::endl;
+                    device_config[CLDNN_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
                 }
             } else if (device == "MYRIAD") {
-                ie.SetConfig({{ CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_WARNING) }}, device);
+                device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
             }
         }
 
+        for (auto&& item : config) {
+            ie.SetConfig(item.second, item.first);
+        }
+
         auto double_to_string = [] (const double number) {
-                    std::stringstream ss;
-                    ss << std::fixed << std::setprecision(2) << number;
-                    return ss.str();
-                };
+            std::stringstream ss;
+            ss << std::fixed << std::setprecision(2) << number;
+            return ss.str();
+        };
         auto get_total_ms_time = [] (Time::time_point& startTime) {
             return std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
         };
 
-
         size_t batchSize = FLAGS_b;
         Precision precision = Precision::UNSPECIFIED;
         std::string topology_name = "";
@@ -253,7 +306,7 @@ int main(int argc, char *argv[]) {
             if (statistics)
                 statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                                           {
-                                              {"read network time (ms)", duration_ms}
+                                                  {"read network time (ms)", duration_ms}
                                           });
 
             const InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
@@ -305,17 +358,14 @@ int main(int argc, char *argv[]) {
             }
             // ----------------- 7. Loading the model to the device --------------------------------------------------------
             next_step();
-
-            std::map<std::string, std::string> config = {{ CONFIG_KEY(PERF_COUNT), perf_counts ? CONFIG_VALUE(YES) :
-                                                                                                CONFIG_VALUE(NO) }};
             startTime = Time::now();
-            exeNetwork = ie.LoadNetwork(cnnNetwork, device_name, config);
+            exeNetwork = ie.LoadNetwork(cnnNetwork, device_name);
             duration_ms = double_to_string(get_total_ms_time(startTime));
             slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
             if (statistics)
                 statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                                           {
-                                              {"load network time (ms)", duration_ms}
+                                                  {"load network time (ms)", duration_ms}
                                           });
         } else {
             next_step();
@@ -333,7 +383,7 @@ int main(int argc, char *argv[]) {
             if (statistics)
                 statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                                           {
-                                              {"import network time (ms)", duration_ms}
+                                                  {"import network time (ms)", duration_ms}
                                           });
             if (batchSize == 0) {
                 batchSize = 1;
@@ -342,6 +392,12 @@ int main(int argc, char *argv[]) {
         // ----------------- 8. Setting optimal runtime parameters -----------------------------------------------------
         next_step();
 
+        // Update number of streams
+        for (auto&& ds : device_nstreams) {
+            const std::string key = ds.first + "_THROUGHPUT_STREAMS";
+            device_nstreams[ds.first] = ie.GetConfig(ds.first, key).as<std::string>();
+        }
+
         // Number of requests
         uint32_t nireq = FLAGS_nireq;
         if (nireq == 0) {
@@ -384,21 +440,21 @@ int main(int argc, char *argv[]) {
         if (statistics) {
             statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
                                       {
-                                            {"topology", topology_name},
-                                            {"target device", device_name},
-                                            {"API", FLAGS_api},
-                                            {"precision", std::string(precision.name())},
-                                            {"batch size", std::to_string(batchSize)},
-                                            {"number of iterations", std::to_string(niter)},
-                                            {"number of parallel infer requests", std::to_string(nireq)},
-                                            {"duration (ms)", std::to_string(getDurationInMilliseconds(duration_seconds))},
+                                              {"topology", topology_name},
+                                              {"target device", device_name},
+                                              {"API", FLAGS_api},
+                                              {"precision", std::string(precision.name())},
+                                              {"batch size", std::to_string(batchSize)},
+                                              {"number of iterations", std::to_string(niter)},
+                                              {"number of parallel infer requests", std::to_string(nireq)},
+                                              {"duration (ms)", std::to_string(getDurationInMilliseconds(duration_seconds))},
                                       });
             for (auto& nstreams : device_nstreams) {
                 std::stringstream ss;
                 ss << "number of " << nstreams.first << " streams";
                 statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG,
                                           {
-                                                {ss.str(), std::to_string(nstreams.second)},
+                                                  {ss.str(), nstreams.second},
                                           });
             }
         }
@@ -511,23 +567,23 @@ int main(int argc, char *argv[]) {
         double latency = getMedianValue<double>(inferRequestsQueue.getLatencies());
         double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
         double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency :
-                                             batchSize * 1000.0 * iteration / totalDuration;
+                     batchSize * 1000.0 * iteration / totalDuration;
 
         if (statistics) {
             statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                                       {
-                                        {"total execution time (ms)", double_to_string(totalDuration)},
-                                        {"total number of iterations", std::to_string(iteration)},
+                                              {"total execution time (ms)", double_to_string(totalDuration)},
+                                              {"total number of iterations", std::to_string(iteration)},
                                       });
             if (device_name.find("MULTI") == std::string::npos) {
                 statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                                           {
-                                            {"latency (ms)", double_to_string(latency)},
+                                                  {"latency (ms)", double_to_string(latency)},
                                           });
             }
             statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                                       {
-                                          {"throughput", double_to_string(fps)}
+                                              {"throughput", double_to_string(fps)}
                                       });
         }
 
@@ -536,6 +592,13 @@ int main(int argc, char *argv[]) {
         // ----------------- 11. Dumping statistics report -------------------------------------------------------------
         next_step();
 
+#ifdef USE_OPENCV
+        if (!FLAGS_dump_config.empty()) {
+            dump_config(FLAGS_dump_config, config);
+            slog::info << "Inference Engine configuration settings were dumped to " << FLAGS_dump_config << slog::endl;
+        }
+#endif
+
         if (!FLAGS_exec_graph_path.empty()) {
             try {
                 CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo();
@@ -575,7 +638,7 @@ int main(int argc, char *argv[]) {
         if (statistics) {
             statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS,
                                       {
-                                            {"error", ex.what()},
+                                              {"error", ex.what()},
                                       });
             statistics->dump();
         }
index a7c05d7..d0820bc 100644 (file)
 
 #include "utils.hpp"
 
+#ifdef USE_OPENCV
+#include <opencv2/core.hpp>
+#endif
+
 uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
     static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds {
             { "CPU",     60  },
@@ -60,32 +64,33 @@ std::vector<std::string> parseDevices(const std::string& device_string) {
     if (comma_separated_devices.find(":") != std::string::npos) {
         comma_separated_devices = comma_separated_devices.substr(comma_separated_devices.find(":") + 1);
     }
+    if ((comma_separated_devices == "MULTI") || (comma_separated_devices == "HETERO"))
+        return std::vector<std::string>();
     auto devices = split(comma_separated_devices, ',');
     for (auto& device : devices)
         device = device.substr(0, device.find_first_of(".("));
     return devices;
 }
 
-std::map<std::string, uint32_t> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
-                                                            const std::string& values_string) {
+std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
+                                                               const std::string& values_string) {
     //  Format: <device1>:<value1>,<device2>:<value2> or just <value>
-    auto values_string_upper = values_string;
-    std::map<std::string, uint32_t> result;
-    auto device_value_strings = split(values_string_upper, ',');
+    std::map<std::string, std::string> result;
+    auto device_value_strings = split(values_string, ',');
     for (auto& device_value_string : device_value_strings) {
-        auto device_value_vec =  split(device_value_string, ':');
+        auto device_value_vec = split(device_value_string, ':');
         if (device_value_vec.size() == 2) {
             auto device_name = device_value_vec.at(0);
             auto nstreams = device_value_vec.at(1);
             auto it = std::find(devices.begin(), devices.end(), device_name);
             if (it != devices.end()) {
-                result[device_name] = std::stoi(nstreams);
+                result[device_name] = nstreams;
             } else {
                 throw std::logic_error("Can't set nstreams value " + std::string(nstreams) +
                                        " for device '" + device_name + "'! Incorrect device name!");
             }
         } else if (device_value_vec.size() == 1) {
-            uint32_t value = std::stoi(device_value_vec.at(0));
+            auto value = device_value_vec.at(0);
             for (auto& device : devices) {
                 result[device] = value;
             }
@@ -95,3 +100,37 @@ std::map<std::string, uint32_t> parseNStreamsValuePerDevice(const std::vector<st
     }
     return result;
 }
+
+#ifdef USE_OPENCV
+void dump_config(const std::string& filename,
+                 const std::map<std::string, std::map<std::string, std::string>>& config) {
+    cv::FileStorage fs(filename, cv::FileStorage::WRITE);
+    if (!fs.isOpened())
+        throw std::runtime_error("Error: Can't open config file : " + filename);
+    for (auto device_it = config.begin(); device_it != config.end(); ++device_it) {
+        fs << device_it->first  << "{:";
+        for (auto param_it = device_it->second.begin(); param_it != device_it->second.end(); ++param_it)
+            fs << param_it->first << param_it->second;
+        fs << "}";
+    }
+    fs.release();
+}
+
+void load_config(const std::string& filename,
+                 std::map<std::string, std::map<std::string, std::string>>& config) {
+    cv::FileStorage fs(filename, cv::FileStorage::READ);
+    if (!fs.isOpened())
+        throw std::runtime_error("Error: Can't load config file : " + filename);
+    cv::FileNode root = fs.root();
+    for (auto it = root.begin(); it != root.end(); ++it) {
+        auto device = *it;
+        if (!device.isMap()) {
+            throw std::runtime_error("Error: Can't parse config file : " + filename);
+        }
+        for (auto iit = device.begin(); iit != device.end(); ++iit) {
+            auto item = *iit;
+            config[device.name()][item.name()] = item.string();
+        }
+    }
+}
+#endif
\ No newline at end of file
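A hedged sketch of how these helpers are meant to be driven from the sample (file names are illustrative; the on-disk layout is whatever `cv::FileStorage` writes for a map of per-device key/value maps, so XML, YAML, and JSON all work):

```cpp
#ifdef USE_OPENCV
#include <map>
#include <string>

#include "utils.hpp"

void config_roundtrip_example() {
    std::map<std::string, std::map<std::string, std::string>> config;
    load_config("bench_config.json", config);       // read user-provided IE parameters
    config["CPU"]["PERF_COUNT"] = "YES";             // command-line style overrides win over the file
    dump_config("bench_config_dump.json", config);   // persist what was actually applied
}
#endif
```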
index b7ae051..70c2ce7 100644 (file)
 
 std::vector<std::string> parseDevices(const std::string& device_string);
 uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
-std::map<std::string, uint32_t> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
-                                                            const std::string& values_string);
+std::map<std::string, std::string> parseNStreamsValuePerDevice(const std::vector<std::string>& devices,
+                                                               const std::string& values_string);
+#ifdef USE_OPENCV
+void dump_config(const std::string& filename,
+                 const std::map<std::string, std::map<std::string, std::string>>& config);
+void load_config(const std::string& filename,
+                 std::map<std::string, std::map<std::string, std::string>>& config);
+#endif
\ No newline at end of file
index 79779cb..e656dcc 100644 (file)
@@ -85,30 +85,28 @@ int main(int argc, char *argv[]) {
         std::vector<std::string> availableDevices = ie.GetAvailableDevices();
 
         // --------------------------- 3. Query and print supported metrics and config keys--------------------
-        std::set<std::string> printedDevices;
 
         std::cout << "Available devices: " << std::endl;
         for (auto && device : availableDevices) {
-            std::string deviceFamilyName = device.substr(0, device.find_first_of('.'));
-            if (printedDevices.find(deviceFamilyName) == printedDevices.end())
-                printedDevices.insert(deviceFamilyName);
-            else
-                continue;
-
-            std::cout << "\tDevice: " << deviceFamilyName << std::endl;
+            std::cout << "\tDevice: " << device << std::endl;
 
             std::cout << "\tMetrics: " << std::endl;
-            std::vector<std::string> supportedMetrics = ie.GetMetric(deviceFamilyName, METRIC_KEY(SUPPORTED_METRICS));
+            std::vector<std::string> supportedMetrics = ie.GetMetric(device, METRIC_KEY(SUPPORTED_METRICS));
             for (auto && metricName : supportedMetrics) {
-                std::cout << "\t\t" << metricName << " : " << std::flush;
-                printParameterValue(ie.GetMetric(device, metricName));
+                if (metricName != METRIC_KEY(AVAILABLE_DEVICES)) {
+                    std::cout << "\t\t" << metricName << " : " << std::flush;
+                    printParameterValue(ie.GetMetric(device, metricName));
+                }
             }
 
-            std::cout << "\tDefault values for device configuration keys: " << std::endl;
-            std::vector<std::string> supportedConfigKeys = ie.GetMetric(deviceFamilyName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
-            for (auto && configKey : supportedConfigKeys) {
-                std::cout << "\t\t" << configKey << " : " << std::flush;
-                printParameterValue(ie.GetConfig(deviceFamilyName, configKey));
+            if (std::find(supportedMetrics.begin(), supportedMetrics.end(),
+                METRIC_KEY(SUPPORTED_CONFIG_KEYS)) != supportedMetrics.end()) {
+                std::cout << "\tDefault values for device configuration keys: " << std::endl;
+                std::vector<std::string> supportedConfigKeys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+                for (auto && configKey : supportedConfigKeys) {
+                    std::cout << "\t\t" << configKey << " : " << std::flush;
+                    printParameterValue(ie.GetConfig(device, configKey));
+                }
             }
 
             std::cout << std::endl;
index 3030771..0a594eb 100644 (file)
@@ -643,12 +643,13 @@ int main(int argc, char *argv[]) {
         auto t0 = Time::now();
         ExecutableNetwork executableNet;
 
+        ie.SetConfig(genericPluginConfig, deviceStr);
         if (!FLAGS_m.empty()) {
             slog::info << "Loading model to the device" << slog::endl;
-            executableNet = ie.LoadNetwork(network, deviceStr, genericPluginConfig);
+            executableNet = ie.LoadNetwork(network, deviceStr);
         } else {
             slog::info << "Importing model to the device" << slog::endl;
-            executableNet = ie.ImportNetwork(FLAGS_rg.c_str(), deviceStr, genericPluginConfig);
+            executableNet = ie.ImportNetwork(FLAGS_rg.c_str(), deviceStr);
         }
 
         ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
diff --git a/inference-engine/scripts/run_code_checks.sh b/inference-engine/scripts/run_code_checks.sh
new file mode 100644 (file)
index 0000000..11689b7
--- /dev/null
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+CURRENT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+command -v realpath >/dev/null 2>&1 || { echo >&2 "cpplint requires the realpath executable but it's not installed. Aborting."; exit 1; }
+SOURCE_DIR=$(realpath ${CURRENT_DIR}/..)
+REPORT_DIR="${SOURCE_DIR}/report"
+CPPLINT_REPORT_DIR="${REPORT_DIR}/cpplint"
+PROJECT_NAME="Inference Engine"
+
+function run_cpplint() {
+    echo "-> CppLint started..."
+    if [ -d ${CPPLINT_REPORT_DIR} ]; then
+        rm -Rf ${CPPLINT_REPORT_DIR}
+    fi
+
+    mkdir -p ${CPPLINT_REPORT_DIR}
+    python ${CURRENT_DIR}/cpplint.py --linelength=160 --counting=detailed --quiet --filter="
+        -build/header_guard,
+        -build/include,
+        -build/include_order,
+        -build/include_subdir,
+        -build/include_what_you_use,
+        -build/namespaces,
+        -build/c++11,
+        -whitespace/indent,
+        -whitespace/comments,
+        -whitespace/ending_newline,
+        -runtime/references,
+        -runtime/int,
+        -runtime/explicit,
+        -readability/todo,
+        -readability/fn_size
+    " $(find ${SOURCE_DIR} -name '*.h' -or -name '*.cc' -or -name '*.c' -or -name '*.cpp' -or -name '*.hpp' |
+        grep -v 'inference-engine/bin\|inference-engine/build\|inference-engine/report\|inference-engine/scripts\|inference-engine/temp\|inference-engine/tests_deprecated/\|gtest\|inference-engine/ie_bridges\|pugixml\|inference-engine/tools/vpu_perfcheck\|thirdparty/gflags\|thirdparty/ade\|thirdparty/fluid\|thirdparty/mkl-dnn\|thirdparty/movidius\|thirdparty/ocv\|thirdparty/plugixml\|thirdparty/std_lib\|thirdparty/clDNN/common\|thirdparty/clDNN/tutorial\|thirdparty/clDNN/utils' |
+        grep 'include\|src\|inference-engine/samples\|thirdparty/clDNN/kernel_selector\|thirdparty/clDNN/api\|thirdparty/clDNN/api_extension\|inference-engine/tests_' ) 2>&1 |
+        sed 's/"/\&quot;/g' >&1| sed 's/</\&lt;/g' >&1| sed 's/>/\&gt;/g' >&1| sed "s/'/\&apos;/g" >&1|
+        sed 's/\&/\&amp;/g' >&1| python ${CURRENT_DIR}/cpplint_to_cppcheckxml.py &> ${CPPLINT_REPORT_DIR}/cpplint-cppcheck-result.xml
+
+       # Generate html from it
+       ${CURRENT_DIR}/cppcheck-htmlreport.py --file=${CPPLINT_REPORT_DIR}/cpplint-cppcheck-result.xml --report-dir=${CPPLINT_REPORT_DIR} --source-dir=${SOURCE_DIR} --title=${PROJECT_NAME}
+
+       # Change Cppcheck things to cpplint
+       sed -i.bak 's/Cppcheck/cpplint/g' ${CPPLINT_REPORT_DIR}/index.html
+       sed -i.bak 's/a\ tool\ for\ static\ C\/C++\ code\ analysis/an\ open\ source\ lint\-like\ tool\ from\ Google/g' ${CPPLINT_REPORT_DIR}/index.html
+       sed -i.bak 's/http:\/\/cppcheck.sourceforge.net/http:\/\/google\-styleguide.googlecode.com\/svn\/trunk\/cpplint\/cpplint.py/g' ${CPPLINT_REPORT_DIR}/index.html
+       sed -i.bak 's/IRC: <a href=\"irc:\/\/irc.freenode.net\/cppcheck\">irc:\/\/irc.freenode.net\/cppcheck<\/a>/\ /g' ${CPPLINT_REPORT_DIR}/index.html
+
+    echo "-> CppLint finished..."
+}
+
+function run_cpp_check() {
+    echo "-> Cppcheck started..."
+    CPPCHECK_REPORT_DIR="${REPORT_DIR}/cppcheck"
+    if [ -d ${CPPCHECK_REPORT_DIR} ]; then
+        rm -Rf ${CPPCHECK_REPORT_DIR}
+    fi
+
+    mkdir -p ${CPPCHECK_REPORT_DIR}
+
+       # Generate cppcheck xml
+       cppcheck -v --enable=all --suppress=missingIncludeSystem --std=c++11 ${SOURCE_DIR} -i${SOURCE_DIR}/thirdparty -i${SOURCE_DIR}/tests/libs -i${SOURCE_DIR}/temp -i${SOURCE_DIR}/build \
+         -i${SOURCE_DIR}/bin -i${SOURCE_DIR}/report -I${SOURCE_DIR}/include -I${SOURCE_DIR}/src -I${SOURCE_DIR}/thirdparty/pugixml/src -I${SOURCE_DIR}/thirdparty/gflags/src -I${SOURCE_DIR}/samples/scoring_agent/HTTPClient -I${SOURCE_DIR}/src/inference_engine --xml-version=2 2> ${CPPCHECK_REPORT_DIR}/cppcheck-only-result.xml
+
+       # Generate html from it
+       python ${CURRENT_DIR}/cppcheck-htmlreport.py\
+               --file=${CPPCHECK_REPORT_DIR}/cppcheck-only-result.xml\
+               --report-dir=${CPPCHECK_REPORT_DIR}\
+               --source-dir=${SOURCE_DIR}\
+               --title=${PROJECT_NAME}
+    echo "-> Cppcheck finished..."
+}
+
+if [ ! -d ${REPORT_DIR} ]; then
+    mkdir -p ${REPORT_DIR}
+fi
+
+run_cpplint
+
+out_cpp_lint=`cat ${CPPLINT_REPORT_DIR}/cpplint-cppcheck-result.xml`
+if [[ ${out_cpp_lint} == *"error"* ]]; then
+    exit 1
+fi
+#run_cpp_check
index c620939..5774f02 100644 (file)
@@ -4,6 +4,8 @@
 
 add_subdirectory(preprocessing)
 
+add_subdirectory(ir_readers)
+
 add_subdirectory(legacy_api)
 
 if(ENABLE_MKL_DNN)
index c63f204..25bd481 100644 (file)
@@ -269,5 +269,6 @@ void Config::adjustKeyMapValues() {
 
     key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams);
     key_config_map[PluginConfigParams::KEY_DEVICE_ID] = device_id;
+    key_config_map[PluginConfigParams::KEY_CONFIG_FILE] = "";
 }
 }  // namespace CLDNNPlugin
index 6d6fd9e..c18dc0f 100644 (file)
@@ -2704,7 +2704,7 @@ void Program::CreatePoolingPrimitive(cldnn::topology& topology, InferenceEngine:
         } else {
             size = (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS]));
             stride = (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS]));
-            input_offset = { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) };
+            input_offset = { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]), 0 };
         }
 
         auto dt = DataTypeFromPrecision(poolLayer->outData[0]->getPrecision());
index 1ab4e64..c3e5244 100644 (file)
@@ -8,6 +8,7 @@
 #include <set>
 #include <string>
 #include <algorithm>
+#include <map>
 
 #if defined __INTEL_COMPILER || defined _MSC_VER
 #include <malloc.h>
@@ -28,6 +29,7 @@
 #include "gna2_model_debug_log.hpp"
 #else
 #include <gna-api-types-xnn.h>
+#include <map>
 
 #endif
 
@@ -373,6 +375,13 @@ float GNAPluginNS::backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t
     return comp.output_scale_factor;
 }
 
+struct InputEndPoint {
+    int idx = 0;
+    size_t size = 0;
+    size_t num_bytes_per_output = 1;
+    InputEndPoint() = default;
+    InputEndPoint(int nidx, size_t sz, size_t esize) : idx(nidx), size(sz), num_bytes_per_output(esize) {}
+};
 
 void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename) {
     auto & components = component;
@@ -414,11 +423,21 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
         return ptra >= ptrb  && ptra < reinterpret_cast<char*>(ptrb) + bsize;
     };
 
+    auto startPtr = [](void* ptr, size_t size) {
+        return reinterpret_cast<int8_t*>(ptr);
+    };
+    auto endPtr = [](void* ptr, size_t size) {
+        return reinterpret_cast<int8_t*>(ptr) + size;
+    };
+    auto sizeofTensor = [](void* ptr, size_t size) {
+        return size;
+    };
+
     std::fstream graph(filename, std::ios::out);
     graph << "strict digraph {";
     std::set<void*> weights;
     std::set<void*> biases;
-    std::set<void*> outputs;
+    std::map<void*, InputEndPoint> outputs;
     std::set<std::string> layersNames;
 
     auto generate_layer_name = [&](int k) {
@@ -565,11 +584,25 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
             }
         }
         if (!inputConnected) {
-            // drawing tmp connection
-            outputs.insert(components[k].ptr_inputs);
-            auto tidx = std::distance(outputs.begin(), outputs.find(components[k].ptr_inputs));
-            graph << tidx << " -> " << l
-                  << " [label=\"FROM_TMP\", fontcolor=darkgreen,color=orange, style=dashed];";
+            // searching for TMP connection
+            size_t tidx = -1;
+            for (auto && en : outputs) {
+                if (intersected(en.first, en.second.size, INPUTS(k))) {
+                    tidx = en.second.idx;
+                    auto  updated_ptr  = std::min(startPtr(en.first, en.second.size), startPtr(INPUTS(k)));
+                    auto  updated_size = std::max(endPtr(en.first, en.second.size), endPtr(INPUTS(k))) - updated_ptr;
+                    outputs.erase(en.first);
+                    outputs[updated_ptr] = InputEndPoint(tidx, updated_size, components[k].num_bytes_per_input);
+                    break;
+                }
+            }
+
+            if (tidx == -1) {
+                outputs[components[k].ptr_inputs] = InputEndPoint(outputs.size(), sizeofTensor(INPUTS(k)), components[k].num_bytes_per_input);
+            }
+            tidx = outputs[components[k].ptr_inputs].idx;
+            graph << "parameter_" << tidx << " -> " << l
+                  << " [fontcolor=darkgreen,color=orange, style=dashed];";
         }
     }
 
@@ -578,13 +611,25 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
 
         int tidx = 0;
         for (auto tmpOutPtrs : outputs) {
-            if (components[k].ptr_outputs == tmpOutPtrs) {
+            if (components[k].ptr_outputs == tmpOutPtrs.first) {
                 graph << l << " -> " << tidx << " [label=\"TO_TMP\", fontcolor=darkgreen,color=orange, style=dashed];";
             }
             tidx++;
         }
     }
 
+    // writing inputs info
+    for (auto && en : outputs) {
+        std::string l = "parameter_" + std::to_string(en.second.idx);
+        graph <<  l << " [shape=box, style=filled, fillcolor=\"#85C1E9\"";
+        graph << ", label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n"
+                 "  <TR><TD  colspan=\"2\">" <<  l << "</TD></TR>\n";
+        graph << "  <TR><TD> dims</TD><TD>" << 1 << "x" << en.second.size / en.second.num_bytes_per_output << "</TD></TR>\n";
+        graph << "  <TR><TD> obit</TD><TD>" << en.second.num_bytes_per_output << "</TD></TR>\n";
+        graph << "  <TR><TD> ptr</TD><TD>" <<  en.first << "</TD></TR>\n";
+        graph << "</TABLE>>];\n";
+    }
+
     graph << "}";
 }
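
The change above replaces the flat std::set of temporary output pointers with a std::map<void*, InputEndPoint>, so byte ranges that overlap an already-recorded region are merged into a single GraphViz "parameter" node instead of producing duplicates. A minimal, self-contained sketch of that merge step, with illustrative names (Range, mergeRange) that do not exist in the plugin:

    #include <algorithm>
    #include <cstddef>
    #include <map>

    struct Range { int idx; std::size_t size; };         // stand-in for InputEndPoint

    // Merge [ptr, ptr + size) into the map: widen an overlapping entry and reuse
    // its index, or register a new entry with the next free index.
    int mergeRange(std::map<void*, Range>& ranges, void* ptr, std::size_t size) {
        auto* beg = static_cast<char*>(ptr);
        for (auto it = ranges.begin(); it != ranges.end(); ++it) {
            auto* rbeg = static_cast<char*>(it->first);
            auto* rend = rbeg + it->second.size;
            if (beg < rend && beg + size > rbeg) {        // intervals intersect
                int idx = it->second.idx;
                auto* nbeg = std::min(rbeg, beg);
                auto nsize = static_cast<std::size_t>(std::max(rend, beg + size) - nbeg);
                ranges.erase(it);
                ranges[nbeg] = Range{idx, nsize};         // widened range keeps the old index
                return idx;
            }
        }
        int idx = static_cast<int>(ranges.size());
        ranges[ptr] = Range{idx, size};                   // disjoint range, new index
        return idx;
    }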
 
index d58a96a..90f01ff 100644 (file)
@@ -21,20 +21,28 @@ class GNAExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafe
     std::shared_ptr<GNAPlugin> plg;
 
  public:
-    GNAExecutableNetwork(const std::string &aotFileName, const std::map<std::string, std::string> &config) :
-        plg(std::make_shared<GNAPlugin>(config)) {
+    GNAExecutableNetwork(const std::string &aotFileName, std::shared_ptr<GNAPlugin> plg)
+        : plg(plg) {
         plg->ImportNetwork(aotFileName);
         _networkInputs  = plg->GetInputs();
         _networkOutputs = plg->GetOutputs();
     }
 
-    GNAExecutableNetwork(InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config)
-        : plg(std::make_shared<GNAPlugin>(config)) {
+    GNAExecutableNetwork(InferenceEngine::ICNNNetwork &network, std::shared_ptr<GNAPlugin> plg)
+        : plg(plg) {
         InferenceEngine::NetPass::ConvertPrecision(network, InferenceEngine::Precision::I64, InferenceEngine::Precision::I32);
         InferenceEngine::NetPass::ConvertPrecision(network, InferenceEngine::Precision::U64, InferenceEngine::Precision::I32);
         plg->LoadNetwork(network);
     }
 
+    GNAExecutableNetwork(const std::string &aotFileName, const std::map<std::string, std::string> &config)
+        : GNAExecutableNetwork(aotFileName, std::make_shared<GNAPlugin>(config)) {
+    }
+
+    GNAExecutableNetwork(InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config)
+        : GNAExecutableNetwork(network, std::make_shared<GNAPlugin>(config)) {
+    }
+
     InferenceEngine::AsyncInferRequestInternal::Ptr
         CreateAsyncInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
                                     InferenceEngine::OutputsDataMap networkOutputs) override {
@@ -58,5 +66,18 @@ class GNAExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafe
     void ExportImpl(std::ostream&) override {
         THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
     }
+
+    void GetConfig(const std::string &name,
+                   InferenceEngine::Parameter &result,
+                   InferenceEngine::ResponseDesc* /*resp*/) const override {
+        result = plg->GetConfig(name, {});
+    }
+
+    void GetMetric(const std::string& name,
+                   InferenceEngine::Parameter& result,
+                   InferenceEngine::ResponseDesc* /* resp */) const override {
+        result = plg->GetMetric(name, {});
+    }
 };
+
 }  // namespace GNAPluginNS
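
Because GetConfig and GetMetric are now forwarded to the owning GNAPlugin, an executable network can be inspected through the regular Inference Engine API. A hedged usage sketch; the model paths are placeholders and the key/value macros are assumed to come from gna_config.hpp and ie_plugin_config.hpp:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>
    #include <gna/gna_config.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        InferenceEngine::Core ie;
        auto network = ie.ReadNetwork("model.xml", "model.bin");   // hypothetical IR
        auto exec = ie.LoadNetwork(network, "GNA",
            {{GNA_CONFIG_KEY(DEVICE_MODE), GNA_CONFIG_VALUE(SW_EXACT)}});

        // both calls are delegated to GNAPlugin::GetConfig / GetMetric as in the hunk above
        auto mode = exec.GetConfig(GNA_CONFIG_KEY(DEVICE_MODE)).as<std::string>();
        auto keys = exec.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
        std::cout << mode << ", " << keys.size() << " supported config keys\n";
        return 0;
    }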
index a0a105e..a5b352c 100644 (file)
@@ -34,6 +34,7 @@
 #include "layers/gna_concat_layer.hpp"
 #include "layers/gna_crop_layer.hpp"
 #include "round_float_define.hpp"
+#include "gna_plugin_policy.hpp"
 
 using namespace InferenceEngine;
 using namespace std;
@@ -58,6 +59,10 @@ void GNAGraphCompiler::setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gna
     this->gnaFlags = std::move(gnaFlagsPtr);
 }
 
+void GNAGraphCompiler::setPolicy(GNAPluginNS::Policy policyToSet) {
+    this->policy = policyToSet;
+}
+
 intel_dnn_component_t * GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
     if (current->insData.empty())
         return nullptr;
@@ -987,13 +992,57 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
     auto outputs = *layer->outData.begin();
     auto inputs = layer->insData.begin()->lock();
 
-    // auto offset = filterLayer->GetParamAsInt("output_offset");
-
     uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
     uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
     uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
     uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 
+    auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded");
+    // number of rows handled by inserting a copy layer
+    uint32_t num_rows_copied = 0;
+    // in case left alignment succeeded but the number of elements is not a multiple of 8, an align_filter still has to be inserted;
+    // this is improved by inserting a copy layer that covers most of the elements - at most a 32x31 affine filter remains
+    if (policy.ConcatAlignmentPolicy == Policy::ConcatAlignment::FAST &&  0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
+        // can we use copy at all
+        num_rows_copied = ALIGN(num_rows_in, 32) - 32;
+
+        auto orientation = kDnnInterleavedOrientation;
+
+        auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", "copy");
+
+        dnn->InitCopyComponent(copyComponent,
+                               orientation,
+                               num_rows_copied,
+                               num_columns_in,
+                               num_rows_copied,
+                               num_columns_in,
+                               inputs->getPrecision().size(),
+                               inputs->getPrecision().size(),
+                               quantized == nullptr ? 1 : quantized->_dst_quant.scale,
+                               num_rows_copied,
+                               num_columns_in,
+                               ptr_inputs,
+                               ptr_outputs);
+
+
+        size_t num_data_bytes_in = num_rows_copied * num_rows_copied * num_columns_in
+            * inputs->getPrecision().size();
+        // need to reserve the full tensor, so use the original size, assuming an identity activation is attached to the filter later on
+        size_t num_data_bytes_out = num_rows_out * num_columns_in * inputs->getPrecision().size();
+
+        connectInput(layer, ptr_inputs, num_data_bytes_in);
+        auto isNonFunctional = [](CNNLayerPtr l) {
+            return LayerInfo(l).isNonFunctional();
+        };
+        auto identity = CNNNetGetNextLayerSkipCertain(layer, 0, 0, isNonFunctional);
+        connectOutput(identity.first, ptr_outputs, num_data_bytes_out);
+
+        num_rows_in  -= num_rows_copied;
+        num_rows_out -= num_rows_copied;
+    }
+    filterLayer->params["rows_copied_offset"] = std::to_string(num_rows_copied * inputs->getPrecision().size());
+
+
     auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
     auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
 
@@ -1013,35 +1062,36 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
         ptr_biases,
         false);
 
-    size_t num_data_bytes_out =
-        InferenceEngine::details::product(
-            begin(outputs->getDims()), end(outputs->getDims())) * 4;
-
+    size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size();
     size_t num_data_bytes_in = num_columns_in *
         ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
 
-    connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
+    connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0);
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
 
-    if (num_padding == 0) {
-        gnamem->readonly().push_ptr(ptr_weights,
-            filterLayer->_weights->cbuffer().as<const void*>(),
-            filterLayer->_weights->byteSize(),
-            64);
-    } else {
+    {
+        auto weightsElementSize = filterLayer->_weights->getTensorDesc().getPrecision().size();
         auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
         auto paddedWeights = elementsIn * num_rows_out;
-        auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();
+        auto paddedWeightsSize = paddedWeights * weightsElementSize;
+
+        // TODO: this can be improved to not generate unneeded weights at all
+
+        size_t weights_stride =  (num_rows_in + num_rows_copied) * weightsElementSize;
+        size_t weights_offset = weights_stride * num_rows_copied +  num_rows_copied * weightsElementSize;
 
         gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
-            size_t offset = 0;
-            for (int i = 0; i < num_rows_out && size >= offset; i++) {
-                ie_memcpy(reinterpret_cast<uint8_t*>(data) + offset, size - offset,
-                    filterLayer->_weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * filterLayer->precision.size(),
-                    num_rows_in* filterLayer->precision.size());
-                offset += (num_rows_in + num_padding) * filterLayer->precision.size();
+            size_t roffset = weights_offset;
+            size_t woffset = 0;
+            for (int i = 0; i < num_rows_out && size >= woffset; i++) {
+                ie_memcpy(reinterpret_cast<uint8_t*>(data) + woffset,
+                          size - woffset,
+                          filterLayer->_weights->cbuffer().as<const uint8_t*>() + roffset,
+                          num_rows_in * weightsElementSize);
+                roffset += weights_stride;
+                woffset += elementsIn * weightsElementSize;
             }
-            }, 64);
+         }, 64);
     }
 
     if (filterLayer->_biases) {
@@ -1189,11 +1239,18 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
         num_rows = FROM_IR_DIM(inputs, 1);
     }
 
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
-        * outputs->getPrecision().size();
+    // TODO: solve this by layer level transformations
+    auto concatAlignFilter = CNNNetPrevLayer(layer, 0);
+    if (LayerInfo(concatAlignFilter).isConcatAlignFilter()) {
+        auto rowsCopiedOffset = concatAlignFilter->GetParamAsInt("rows_copied_offset");
+        if (rowsCopiedOffset != 0) {
+            num_rows -= rowsCopiedOffset / outputs->getPrecision().size();
+            layer->params["output_offset"] = std::to_string(rowsCopiedOffset);
+        }
+    }
 
-    size_t num_data_bytes_in = InferenceEngine::details::product(begin(inputs->getDims()), end(inputs->getDims()))
-        * inputs->getPrecision().size();
+    size_t num_data_bytes_out = num_columns * num_rows * outputs->getPrecision().size();
+    size_t num_data_bytes_in = num_columns * num_rows * inputs->getPrecision().size();
 
     static InferenceEngine::details::caseless_unordered_map<std::string, DnnActivationType> supportedActivations = {
         {"sigmoid", kActSigmoid},
@@ -1626,7 +1683,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
 
             if (it != splitLayerInfoItem.splitOutputLayers.end()) {
                 gnalog()  << "Connecting " << splitName << " input \n";
-                auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset, 0);
+                auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0);
                 gnalog()  << "Connected \n";
                 return res;
             }
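
A small worked example of the FAST concat-alignment split introduced in ConcatAlignFilterPrimitive above, assuming ALIGN(x, n) rounds x up to the nearest multiple of n (as elsewhere in the plugin); the row counts are purely illustrative:

    #include <cstdint>
    #include <iostream>

    static uint32_t Align(uint32_t x, uint32_t n) { return ((x + n - 1) / n) * n; }

    int main() {
        uint32_t num_rows_in = 100;                        // rows reaching the align filter
        uint32_t num_rows_copied = 0;
        if (Align(num_rows_in, 32) > 32) {                 // FAST policy condition
            num_rows_copied = Align(num_rows_in, 32) - 32; // 128 - 32 = 96 rows go through a copy layer
        }
        uint32_t rows_for_filter = num_rows_in - num_rows_copied;  // only 4 rows stay in the affine filter
        std::cout << num_rows_copied << " rows copied, "
                  << rows_for_filter << " rows filtered\n";
        return 0;
    }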
index 87f80ce..ccaf60a 100644 (file)
@@ -26,6 +26,7 @@
 #include "backend/am_intel_dnn.hpp"
 #include "gna_device.hpp"
 #include "gna_data_types.hpp"
+#include "gna_plugin_policy.hpp"
 
 namespace GNAPluginNS {
 class GNAGraphCompiler {
@@ -34,6 +35,7 @@ private:
     std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
     std::shared_ptr<GNAPluginNS::InputDesc> inputDesc;
     std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
+    Policy policy;
 
     // layers with extra storage for connections and additional
     // non trivial processing
@@ -53,6 +55,7 @@ public:
     void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
     void setInputDescPtr(std::shared_ptr<GNAPluginNS::InputDesc> inputDescPtr);
     void setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlagsPtr);
+    void setPolicy(GNAPluginNS::Policy policy);
 
     void fillMemoryConnections(std::unordered_map<std::string,
             std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);
index 1bf2dd3..1099911 100644 (file)
@@ -237,7 +237,6 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
     auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
         ModelHeader::EndPoint out;
         out.elements_count = ep.elements_count;
-        out.element_size = ep.element_size;
         out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
         out.scaleFactor = ep.scaleFactor;
         return out;
index 8010c65..23f0cdc 100644 (file)
@@ -22,6 +22,7 @@
 #include <graph_tools.hpp>
 #include <debug.h>
 #include <gna/gna_config.hpp>
+#include "gna_plugin_config.hpp"
 #include <ie_util_internal.hpp>
 #include "gna_plugin.hpp"
 #include "optimizer/gna_pass_manager.hpp"
@@ -302,6 +303,7 @@ void GNAPlugin::ImportFrames(
 
 GNAPlugin::GNAPlugin() {
     Init();
+    UpdateFieldsFromConfig();
 }
 
 GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
@@ -321,13 +323,13 @@ void GNAPlugin::Init() {
 
 void GNAPlugin::InitGNADevice() {
 #if GNA_LIB_VER == 1
-    gnadevice = std::make_shared<GNADeviceHelper>(gna_proc_type,
+    gnadevice = std::make_shared<GNADeviceHelper>(config.gna_proc_type,
                                         gnaFlags->gna_lib_async_threads_num,
                                         gnaFlags->gna_openmp_multithreading,
                                         gnaFlags->performance_counting);
 #else
-    gnadevice = std::make_shared<GNADeviceHelper>(pluginGna2AccMode,
-                pluginGna2DeviceConsistent,
+    gnadevice = std::make_shared<GNADeviceHelper>(config.pluginGna2AccMode,
+                                                  config.pluginGna2DeviceConsistent,
                 gnaFlags->gna_lib_async_threads_num,
                 gnaFlags->gna_openmp_multithreading,
                 gnaFlags->performance_counting);
@@ -387,7 +389,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
         run_passes(newNet, true);
         run_passes(newNet, false);
     } else {
-        switch (gnaPrecision) {
+        switch (config.gnaPrecision) {
             case Precision::I16:
                 ModelQuantizer<QuantI16> q16;
                 newNet = q16.quantize(network, run_passes, inputsDesc->inputScaleFactors);
@@ -421,6 +423,9 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
 
     auto sortedNet = CNNNetSortTopologicallyEx(*newNet, make_fuzed_order);
 
+    // passing policy to compiler
+    graphCompiler.setPolicy(policy);
+
     if (sortedNet.empty()) {
         THROW_GNA_EXCEPTION << "Sorted network is empty";
     }
@@ -534,10 +539,33 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
 
             gnalog() << "[UFS] from : "<< outPort.first <<" reached: " << layer->name << "\n";
 
+            // probing gna_primitives
             if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
                 initOutput(portId, irLayerAvatar->second, layer);
                 stopSearching = true;
             }
+
+            // probing concatInfo
+            if (!stopSearching && LayerInfo(layer).isConcat()) {
+                auto concatConnection  = graphCompiler.concat_connection.find(layer->name);
+                if (concatConnection != graphCompiler.concat_connection.end()) {
+                    //initOutput(portId, irLayerAvatar->second, layer);
+
+                    auto &desc = outputsDesc[portId];
+                    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+
+                    desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
+                    // TODO: what is orientation for concat
+                    desc.orientation = kDnnInterleavedOrientation;
+                    desc.num_bytes_per_element = layer->outData.front()->getPrecision().size();
+                    desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
+                    desc.num_elements = concatConnection->second.reserved_size / desc.num_bytes_per_element;
+
+                    // binding ptr for first infer request - then others will be setup during relocation
+                    gnamem->bind_ptr(&desc.ptrs.front(), &concatConnection->second.gna_ptr);
+                    stopSearching = true;
+                }
+            }
         }, true, [&stopSearching](InferenceEngine::CNNLayer* from) {
             return make_upstream_order(!stopSearching ? from : nullptr);
         });
@@ -722,20 +750,20 @@ void GNAPlugin::createRequestConfigsForGnaModels() {
 void GNAPlugin::DumpXNNToFile() const {
     // TODO: output  precision as well as pointer might be incorrect, LSTM for sure
     // gna looks automatically set layer 0 as output and adjust it's pointer / precision/ size respectively
-    if (dumpXNNPath.empty()) {
+    if (config.dumpXNNPath.empty()) {
         return;
     }
 
-    if (dumpXNNGeneration != "GNA1" &&
-        dumpXNNGeneration != "GNA3" &&
-        !dumpXNNGeneration.empty()) {
-        THROW_GNA_EXCEPTION << "Wrong GNA generation for embedded model dump: " << dumpXNNGeneration;
+    if (config.dumpXNNGeneration != "GNA1" &&
+        config.dumpXNNGeneration != "GNA3" &&
+        !config.dumpXNNGeneration.empty()) {
+        THROW_GNA_EXCEPTION << "Wrong GNA generation for embedded model dump: " << config.dumpXNNGeneration;
     }
 
     if (!gnadevice) {
         THROW_GNA_EXCEPTION << "Cannot generate XNNDump for float network";
     }
-    std::ofstream dumpStream(dumpXNNPath, std::ios::out | std::ios::binary);
+    std::ofstream dumpStream(config.dumpXNNPath, std::ios::out | std::ios::binary);
 #if GNA_LIB_VER == 1
     auto dump = gnadevice->dumpXnn(&std::get<0>(nnets.front())->obj, ptr_active_indices, num_active_indices);
     dump.header.rw_region_size = gnamem->getRWBytes();
@@ -745,7 +773,7 @@ void GNAPlugin::DumpXNNToFile() const {
     dumpStream.write(reinterpret_cast<char*>(dump.model.get()), dump.header.model_size);
 #else
     auto const modelId = gnadevice->createModel(std::get<0>(gnaModels.front())->obj);
-    if (dumpXNNGeneration != "GNA3") {
+    if (config.dumpXNNGeneration != "GNA3") {
         auto dump = gnadevice->dumpXnn(modelId);
         dump.header.RwRegionSize = gnamem->getRWBytes();
         dump.header.InputScalingFactor = inputsDesc->inputScaleFactors.front();
@@ -1204,228 +1232,14 @@ void GNAPlugin::GetPerformanceCounts(std::map<std::string, InferenceEngine::Infe
 
 void GNAPlugin::AddExtension(InferenceEngine::IExtensionPtr extension) {}
 
-void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
-    Init();
-    auto supportedConfigOptions = supportedConfigKeys();
-
-    for (auto& item : config) {
-        auto keys = std::find_if(supportedConfigOptions.begin(), supportedConfigOptions.end(), [&item](const std::string& supportedConfigOption) {
-            return item.first == supportedConfigOption ||
-                   item.first.find(GNA_CONFIG_KEY(SCALE_FACTOR)) == 0;
-        });
-        if (keys == supportedConfigOptions.end()) {
-            THROW_GNA_EXCEPTION << as_status << NOT_FOUND << "Incorrect GNA Plugin config. Key " << item.first << " not supported";
-        }
-    }
-
-    // holds actual value of a found key
-    std::string key;
-    std::string value;
-    auto if_set = [&](const std::string& keyInput, const std::function<void()> & handler) {
-        auto keyInMap = config.find(keyInput);
-        if (keyInMap != config.end()) {
-            value = keyInMap->second;
-            handler();
-        }
-    };
-
-    auto if_start = [&](const std::string& keyInput, const std::function<void()> & handler) {
-        for (auto && c : config) {
-            if (c.first.find(keyInput) == 0) {
-                if (c.first.size() > keyInput.size() + 1) {
-                    key = c.first.substr(keyInput.size() + 1);
-                    value = c.second;
-                    handler();
-                }
-            }
-        }
-    };
-
-    auto fp32eq = [](float p1, float p2) -> bool {
-        return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
-    };
-
-    auto & log = gnalog();
-
-    if_start(GNA_CONFIG_KEY(SCALE_FACTOR), [&, this] {
-        uint64_t scaleForInput = std::stoul(key, NULL, 10);
-        if (scaleForInput > 10) {
-            THROW_GNA_EXCEPTION << "input scale factor with index(" << key << ") unsupported";
-        }
-        auto scaleFactor = std::stod(value);
-        if (fp32eq(scaleFactor, 0.0f)) {
-            THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported";
-        }
-        // not appeared scale factors are to be 1.0f
-        if (inputsDesc->inputScaleFactors.size() <= scaleForInput) {
-            inputsDesc->inputScaleFactors.resize(scaleForInput + 1, 1.f);
-        }
-        inputsDesc->inputScaleFactors[scaleForInput] = InferenceEngine::CNNLayer::ie_parse_float(value);
-    });
-
-    if (inputsDesc->inputScaleFactors.empty()) {
-        if_set(GNA_CONFIG_KEY(SCALE_FACTOR), [&] {
-            auto scaleFactor = InferenceEngine::CNNLayer::ie_parse_float(value);
-            if (fp32eq(scaleFactor, 0.0f)) {
-                THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported";
-            }
-            inputsDesc->inputScaleFactors.push_back(scaleFactor);
-        });
-    }
-
-    if (inputsDesc->inputScaleFactors.empty()) {
-        inputsDesc->inputScaleFactors.push_back(1.f);
-    }
-
-    if_set(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), [&] {
-        dumpXNNPath = value;
-    });
-
-    if_set(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION), [&] {
-        dumpXNNGeneration = value;
-    });
-
-    if_set(GNA_CONFIG_KEY(DEVICE_MODE), [&] {
-#if GNA_LIB_VER == 1
-        static caseless_unordered_map <std::string, uint32_t> supported_values = {
-                {GNAConfigParams::GNA_AUTO, GNA_AUTO},
-                {GNAConfigParams::GNA_HW, GNA_HARDWARE},
-                {GNAConfigParams::GNA_SW, GNA_SOFTWARE},
-                {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
-        };
-        static std::vector <std::string> supported_values_on_gna2 = {
-            GNAConfigParams::GNA_GEN,
-            GNAConfigParams::GNA_GEN_EXACT,
-            GNAConfigParams::GNA_SSE,
-            GNAConfigParams::GNA_SSE_EXACT,
-            GNAConfigParams::GNA_AVX1,
-            GNAConfigParams::GNA_AVX1_EXACT,
-            GNAConfigParams::GNA_AVX2,
-            GNAConfigParams::GNA_AVX2_EXACT
-        };
-#else
-        static caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, Gna2DeviceVersion> > supported_values = {
-            {GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, Gna2DeviceVersionSoftwareEmulation}},
-            {GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, Gna2DeviceVersionSoftwareEmulation}},
-            {GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, Gna2DeviceVersionSoftwareEmulation}},
-            {GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, Gna2DeviceVersion1_0}},
-            {GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, Gna2DeviceVersionSoftwareEmulation}},
-            {GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, Gna2DeviceVersion1_0}},
-            {GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, Gna2DeviceVersionSoftwareEmulation}},
-            {GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, Gna2DeviceVersion1_0}},
-            {GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, Gna2DeviceVersionSoftwareEmulation}},
-            {GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, Gna2DeviceVersion1_0}},
-            {GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, Gna2DeviceVersionSoftwareEmulation}},
-            {GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, Gna2DeviceVersion1_0}},
-        };
-#endif
-        auto procType = supported_values.find(value);
-        if (procType == supported_values.end()) {
-            if (value == GNA_CONFIG_VALUE(SW_FP32)) {
-                gnaFlags->sw_fp32 = true;
-            } else {
-#if GNA_LIB_VER == 1
-                auto is_gna2_mode = std::find(
-                        supported_values_on_gna2.begin(),
-                        supported_values_on_gna2.end(),
-                        value);
-                if (is_gna2_mode != supported_values_on_gna2.end()) {
-                    THROW_GNA_EXCEPTION << "This GNA device mode require GNA2 library: " << value;
-                }
-#endif
-                THROW_GNA_EXCEPTION << "GNA device mode unsupported: " << value;
-            }
-        } else {
-#if GNA_LIB_VER == 1
-            gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
-#else
-            pluginGna2AccMode = procType->second.first;
-            pluginGna2DeviceConsistent = procType->second.second;
-#endif
-        }
-    });
-
-    if_set(GNA_CONFIG_KEY(COMPACT_MODE), [&] {
-        if (value == PluginConfigParams::YES) {
-            gnaFlags->compact_mode = true;
-        } else if (value == PluginConfigParams::NO) {
-            gnaFlags->compact_mode = false;
-        } else {
-            log << "GNA compact mode should be YES/NO, but not" << value;
-            THROW_GNA_EXCEPTION << "GNA compact mode should be YES/NO, but not" << value;
-        }
-    });
-
-    if_set(CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), [&] {
-        if (value == PluginConfigParams::YES) {
-            gnaFlags->exclusive_async_requests  = true;
-        } else if (value == PluginConfigParams::NO) {
-            gnaFlags->exclusive_async_requests  = false;
-        } else {
-            log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-        }
-    });
-
-    if_set(GNA_CONFIG_KEY(PRECISION), [&] {
-        auto precision = Precision::FromStr(value);
-        if (precision != Precision::I8 && precision != Precision::I16) {
-            log << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
-            THROW_GNA_EXCEPTION << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
-        }
-        gnaPrecision = precision;
-    });
-
-    if_set(GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), [&] {
-        if (value == PluginConfigParams::YES) {
-            gnaFlags->uniformPwlDesign = true;
-        } else if (value == PluginConfigParams::NO) {
-            gnaFlags->uniformPwlDesign = false;
-        } else {
-            log << "GNA pwl uniform algorithm parameter "
-                << "should be equal to YES/NO, but not" << value;
-            THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter "
-                                << "should be equal to YES/NO, but not" << value;
-        }
-    });
-
-    if_set(CONFIG_KEY(PERF_COUNT), [&] {
-        if (value == PluginConfigParams::YES) {
-            gnaFlags->performance_counting = true;
-        } else if (value == PluginConfigParams::NO) {
-            gnaFlags->performance_counting = false;
-        } else {
-            log << "GNA performance counter enabling parameter "
-                << "should be equal to YES/NO, but not" << value;
-            THROW_GNA_EXCEPTION << "GNA performance counter enabling parameter "
-                                << "should be equal to YES/NO, but not" << value;
-        }
-    });
-
-    if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
-        uint64_t lib_threads = std::stoul(value, NULL, 10);
-        if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {
-            log << "Unsupported accelerator lib number of threads: " << value << ", should be greateer than 0 and less than 127";
-            THROW_GNA_EXCEPTION << "Unsupported accelerator lib number of threads: " << value
-                                << ", should be greateer than 0 and less than 127";
-        }
-        gnaFlags->gna_lib_async_threads_num = lib_threads;
-    });
-
-    if_set(CONFIG_KEY(SINGLE_THREAD), [&] {
-        if (value == PluginConfigParams::YES) {
-            gnaFlags->gna_openmp_multithreading  = false;
-        } else if (value == PluginConfigParams::NO) {
-            gnaFlags->gna_openmp_multithreading  = true;
-        } else {
-            log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-        }
-    });
+void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config_map) {
+    config.UpdateFromMap(config_map);
+    UpdateFieldsFromConfig();
+}
 
-    if (gnaFlags->sw_fp32 && gnaFlags->gna_lib_async_threads_num > 1) {
-        THROW_GNA_EXCEPTION << "GNA plugin not support async mode on GNA_SW_FP32!";
-    }
+void GNAPlugin::UpdateFieldsFromConfig() {
+    inputsDesc->inputScaleFactors = config.inputScaleFactors;
+    *gnaFlags = config.gnaFlags;
 }
 
 void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
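
SetConfig is now a thin wrapper: the map goes into Config::UpdateFromMap and the derived plugin fields are re-synchronized, so callers keep configuring the device through the usual Core entry point. A hedged sketch; the key/value macros are assumed from gna_config.hpp / ie_plugin_config.hpp and the values are placeholders:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>
    #include <gna/gna_config.hpp>
    #include <string>

    int main() {
        InferenceEngine::Core ie;
        // routed into GNAPlugin::SetConfig -> Config::UpdateFromMap -> UpdateFieldsFromConfig
        ie.SetConfig({
            {GNA_CONFIG_KEY(DEVICE_MODE), GNA_CONFIG_VALUE(SW_FP32)},
            {std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", "2048"},  // per-input scale factor
            {CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO)}
        }, "GNA");
        return 0;
    }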
index eb9f37b..10ccc7b 100644 (file)
@@ -22,6 +22,7 @@
 #include "gna_graph_compiler.hpp"
 #include "gna_plugin_policy.hpp"
 #include "gna_plugin_log.hpp"
+#include "gna_plugin_config.hpp"
 
 #if GNA_LIB_VER == 2
 #include <gna2-model-api.h>
@@ -32,6 +33,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
  protected:
     std::string _pluginName = "GNA";
 
+    Config config;
     std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn;
     std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
     std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
@@ -63,20 +65,12 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     // index matches iterating order of cnnnetwork outputs info
     std::vector<GNAPluginNS::OutputDesc> outputsDesc = std::vector<OutputDesc>();
 
-    // precision of GNA hardware model
-    InferenceEngine::Precision gnaPrecision = InferenceEngine::Precision::I16;
-
     intel_dnn_number_type_t output_type = kDnnInt;
 
     GNAPluginNS::Policy policy;
-    std::string dumpXNNPath;
-    std::string dumpXNNGeneration;
-#if GNA_LIB_VER == 1
-    intel_gna_proc_t gna_proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
-#else
-    Gna2AccelerationMode pluginGna2AccMode = Gna2AccelerationModeSoftware;
-Gna2DeviceVersion pluginGna2DeviceConsistent = Gna2DeviceVersion1_0;
-void createRequestConfigsForGnaModels();
+
+#if GNA_LIB_VER == 2
+    void createRequestConfigsForGnaModels();
 #endif
 
     std::shared_ptr<GNADeviceHelper> gnadevice;
@@ -104,15 +98,12 @@ void createRequestConfigsForGnaModels();
     void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap);
     void AddExtension(InferenceEngine::IExtensionPtr extension) override;
 
-    std::vector<std::string> supportedConfigKeys() const;
-    std::map<std::string, std::string> supportedConfigKeysWithDefaults() const;
-
     void SetConfig(const std::map<std::string, std::string> &config) override;
     void LoadNetwork(InferenceEngine::IExecutableNetwork::Ptr &executableNetwork,
                      const InferenceEngine::ICNNNetwork &network,
-                     const std::map<std::string, std::string> &config) override { THROW_GNA_EXCEPTION << "Not implemented"; }
+                     const std::map<std::string, std::string> &config_map) override { THROW_GNA_EXCEPTION << "Not implemented"; }
     InferenceEngine::ExecutableNetwork LoadNetwork(const InferenceEngine::ICNNNetwork &network,
-                                  const std::map<std::string, std::string> &config,
+                                  const std::map<std::string, std::string> &config_map,
                                   InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; }
     void Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
     void SetCore(InferenceEngine::ICore*) noexcept override {}
@@ -221,5 +212,8 @@ void createRequestConfigsForGnaModels();
                     const GNASplitLayer& splitInfo,
                     size_t precision_size,
                     int idx = 0);
+
+    void UpdateFieldsFromConfig();
 };
+
 }  // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
new file mode 100644 (file)
index 0000000..5315920
--- /dev/null
@@ -0,0 +1,278 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gna/gna_config.hpp>
+#include "gna_plugin.hpp"
+#include "gna_plugin_config.hpp"
+#include "ie_common.h"
+#include <details/caseless.hpp>
+#include <unordered_map>
+
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+namespace GNAPluginNS {
+
+#if GNA_LIB_VER == 1
+static caseless_unordered_map<std::string, uint32_t> supported_values = {
+        {GNAConfigParams::GNA_AUTO,     GNA_AUTO},
+        {GNAConfigParams::GNA_HW,       GNA_HARDWARE},
+        {GNAConfigParams::GNA_SW,       GNA_SOFTWARE},
+        {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
+};
+static std::vector<std::string> supported_values_on_gna2 = {
+        GNAConfigParams::GNA_GEN,
+        GNAConfigParams::GNA_GEN_EXACT,
+        GNAConfigParams::GNA_SSE,
+        GNAConfigParams::GNA_SSE_EXACT,
+        GNAConfigParams::GNA_AVX1,
+        GNAConfigParams::GNA_AVX1_EXACT,
+        GNAConfigParams::GNA_AVX2,
+        GNAConfigParams::GNA_AVX2_EXACT
+};
+#else
+static caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, Gna2DeviceVersion>> supported_values = {
+                {GNAConfigParams::GNA_AUTO,       {Gna2AccelerationModeAuto,     Gna2DeviceVersionSoftwareEmulation}},
+                {GNAConfigParams::GNA_HW,         {Gna2AccelerationModeHardware, Gna2DeviceVersionSoftwareEmulation}},
+                {GNAConfigParams::GNA_SW,         {Gna2AccelerationModeSoftware, Gna2DeviceVersionSoftwareEmulation}},
+                {GNAConfigParams::GNA_SW_EXACT,   {Gna2AccelerationModeSoftware, Gna2DeviceVersion1_0}},
+                {GNAConfigParams::GNA_GEN,        {Gna2AccelerationModeGeneric,  Gna2DeviceVersionSoftwareEmulation}},
+                {GNAConfigParams::GNA_GEN_EXACT,  {Gna2AccelerationModeGeneric,  Gna2DeviceVersion1_0}},
+                {GNAConfigParams::GNA_SSE,        {Gna2AccelerationModeSse4x2,   Gna2DeviceVersionSoftwareEmulation}},
+                {GNAConfigParams::GNA_SSE_EXACT,  {Gna2AccelerationModeSse4x2,   Gna2DeviceVersion1_0}},
+                {GNAConfigParams::GNA_AVX1,       {Gna2AccelerationModeAvx1,     Gna2DeviceVersionSoftwareEmulation}},
+                {GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1,     Gna2DeviceVersion1_0}},
+                {GNAConfigParams::GNA_AVX2,       {Gna2AccelerationModeAvx2,     Gna2DeviceVersionSoftwareEmulation}},
+                {GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2,     Gna2DeviceVersion1_0}},
+        };
+#endif
+
+void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
+    for (auto&& item : config) {
+        auto key = item.first;
+        auto value = item.second;
+
+        auto fp32eq = [](float p1, float p2) -> bool {
+            return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
+        };
+
+        auto &log = gnalog();
+
+        if (key.find(GNA_CONFIG_KEY(SCALE_FACTOR)) == 0) {
+            uint64_t input_index;
+            if (key == GNA_CONFIG_KEY(SCALE_FACTOR)) {
+                input_index = 0;
+            } else {
+                key.erase(0, strlen(GNA_CONFIG_KEY(SCALE_FACTOR)));
+                if (key[0] != '_') {
+                    THROW_GNA_EXCEPTION << "Invalid format of scale factor configuration key";
+                }
+                key.erase(0, 1);
+                try {
+                    input_index = std::stoi(key);
+                    if (input_index > 9) {
+                        throw std::out_of_range("");
+                    }
+                } catch (std::invalid_argument&) {
+                    THROW_GNA_EXCEPTION << "Invalid value of index of input scale factor";
+                } catch (std::out_of_range&) {
+                    THROW_GNA_EXCEPTION << "Index of input scale factor must be in the range [0..9], " << value << " provided";
+                }
+            }
+            auto scale_factor = InferenceEngine::CNNLayer::ie_parse_float(value);
+            if (fp32eq(scale_factor, 0.0f)) {
+                THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported";
+            }
+            // missing scale factors are set to be 1.0f
+            if (inputScaleFactors.size() <= input_index) {
+                inputScaleFactors.resize(input_index + 1, 1.f);
+            }
+            inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value);
+        } else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE)) {
+            dumpXNNPath = value;
+        } else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION)) {
+            dumpXNNGeneration = value;
+        } else if (key == GNA_CONFIG_KEY(DEVICE_MODE)) {
+            auto procType = supported_values.find(value);
+            if (procType == supported_values.end()) {
+                if (value == GNA_CONFIG_VALUE(SW_FP32)) {
+                    gnaFlags.sw_fp32 = true;
+                } else {
+#if GNA_LIB_VER == 1
+                    auto is_gna2_mode = std::find(
+                            supported_values_on_gna2.begin(),
+                            supported_values_on_gna2.end(),
+                            value);
+                    if (is_gna2_mode != supported_values_on_gna2.end()) {
+                        THROW_GNA_EXCEPTION << "This GNA device mode requires GNA2 library: " << value;
+                    }
+#endif
+                    THROW_GNA_EXCEPTION << "GNA device mode unsupported: " << value;
+                }
+            } else {
+#if GNA_LIB_VER == 1
+                gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
+#else
+                pluginGna2AccMode = procType->second.first;
+                pluginGna2DeviceConsistent = procType->second.second;
+#endif
+            }
+        } else if (key == GNA_CONFIG_KEY(COMPACT_MODE)) {
+            if (value == PluginConfigParams::YES) {
+                gnaFlags.compact_mode = true;
+            } else if (value == PluginConfigParams::NO) {
+                gnaFlags.compact_mode = false;
+            } else {
+                log << "GNA compact mode should be YES/NO, but not " << value;
+                THROW_GNA_EXCEPTION << "GNA compact mode should be YES/NO, but not " << value;
+            }
+        } else if (key == CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)) {
+            if (value == PluginConfigParams::YES) {
+                gnaFlags.exclusive_async_requests = true;
+            } else if (value == PluginConfigParams::NO) {
+                gnaFlags.exclusive_async_requests = false;
+            } else {
+                log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+                THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+            }
+        } else if (key == GNA_CONFIG_KEY(PRECISION)) {
+            auto precision = Precision::FromStr(value);
+            if (precision != Precision::I8 && precision != Precision::I16) {
+                log << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
+                THROW_GNA_EXCEPTION << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: "
+                                    << value;
+            }
+            gnaPrecision = precision;
+        } else if (key == GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)) {
+            if (value == PluginConfigParams::YES) {
+                gnaFlags.uniformPwlDesign = true;
+            } else if (value == PluginConfigParams::NO) {
+                gnaFlags.uniformPwlDesign = false;
+            } else {
+                log << "GNA pwl uniform algorithm parameter "
+                    << "should be equal to YES/NO, but not" << value;
+                THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter "
+                                    << "should be equal to YES/NO, but not" << value;
+            }
+        } else if (key == CONFIG_KEY(PERF_COUNT)) {
+            if (value == PluginConfigParams::YES) {
+                gnaFlags.performance_counting = true;
+            } else if (value == PluginConfigParams::NO) {
+                gnaFlags.performance_counting = false;
+            } else {
+                log << "GNA performance counter enabling parameter "
+                    << "should be equal to YES/NO, but not" << value;
+                THROW_GNA_EXCEPTION << "GNA performance counter enabling parameter "
+                                    << "should be equal to YES/NO, but not" << value;
+            }
+        } else if (key == GNA_CONFIG_KEY(LIB_N_THREADS)) {
+            uint64_t lib_threads;
+            try {
+                lib_threads = std::stoul(value);
+                if (lib_threads == 0 || lib_threads > (std::numeric_limits<uint8_t>::max()+1) / 2 - 1) {
+                    throw std::out_of_range("");
+                }
+            } catch (std::invalid_argument&) {
+                THROW_GNA_EXCEPTION << "Invalid value of number of threads";
+            } catch (std::out_of_range&) {
+                log << "Unsupported accelerator lib number of threads: " << value
+                    << ", should be greater than 0 and less than 127";
+                THROW_GNA_EXCEPTION << "Unsupported accelerator lib number of threads: " << value
+                                    << ", should be greater than 0 and less than 127";
+            }
+            gnaFlags.gna_lib_async_threads_num = lib_threads;
+        } else if (key == CONFIG_KEY(SINGLE_THREAD)) {
+            if (value == PluginConfigParams::YES) {
+                gnaFlags.gna_openmp_multithreading = false;
+            } else if (value == PluginConfigParams::NO) {
+                gnaFlags.gna_openmp_multithreading = true;
+            } else {
+                log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+                THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+            }
+        } else {
+            THROW_GNA_EXCEPTION << as_status << NOT_FOUND << "Incorrect GNA Plugin config. Key " << item.first
+                                << " not supported";
+        }
+
+        if (gnaFlags.sw_fp32 && gnaFlags.gna_lib_async_threads_num > 1) {
+            THROW_GNA_EXCEPTION << "GNA plugin does not support async mode on GNA_SW_FP32!";
+        }
+    }
+
+    if (inputScaleFactors.empty()) {
+        inputScaleFactors.push_back(1.0f);
+    }
+
+    AdjustKeyMapValues();
+}
+
+void Config::AdjustKeyMapValues() {
+    key_config_map.clear();
+
+    if (inputScaleFactors.empty()) {
+        inputScaleFactors.push_back(1.0);
+    }
+    key_config_map[GNA_CONFIG_KEY(SCALE_FACTOR)] = std::to_string(inputScaleFactors[0]);
+    for (int n = 0; n < inputScaleFactors.size(); n++) {
+        key_config_map[GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(n)] =
+                std::to_string(inputScaleFactors[n]);
+    }
+    key_config_map[GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE)] = dumpXNNPath;
+    key_config_map[GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION)] = dumpXNNGeneration;
+
+    std::string device_mode;
+    if (gnaFlags.sw_fp32) {
+        device_mode = GNA_CONFIG_VALUE(SW_FP32);
+    } else {
+        for (auto&& value : supported_values) {
+#if GNA_LIB_VER == 1
+            if (value.second == gna_proc_type) {
+                device_mode = value.first;
+                break;
+            }
+#else
+            if (value.second.first == pluginGna2AccMode &&
+                value.second.second == pluginGna2DeviceConsistent) {
+                device_mode = value.first;
+                break;
+            }
+#endif
+        }
+    }
+    IE_ASSERT(!device_mode.empty());
+    key_config_map[GNA_CONFIG_KEY(DEVICE_MODE)] = device_mode;
+    key_config_map[GNA_CONFIG_KEY(COMPACT_MODE)] =
+            gnaFlags.compact_mode ? PluginConfigParams::YES: PluginConfigParams::NO;
+    key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
+            gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
+    key_config_map[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name();
+    key_config_map[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] =
+            gnaFlags.uniformPwlDesign ? PluginConfigParams::YES: PluginConfigParams::NO;
+    key_config_map[CONFIG_KEY(PERF_COUNT)] =
+            gnaFlags.performance_counting ? PluginConfigParams::YES: PluginConfigParams::NO;
+    key_config_map[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(gnaFlags.gna_lib_async_threads_num);
+    key_config_map[CONFIG_KEY(SINGLE_THREAD)] =
+            gnaFlags.gna_openmp_multithreading ? PluginConfigParams::NO: PluginConfigParams::YES;
+}
+
+std::string Config::GetParameter(const std::string& name) const {
+    auto result = key_config_map.find(name);
+    if (result == key_config_map.end()) {
+        THROW_GNA_EXCEPTION << "Unsupported config key: " << name;
+    }
+    return result->second;
+}
+
+std::vector<std::string> Config::GetSupportedKeys() const {
+    std::vector<std::string> result;
+    for (auto&& configOption : key_config_map) {
+        result.push_back(configOption.first);
+    }
+    return result;
+}
+
+}  // namespace GNAPluginNS
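
Inside the plugin the new Config type acts as a small self-contained container: UpdateFromMap validates and applies a key/value map, AdjustKeyMapValues regenerates the flat key_config_map, and GetParameter/GetSupportedKeys back the query API. A minimal sketch of that flow, assuming the plugin headers are on the include path and GNA_LIB_VER is set by the build:

    #include "gna_plugin_config.hpp"
    #include <gna/gna_config.hpp>
    #include <iostream>
    #include <string>

    int main() {
        GNAPluginNS::Config config;                     // constructor pre-fills key_config_map with defaults
        config.UpdateFromMap({
            {GNA_CONFIG_KEY(DEVICE_MODE), GNA_CONFIG_VALUE(SW_EXACT)},
            {std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", "1024"}
        });
        std::cout << config.GetParameter(GNA_CONFIG_KEY(DEVICE_MODE)) << "\n";  // "GNA_SW_EXACT"
        for (auto&& key : config.GetSupportedKeys())
            std::cout << key << "\n";                   // every key registered by AdjustKeyMapValues
        return 0;
    }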
diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.hpp b/inference-engine/src/gna_plugin/gna_plugin_config.hpp
new file mode 100644 (file)
index 0000000..4bc24bd
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if GNA_LIB_VER == 1
+#include <gna-api.h>
+#else
+#include <gna2-inference-api.h>
+#include <gna2-common-api.h>
+#endif
+#include "ie_precision.hpp"
+#include "descriptions/gna_flags.hpp"
+#include <vector>
+#include <map>
+
+namespace GNAPluginNS {
+
+struct Config {
+    Config() {
+        AdjustKeyMapValues();
+    }
+    void UpdateFromMap(const std::map<std::string, std::string>& configMap);
+    void AdjustKeyMapValues();
+    std::string GetParameter(const std::string& name) const;
+    std::vector<std::string> GetSupportedKeys() const;
+
+    // precision of GNA hardware model
+    InferenceEngine::Precision gnaPrecision = InferenceEngine::Precision::I16;
+
+    std::string dumpXNNPath;
+    std::string dumpXNNGeneration;
+
+#if GNA_LIB_VER == 1
+    intel_gna_proc_t gna_proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
+#else
+    Gna2AccelerationMode pluginGna2AccMode = Gna2AccelerationModeSoftware;
+    Gna2DeviceVersion pluginGna2DeviceConsistent = Gna2DeviceVersion1_0;
+#endif
+
+    std::vector<float> inputScaleFactors;
+    GNAFlags gnaFlags;
+
+    std::map<std::string, std::string> key_config_map;
+};
+
+}  // namespace GNAPluginNS
index 7c5a8c7..dc5a584 100644 (file)
 #include <cpp_interfaces/impl/ie_plugin_internal.hpp>
 #include <cpp_interfaces/impl/ie_executable_network_internal.hpp>
 #include "gna_executable_network.hpp"
+#include "gna_plugin_config.hpp"
 
 namespace GNAPluginNS {
 
 class GNAPluginInternal  : public InferenceEngine::InferencePluginInternal {
- public:
+private:
+    Config defaultConfig;
+    std::weak_ptr <GNAPlugin> plgPtr;
+    std::shared_ptr<GNAPlugin> GetCurrentPlugin() const {
+        auto ptr = plgPtr.lock();
+        if (ptr == nullptr) {
+            return std::make_shared<GNAPlugin>();
+        } else {
+            return ptr;
+        }
+    }
+
+public:
     InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core,
                                                 const InferenceEngine::ICNNNetwork &network,
                                                 const std::map<std::string, std::string> &config) override {
-        return std::make_shared<GNAExecutableNetwork>(*cloneNet(network), config);
+        Config updated_config(defaultConfig);
+        updated_config.UpdateFromMap(config);
+        auto plg = std::make_shared<GNAPlugin>(updated_config.key_config_map);
+        plgPtr = plg;
+        return std::make_shared<GNAExecutableNetwork>(*cloneNet(network), plg);
     }
+
     void SetConfig(const std::map<std::string, std::string> &config) override {
-        auto plg = std::make_shared<GNAPlugin>();
-        plg->SetConfig(config);
+        defaultConfig.UpdateFromMap(config);
     }
+
     InferenceEngine::IExecutableNetwork::Ptr  ImportNetwork(
                                                 const std::string &modelFileName,
                                                 const std::map<std::string, std::string> &config) override {
-        return make_executable_network(std::make_shared<GNAExecutableNetwork>(modelFileName, config));
+        Config updated_config(defaultConfig);
+        updated_config.UpdateFromMap(config);
+        auto plg = std::make_shared<GNAPlugin>(updated_config.key_config_map);
+        plgPtr = plg;
+        return make_executable_network(std::make_shared<GNAExecutableNetwork>(modelFileName, plg));
     }
+
     using InferenceEngine::InferencePluginInternal::ImportNetwork;
 
     std::string GetName() const noexcept override {
-        auto plg = std::make_shared<GNAPlugin>();
-        return plg->GetName();
+        return GetCurrentPlugin()->GetName();
     }
 
     void QueryNetwork(const InferenceEngine::ICNNNetwork& network,
                       const std::map<std::string, std::string>& config,
                       InferenceEngine::QueryNetworkResult& res) const override {
-        auto plg = std::make_shared<GNAPlugin>();
+        auto plg = GetCurrentPlugin();
         try {
             plg->SetConfig(config);
         } catch (InferenceEngine::details::InferenceEngineException) {}
@@ -48,13 +70,11 @@ class GNAPluginInternal  : public InferenceEngine::InferencePluginInternal {
 
     InferenceEngine::Parameter GetMetric(const std::string& name,
                                          const std::map<std::string, InferenceEngine::Parameter> & options) const override {
-        GNAPlugin statelessPlugin;
-        return statelessPlugin.GetMetric(name, options);
+        return GetCurrentPlugin()->GetMetric(name, options);
     }
 
     InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter> & options) const override {
-        GNAPlugin statelessPlugin;
-        return statelessPlugin.GetConfig(name, options);
+        return defaultConfig.GetParameter(name);
     }
 };
 
index 38dab44..8e39258 100644 (file)
@@ -37,8 +37,9 @@ class Policy {
     enum class ConcatAlignment {
         DISABLED,
         DISABLED_FOR_FP32,
-        ENABLED
-    } ConcatAlignmentPolicy = ConcatAlignment::ENABLED;
+        ENABLED,
+        FAST
+    } ConcatAlignmentPolicy = ConcatAlignment::FAST;
 };
 
 inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) {
@@ -51,4 +52,16 @@ inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) {
     return os;
 }
 
+inline std::ostream& operator<<(std::ostream& os, Policy::ConcatAlignment policy) {
+    switch (policy) {
+        case Policy::ConcatAlignment::DISABLED   : os << "DISABLED";    break;
+        case Policy::ConcatAlignment::DISABLED_FOR_FP32   : os << "DISABLED_FOR_FP32";    break;
+        case Policy::ConcatAlignment::ENABLED   : os << "ENABLED";    break;
+        case Policy::ConcatAlignment::FAST   : os << "FAST";    break;
+        default    : os.setstate(std::ios_base::failbit);
+    }
+    return os;
+}
+
+
 }  // namespace GNAPluginNS
index eaa116d..9d16642 100644 (file)
@@ -16,19 +16,14 @@ using namespace GNAPluginNS;
 using namespace InferenceEngine;
 using namespace InferenceEngine::PluginConfigParams;
 
-Parameter GNAPlugin::GetConfig(const std::string& name, const std::map<std::string, Parameter> & options) const {
-    auto configKeys = supportedConfigKeysWithDefaults();
-    auto result = configKeys.find(name);
-    if (result == configKeys.end()) {
-        THROW_GNA_EXCEPTION << "unsupported config key: " << name;
-    }
-    return result->second;
+Parameter GNAPlugin::GetConfig(const std::string& name, const std::map<std::string, Parameter> & /*options*/) const {
+    return config.GetParameter(name);
 }
 
 Parameter GNAPlugin::GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter> & options) const {
     const std::unordered_map<std::string, std::function<Parameter()>> queryApiSupported = {
         {METRIC_KEY(AVAILABLE_DEVICES), [this]() {return GetAvailableDevices();}},
-        {METRIC_KEY(SUPPORTED_CONFIG_KEYS), [this]() {return supportedConfigKeys();}},
+        {METRIC_KEY(SUPPORTED_CONFIG_KEYS), [this]() {return config.GetSupportedKeys();}},
         {METRIC_KEY(FULL_DEVICE_NAME), [&options, this]() {
             auto availableDevices = GetAvailableDevices().as<std::vector<std::string>>();
 
@@ -100,29 +95,3 @@ Parameter GNAPlugin::GetAvailableDevices() const {
 
     return devices;
 }
-
-std::map<std::string, std::string> GNAPlugin::supportedConfigKeysWithDefaults() const {
-    std::map<std::string, std::string>  options = {
-        {GNA_CONFIG_KEY(SCALE_FACTOR), "1.0"},
-        {GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), ""},
-        {GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION), ""},
-        {GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_AUTO},
-        {GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)},
-        {CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)},
-        {GNA_CONFIG_KEY(PRECISION), Precision(Precision::I8).name()},
-        {GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(YES)},
-        {CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO)},
-        {GNA_CONFIG_KEY(LIB_N_THREADS), "1"},
-        {CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)}
-    };
-    return options;
-}
-
-
-std::vector<std::string> GNAPlugin::supportedConfigKeys()const {
-    std::vector<std::string> result;
-    for (auto && configOption : supportedConfigKeysWithDefaults()) {
-        result.push_back(configOption.first);
-    }
-    return result;
-}
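
GetMetric and GetConfig are now served by the shared Config object instead of an ad-hoc key list, so the supported keys and their current (default or user-set) values can be inspected through the Core API. A small sketch; the .as<std::string>() call assumes the GNA plugin keeps its config values as strings, as key_config_map suggests:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>
    #include <gna/gna_config.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        InferenceEngine::Core core;

        // Served by GNAPlugin::GetMetric via config.GetSupportedKeys().
        auto keys = core.GetMetric("GNA", METRIC_KEY(SUPPORTED_CONFIG_KEYS))
                        .as<std::vector<std::string>>();
        for (const auto& key : keys) {
            std::cout << key << std::endl;
        }

        // Served by Config::GetParameter; returns the current value for the key.
        std::cout << core.GetConfig("GNA", GNA_CONFIG_KEY(SCALE_FACTOR)).as<std::string>()
                  << std::endl;
        return 0;
    }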
index 1614ca6..0c4b9f1 100644 (file)
@@ -675,6 +675,10 @@ void InsertCopyLayerPass::run() {
 
 void InsertConcatAligningFilterPass::run() {
     auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
+
+    if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) {
+        return;
+    }
     // aligning specific not required in fp32 mode
     if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
         return;
@@ -740,6 +744,10 @@ void InsertConcatAligningFilterPass::run() {
                 // encodes offset to beginning of split layer input
                 concatAligningFilter->params["output_offset"] =
                         std::to_string((aligned64_offset / bytesPerConcatElement) * (quantized ? bytesPerConcatElement : 4));
+
+                // for padded rows we cannot use copy layer - TBD how to implement
+                concatAligningFilter->params["num_rows_padded"] = std::to_string(num_rows_padded);
+
                 // encodes original output size
                 concatAligningFilter->params["original_num_rows"] = std::to_string(num_rows_in);
 
@@ -1084,7 +1092,7 @@ int PassManager::run(int index) {
         saveGraphToDot(*network.get(), out, [](const CNNLayerPtr layer,
                                                ordered_properties &printed_properties,
                                                ordered_properties &node_properties) {});
-        network->serialize(name + ".xml", "", nullptr);
+        network->serialize(name + ".xml", name + ".bin", nullptr);
     };
 #else
     auto dumpNetworkAfterPass = [] (std::shared_ptr<Pass> ) {};
index c5513f3..24c091e 100644 (file)
@@ -48,7 +48,7 @@ HeteroAsyncInferRequest::HeteroAsyncInferRequest(const HeteroInferRequest::Ptr&
 
 void HeteroAsyncInferRequest::StartAsync_ThreadUnsafe() {
     _heteroInferRequest->updateInOutIfNeeded();
-    RunFirstStage();
+    RunFirstStage(_pipeline.begin(), _pipeline.end());
 }
 
 StatusCode HeteroAsyncInferRequest::Wait(int64_t millis_timeout) {
index 2e32626..f4e221d 100644 (file)
@@ -65,6 +65,27 @@ HeteroInferRequest::HeteroInferRequest(InferenceEngine::InputsDataMap networkInp
     }
 }
 
+void HeteroInferRequest::SetBlob(const char* name, const InferenceEngine::Blob::Ptr& data) {
+    InferenceEngine::InferRequestInternal::SetBlob(name, data);
+    assert(!_inferRequests.empty());
+    for (auto &&desc : _inferRequests) {
+        auto &r = desc._request;
+        assert(nullptr != r);
+        InputInfo::Ptr foundInput;
+        DataPtr foundOutput;
+        try {
+            // if `name` is an input blob
+            if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
+                r->SetBlob(name, data, foundInput->getPreProcess());
+            }
+        } catch (const InferenceEngine::details::InferenceEngineException & ex) {
+            std::string message = ex.what();
+            if (message.find(NOT_FOUND_str) == std::string::npos)
+                throw ex;
+        }
+    }
+}
+
 void HeteroInferRequest::InferImpl() {
     updateInOutIfNeeded();
     size_t i = 0;
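
HeteroInferRequest::SetBlob now forwards a user-provided blob to every underlying device request that recognizes the name and swallows only NOT_FOUND errors, so a blob set once on the HETERO request reaches the sub-request that actually owns that input. A hedged sketch of the calling side (model path and device list are placeholders):

    #include <ie_core.hpp>

    int main() {
        InferenceEngine::Core core;
        auto network = core.ReadNetwork("model.xml");           // placeholder model
        auto executable = core.LoadNetwork(network, "HETERO:GPU,CPU");
        auto request = executable.CreateInferRequest();

        auto inputName = network.getInputsInfo().begin()->first;
        auto input = request.GetBlob(inputName);                 // blob allocated by the plugin
        // ... fill `input` with data ...
        request.SetBlob(inputName, input);  // propagated to the sub-request owning this input
        request.Infer();
        return 0;
    }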
index 72099b4..18163f7 100644 (file)
@@ -39,6 +39,8 @@ public:
 
     void InferImpl() override;
 
+    void SetBlob(const char* name, const InferenceEngine::Blob::Ptr& data) override;
+
     void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const override;
 
     void updateInOutIfNeeded();
index 2071c12..54c9a23 100644 (file)
@@ -17,6 +17,8 @@ set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp)
 list(REMOVE_ITEM LIBRARY_SRC ${IE_STATIC_DEPENDENT_FILES})
 
 set(IE_BASE_SOURCE_FILES
+      ${CMAKE_CURRENT_SOURCE_DIR}/cnn_network_ngraph_impl.cpp
+      ${CMAKE_CURRENT_SOURCE_DIR}/generic_ie.cpp
       ${CMAKE_CURRENT_SOURCE_DIR}/blob_factory.cpp
       ${CMAKE_CURRENT_SOURCE_DIR}/ie_data.cpp
       ${CMAKE_CURRENT_SOURCE_DIR}/ie_layouts.cpp
@@ -94,7 +96,6 @@ file(GLOB_RECURSE plugin_api_src "${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.hpp"
                                  "${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.h")
 
 add_cpplint_target(${TARGET_NAME}_plugin_api_cpplint FOR_SOURCES ${plugin_api_src})
-add_clang_format_target(${TARGET_NAME}_plugin_api_clang_format FOR_SOURCES ${plugin_api_src})
 
 # Create common base object library
 
@@ -103,6 +104,7 @@ add_library(${TARGET_NAME}_common_obj OBJECT
 
 target_compile_definitions(${TARGET_NAME}_common_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
 target_include_directories(${TARGET_NAME}_common_obj PRIVATE
+    $<TARGET_PROPERTY:${TARGET_NAME}_transformations,INTERFACE_INCLUDE_DIRECTORIES>
     $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
 
 target_include_directories(${TARGET_NAME}_common_obj SYSTEM PRIVATE
@@ -121,6 +123,7 @@ target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $<TARGET_PROPERTY:n
                                                              $<TARGET_PROPERTY:pugixml,INTERFACE_INCLUDE_DIRECTORIES>)
 
 target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}"
+                                                      $<TARGET_PROPERTY:${TARGET_NAME}_ir_readers,INTERFACE_INCLUDE_DIRECTORIES>
                                                       $<TARGET_PROPERTY:${TARGET_NAME}_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
 
 if(ENABLE_PROFILING_ITT AND INTEL_ITT_LIBS)
@@ -146,7 +149,6 @@ if(WIN32)
 endif()
 
 add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj)
-add_clang_format_target(${TARGET_NAME}_clang_format FOR_TARGETS ${TARGET_NAME}_obj)
 
 # Create shared library file from object library
 
@@ -196,7 +198,6 @@ target_include_directories(${TARGET_NAME}_nn_builder PRIVATE "${CMAKE_CURRENT_SO
     "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src")
 
 add_cpplint_target(${TARGET_NAME}_nn_builder_cpplint FOR_TARGETS ${TARGET_NAME}_nn_builder)
-add_clang_format_target(${TARGET_NAME}_nn_builder_clang_format FOR_TARGETS ${TARGET_NAME}_nn_builder)
 
 # Static library used for unit tests which are always built
 
@@ -272,6 +273,10 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
     install(FILES "${TBB}/LICENSE"
             DESTINATION ${IE_CPACK_IE_DIR}/external/tbb
             COMPONENT tbb)
+    install(FILES "${TBB}/cmake/TBBConfig.cmake"
+                  "${TBB}/cmake/TBBConfigVersion.cmake"
+            DESTINATION ${IE_CPACK_IE_DIR}/external/tbb/cmake
+            COMPONENT tbb)
 endif()
 
 ie_cpack_add_component(core REQUIRED DEPENDS ${core_components})
@@ -279,8 +284,8 @@ ie_cpack_add_component(core REQUIRED DEPENDS ${core_components})
 install(DIRECTORY "${IE_MAIN_SOURCE_DIR}/include" DESTINATION ${IE_CPACK_IE_DIR}
         COMPONENT core)
 install(TARGETS ${TARGET_NAME} ${TARGET_NAME}_nn_builder
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
-        ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
+        ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
         LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
 install(FILES "${OpenVINO_BINARY_DIR}/share/ie_parallel.cmake"
               "${OpenVINO_BINARY_DIR}/share/InferenceEngineConfig.cmake"
index e972b39..8572297 100644 (file)
@@ -46,6 +46,8 @@ InferenceEngine::Blob::Ptr CreateBlobFromData(const InferenceEngine::DataPtr& da
         return std::make_shared<InferenceEngine::TBlob<int8_t>>(desc);
     case InferenceEngine::Precision::I32:
         return std::make_shared<InferenceEngine::TBlob<int32_t>>(desc);
+    case InferenceEngine::Precision::BF16:
+            return std::make_shared<InferenceEngine::TBlob<short>>(desc);
     default:
         THROW_IE_EXCEPTION << "precision is no set";
     }
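
CreateBlobFromData now maps Precision::BF16 to a TBlob<short>, i.e. bfloat16 values are carried in 16-bit storage. A hedged sketch of creating such a blob directly (the dimensions are arbitrary):

    #include <ie_blob.h>
    #include <memory>

    int main() {
        using namespace InferenceEngine;

        // BF16 uses 16-bit elements, matching the TBlob<short> chosen by the blob factory.
        TensorDesc desc(Precision::BF16, {1, 3, 224, 224}, Layout::NCHW);
        auto blob = std::make_shared<TBlob<short>>(desc);
        blob->allocate();

        short* raw = blob->buffer().as<short*>();
        raw[0] = 0;  // raw bfloat16 bit patterns go here
        return 0;
    }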
index 371090c..f5af3b8 100644 (file)
@@ -28,7 +28,6 @@
 #include "graph_tools.hpp"
 #include "graph_transformer.h"
 #include "ie_util_internal.hpp"
-#include "ie_cnn_layer_builder_ngraph.h"
 #include "ie_ngraph_utils.hpp"
 #include "ie_profiling.hpp"
 #include "network_serializer.h"
@@ -97,7 +96,7 @@ void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::
     if (ptr) {
         ptr->reshape(dims, ptr->getTensorDesc().getLayout());
     } else {
-        const auto precision = details::ngraph::convertPrecision(output.get_element_type());
+        const auto precision = details::convertPrecision(output.get_element_type());
         const auto layout = TensorDesc::getLayoutByDims(dims);
         ptr.reset(new NGraphData(this, outName, {precision, dims, layout}));
     }
@@ -520,287 +519,3 @@ void CNNNetworkNGraphImpl::convertToCNNNetworkImpl() {
     ::ngraph::pass::ConvertOpSet1ToLegacy().run_on_function(graph);
     cnnNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, *this);
 }
-
-std::shared_ptr<CNNNetworkNGraphImpl> CNNNetworkNGraphImpl::cloneNGraphImpl() const {
-    auto result = std::make_shared<CNNNetworkNGraphImpl>(cloneFunction());
-    for (const auto& outputInfo : _outputData) {
-        result->_outputData[outputInfo.first]->setPrecision(outputInfo.second->getPrecision());
-        result->_outputData[outputInfo.first]->setLayout(outputInfo.second->getLayout());
-    }
-    for (const auto& inputInfo : _inputData) {
-        result->_inputData[inputInfo.first]->setPrecision(inputInfo.second->getPrecision());
-        result->_inputData[inputInfo.first]->setLayout(inputInfo.second->getLayout());
-        result->_inputData[inputInfo.first]->getPreProcess() = inputInfo.second->getPreProcess();
-    }
-    if (cnnNetwork)
-        result->cnnNetwork = cloneNet(*cnnNetwork);
-    return result;
-}
-
-void CNNNetworkNGraphImpl::transformConstants() {
-    if (!cnnNetwork)
-        convertToCNNNetworkImpl();
-    // Remove all redundant constant and convert unsupported precisions
-    ConstTransformer transformator(cnnNetwork.get());
-    transformator.fullTrim();
-}
-
-void InferenceEngine::details::CNNLayerCreator::on_adapter(const std::string& name,
-                                                           ::ngraph::ValueAccessor<void>& adapter) {
-    if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::element::Type>>(&adapter)) {
-        auto type = static_cast<::ngraph::element::Type&>(*a);
-        params[name] = details::ngraph::convertPrecision(type).name();
-    } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::PartialShape>>(&adapter)) {
-        std::string dims;
-        auto shape = static_cast<::ngraph::PartialShape&>(*a);
-        for (size_t i = 0; i < shape.rank().get_length(); i++) {
-            if (!dims.empty()) dims += ",";
-            dims += std::to_string(shape[i].get_length());
-        }
-        params[name] = dims;
-    } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::Shape>>(&adapter)) {
-        std::string dims;
-        auto shape = static_cast<::ngraph::Shape&>(*a);
-        for (size_t i = 0; i < shape.size(); i++) {
-            if (!dims.empty()) dims += ",";
-            dims += std::to_string(shape[i]);
-        }
-        params[name] = dims;
-    } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::Strides>>(&adapter)) {
-        std::string dims;
-        auto shape = static_cast<::ngraph::Strides&>(*a);
-        for (size_t i = 0; i < shape.size(); i++) {
-            if (!dims.empty()) dims += ",";
-            dims += std::to_string(shape[i]);
-        }
-        params[name] = dims;
-    }
-}
-
-InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node): node(node) {
-    addSpecificCreator({"Parameter"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                         const std::map<std::string, std::string> params) -> CNNLayerPtr {
-        LayerParams attrs = {node->get_friendly_name(), "Input",
-            details::ngraph::convertPrecision(node->get_output_element_type(0))};
-        auto res = std::make_shared<CNNLayer>(attrs);
-        return res;
-    });
-    // TODO - Remove "GreaterEq" once ngraph transitions to GreaterEqual
-    addSpecificCreator({"Eltwise", "Subtract", "Power", "Maximum", "Divide", "Greater", "GreaterEqual", "FloorMod", "LogicalOr", "LogicalAnd", "LogicalXor",
-        "GreaterEq", "Less", "LessEqual", "Equal", "NotEqual", "Multiply", "Add"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                                                 const std::map<std::string, std::string> params) -> CNNLayerPtr {
-            LayerParams attrs = {node->get_friendly_name(), "Eltwise",
-                details::ngraph::convertPrecision(node->get_output_element_type(0))};
-            auto res = std::make_shared<EltwiseLayer>(attrs);
-            res->params = params;
-            if (node->description() == "Maximum") {
-                res->params["operation"] = "max";
-            } else if (node->description() == "Power") {
-                res->params["operation"] = "pow";
-            } else if (node->description() == "Subtract") {
-                res->params["operation"] = "sub";
-            } else if (node->description() == "Divide") {
-                res->params["operation"] = "div";
-            } else if (node->description() == "LessEqual") {
-                res->params["operation"] = "less_equal";
-            } else if (node->description() == "Less") {
-                res->params["operation"] = "less";
-            } else if (node->description() == "Equal") {
-                res->params["operation"] = "equal";
-            } else if (node->description() == "NotEqual") {
-                res->params["operation"] = "not_equal";
-            } else if (node->description() == "FloorMod") {
-                res->params["operation"] = "floor_mod";
-            } else if (node->description() == "Multiply") {
-                res->params["operation"] = "prod";
-            } else if (node->description() == "Add") {
-                res->params["operation"] = "sum";
-            } else if (node->description() == "Greater") {
-                res->params["operation"] = "greater";
-            } else if (node->description() == "GreaterEq") {
-                res->params["operation"] = "greater_equal";
-            } else if (node->description() == "GreaterEqual") {
-                res->params["operation"] = "greater_equal";
-            } else if (node->description() == "LogicalOr") {
-                res->params["operation"] = "logical_or";
-            } else if (node->description() == "LogicalAnd") {
-                res->params["operation"] = "logical_and";
-            } else if (node->description() == "LogicalXor") {
-                res->params["operation"] = "logical_xor";
-            } else if (node->description() == "Eltwise") {
-                auto castedLayer = std::dynamic_pointer_cast<::ngraph::op::Eltwise>(node);
-                if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << attrs.type << " layer " << attrs.name;
-                std::string type;
-                switch (castedLayer->eltwise_type) {
-                case ELTWISE_TYPE::Sum:
-                    type = "sum";
-                    break;
-                case ELTWISE_TYPE::Prod:
-                    type = "prod";
-                    break;
-                default:
-                    THROW_IE_EXCEPTION << "Not supported eltwise type!";
-                }
-
-                res->params["operation"] = type;
-            }
-            return res;
-        });
-    addSpecificCreator({"Concat"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
-        LayerParams attrs = {node->get_friendly_name(), node->description(),
-            details::ngraph::convertPrecision(node->get_output_element_type(0))};
-        auto res = std::make_shared<ConcatLayer>(attrs);
-        res->params = params;
-        return res;
-    });
-    addSpecificCreator({"AvgPool", "MaxPool"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                                  const std::map<std::string, std::string> params) -> CNNLayerPtr {
-        LayerParams attrs = {node->get_friendly_name(), "Pooling",
-            details::ngraph::convertPrecision(node->get_output_element_type(0))};
-        auto res = std::make_shared<PoolingLayer>(attrs);
-        res->params = params;
-        if (res->params.find("auto_pad") != res->params.end() &&
-            details::CaselessEq<std::string>()(res->params["auto_pad"], "EXPLICIT"))
-            res->params.erase("auto_pad");
-
-        if (res->params.find("exclude_pad") != res->params.end()) {
-            res->params["exclude-pad"] = res->params["exclude_pad"];
-            res->params.erase("exclude_pad");
-        }
-
-        if (node->description() == "MaxPool") {
-            res->params["pool-method"] = "max";
-        } else if (node->description() == "AvgPool") {
-            res->params["pool-method"] = "avg";
-        }
-        return res;
-    });
-    addSpecificCreator({"Select"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
-        LayerParams attrs = {node->get_friendly_name(), node->description(),
-                             details::ngraph::convertPrecision(node->get_output_element_type(0))};
-        auto res = std::make_shared<SelectLayer>(attrs);
-        res->params = params;
-        return res;
-    });
-    addSpecificCreator({"BinaryConvolution"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
-        LayerParams attrs = {node->get_friendly_name(), node->description(),
-                             details::ngraph::convertPrecision(node->get_output_element_type(0))};
-        auto res = std::make_shared<BinaryConvolutionLayer>(attrs);
-
-        // todo: investigate difference between ngraph parameters for BinConvolution and the implementation above
-        // this leads to accuracy issue for Precollected_ONNX_ResNet50_88percentinto1bit e2e test
-        // res->params = params;
-
-        auto castedLayer = ::ngraph::as_type_ptr<::ngraph::op::v1::BinaryConvolution>(node);
-
-        std::string value;
-        for (const auto& val : castedLayer->get_pads_begin()) {
-            if (!value.empty()) value += ",";
-            value += Builder::asString(val);
-        }
-        res->params["pads_begin"] = value;
-
-        value.clear();
-        for (const auto& val : castedLayer->get_pads_end()) {
-            if (!value.empty()) value += ",";
-            value += Builder::asString(val);
-        }
-        res->params["pads_end"] = value;
-
-        switch (castedLayer->get_auto_pad()) {
-            case ::ngraph::op::PadType::SAME_UPPER:
-                res->params["auto_pad"] = "same_upper";
-                break;
-            case ::ngraph::op::PadType::SAME_LOWER:
-                res->params["auto_pad"] = "same_lower";
-                break;
-            case ::ngraph::op::PadType::VALID:
-                res->params["auto_pad"] = "valid";
-                break;
-            default:
-                break;
-        }
-
-        value.clear();
-        for (const auto& val : castedLayer->get_strides()) {
-            if (!value.empty()) value += ",";
-            value += Builder::asString(val);
-        }
-        res->params["strides"] = value;
-
-        value.clear();
-        for (const auto& val : castedLayer->get_dilations()) {
-            if (!value.empty()) value += ",";
-            value += Builder::asString(val);
-        }
-        res->params["dilations"] = value;
-
-        // Restore kernel size and output
-        const auto& shape = castedLayer->get_input_shape(1);
-        res->params["output"] = Builder::asString(shape[0]);
-
-        value.clear();
-        for (size_t i = 2; i < shape.size(); i++) {
-            if (!value.empty()) value += ",";
-            value += Builder::asString(shape[i]);
-        }
-        res->params["kernel"] = value;
-
-        switch (castedLayer->get_mode()) {
-            case ::ngraph::op::v1::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT:
-                res->params["mode"] = "xnor-popcount";
-        }
-
-        auto weights_shape = castedLayer->input(1).get_source_output().get_shape();
-        res->params["input"] = Builder::asString(weights_shape[1]);
-        res->params["pad_value"] = Builder::asString(castedLayer->get_pad_value());
-
-        Builder::NodeConverter<::ngraph::op::Constant> converter;
-
-        const auto weightsNode = castedLayer->get_inputs()[1].get_output().get_node();
-        if (converter.canCreate(weightsNode)) {
-            const auto& weights = converter.createLayer(weightsNode);
-            res->blobs["weights"] = weights->blobs["custom"];
-            res->_weights = weights->blobs["custom"];
-        }
-        return res;
-    });
-
-    addSpecificCreator({"SpaceToBatch"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
-        LayerParams attrs = {node->get_friendly_name(), node->description(),
-                             details::ngraph::convertPrecision(node->get_output_element_type(0))};
-        auto res = std::make_shared<SpaceToBatchLayer>(attrs);
-        res->params = params;
-        return res;
-    });
-
-    addSpecificCreator({"BatchToSpace"}, [](const std::shared_ptr<::ngraph::Node>& node,
-                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
-        LayerParams attrs = {node->get_friendly_name(), node->description(),
-                             details::ngraph::convertPrecision(node->get_output_element_type(0))};
-        auto res = std::make_shared<BatchToSpaceLayer>(attrs);
-        res->params = params;
-        return res;
-    });
-}
-
-CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
-    auto one_from = [](const std::string& desc, const std::vector<std::string>& descs) -> bool {
-        for (const auto& d : descs) {
-            if (details::CaselessEq<std::string>()(d, desc)) return true;
-        }
-        return false;
-    };
-    LayerParams attrs = {node->get_friendly_name(), node->description(),
-                         details::ngraph::convertPrecision(node->get_output_element_type(0))};
-    if (creators.find(node->description()) != creators.end())
-        return creators[node->description()](node, params);
-
-    auto res = std::make_shared<CNNLayer>(attrs);
-    res->params = params;
-    return res;
-}
@@ -102,7 +102,7 @@ public:
     std::shared_ptr<const ::ngraph::Function> getFunction() const noexcept override {
         return !cnnNetwork ? _ngraph_function : nullptr;
     }
-    std::shared_ptr<::ngraph::Function> getFunction() noexcept {
+    std::shared_ptr<::ngraph::Function> getFunction() noexcept override {
         return !cnnNetwork ? _ngraph_function : nullptr;
     }
 
@@ -118,9 +118,6 @@ public:
         noexcept override;
 
     void convertToCNNNetworkImpl();
-
-    std::shared_ptr<CNNNetworkNGraphImpl> cloneNGraphImpl() const;
-    void transformConstants();
 protected:
     std::shared_ptr<::ngraph::Function> _ngraph_function;
     virtual std::shared_ptr<::ngraph::Function> cloneFunction(bool constFolding = false, const std::map<std::string,
@@ -142,7 +139,7 @@ private:
 
     friend INFERENCE_ENGINE_API_CPP(std::shared_ptr<CNNNetworkImpl>)
     convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function>& graph,
-                                 const CNNNetworkNGraphImpl & nGraphImpl);
+                                 const ICNNNetwork& nGraphImpl);
 
     /**
      * @brief Reshape on the same shape
@@ -196,65 +193,6 @@ private:
 
 IE_SUPPRESS_DEPRECATED_END
 
-/**
- * @brief Creator for CNNLayer from nGraph op
- */
-class CNNLayerCreator : public ::ngraph::AttributeVisitor {
-public:
-    using CreatorFor = std::function<CNNLayerPtr(const std::shared_ptr<::ngraph::Node>& node,
-                                                 const std::map<std::string, std::string> param)>;
-    explicit CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node);
-
-    CNNLayerPtr create();
-
-    void on_attribute(const std::string& name, std::string& value) override {
-        params[name] = value;
-    }
-
-    void on_attribute(const std::string& name, bool& value) override {
-        params[name] = value ? "true" : "false";
-    }
-
-    void addSpecificCreator(const std::vector<std::string>& forTypes, const CreatorFor& creator) {
-        for (const auto type : forTypes) {
-            creators[type] = creator;
-        }
-    }
-
-    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<std::string>& adapter) override {
-        std::string data = adapter.get();
-        std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) {
-            return std::tolower(c);
-        });
-        params[name] = data;
-    }
-
-    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override {
-        std::string dims;
-        auto shape = adapter.get();
-        for (size_t i = 0; i < shape.size(); i++) {
-            if (!dims.empty()) dims += ",";
-            dims += std::to_string(shape[i]);
-        }
-        params[name] = dims;
-    }
-
-    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<double>& adapter) override {
-        params[name] = std::to_string(adapter.get());
-    }
-
-    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<int64_t>& adapter) override {
-        params[name] = std::to_string(adapter.get());
-    }
-
-    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<void>& adapter) override;
-
-private:
-    std::shared_ptr<::ngraph::Node> node;
-    std::map<std::string, std::string> params;
-    std::map<std::string, CreatorFor> creators;
-};
-
 typedef std::shared_ptr<CNNNetworkNGraphImpl> CNNNetworkNGraphImplPtr;
 }  // namespace details
 }  // namespace InferenceEngine
index a28f5d8..197a096 100644 (file)
@@ -96,7 +96,7 @@ void ngraph::op::GenericIE::validate_and_infer_types() {
                 // Set dynamic output shapes if input shapes are not defined
                 for (size_t i = 0; i < outputs.size(); i++) {
                     const auto& port = outputs[i];
-                    auto type = InferenceEngine::details::ngraph::convertPrecision(port.precision);
+                    auto type = InferenceEngine::details::convertPrecision(port.precision);
                     set_output_type(i, type, PartialShape::dynamic());
                 }
                 return;
@@ -105,7 +105,7 @@ void ngraph::op::GenericIE::validate_and_infer_types() {
             Shape this_ishape = get_input_shape(i);
             InferenceEngine::SizeVector dims = this_ishape;
             InferenceEngine::Blob::Ptr input = make_blob_with_precision(InferenceEngine::TensorDesc(
-                InferenceEngine::details::ngraph::convertPrecision(get_input_element_type(i)), dims,
+                InferenceEngine::details::convertPrecision(get_input_element_type(i)), dims,
                 InferenceEngine::TensorDesc::getLayoutByDims(dims)));
             inputs.emplace_back(input);
         }
@@ -126,6 +126,11 @@ void ngraph::op::GenericIE::validate_and_infer_types() {
             }
         }
 
+        // WA: Proposal shape infer has to know number of outputs
+        if (type == "Proposal" && parameters.find("num_outputs") == parameters.end()) {
+            parameters["num_outputs"] = std::to_string(outputs.size());
+        }
+
         ret = impl->inferShapes(inputs, parameters, blobs, outShapes, nullptr);
         IE_SUPPRESS_DEPRECATED_END
 
@@ -134,7 +139,7 @@ void ngraph::op::GenericIE::validate_and_infer_types() {
         for (size_t i = 0; i < outputs.size(); i++) {
             const auto& port = outputs[i];
             ngraph::Shape outShape(outShapes[i]);
-            auto type = InferenceEngine::details::ngraph::convertPrecision(port.precision);
+            auto type = InferenceEngine::details::convertPrecision(port.precision);
             set_output_type(i, type, PartialShape(outShape));
         }
 
@@ -149,7 +154,7 @@ void ngraph::op::GenericIE::validate_and_infer_types() {
         for (size_t i = 0; i < outputs.size(); i++) {
             const auto& port = outputs[i];
             ngraph::Shape outShape(port.dims);
-            auto type = InferenceEngine::details::ngraph::convertPrecision(port.precision);
+            auto type = InferenceEngine::details::convertPrecision(port.precision);
             set_output_type(i, type, PartialShape(outShape));
         }
         initialized++;
index df41f8c..1c7c73c 100644 (file)
 #include <vector>
 
 #include <ngraph/opsets/opset.hpp>
+#include "cpp/ie_cnn_net_reader.h"
 #include "cpp_interfaces/base/ie_plugin_base.hpp"
 #include "details/ie_exception_conversion.hpp"
 #include "details/ie_so_pointer.hpp"
 #include "file_utils.h"
-#include "ie_cnn_net_reader_impl.h"
 #include "ie_icore.hpp"
-#include "ie_ir_reader.hpp"
 #include "ie_plugin.hpp"
 #include "ie_plugin_config.hpp"
 #include "ie_profiling.hpp"
@@ -38,6 +37,27 @@ IE_SUPPRESS_DEPRECATED_START
 
 namespace {
 
+std::once_flag flag;
+std::shared_ptr<InferenceEngine::details::SharedObjectLoader> cnnReaderLoader;
+
+std::shared_ptr<InferenceEngine::details::SharedObjectLoader>
+createCnnReaderLoader() {
+    std::call_once(flag, [&] () {
+        FileUtils::FilePath libraryName = FileUtils::toFilePath(std::string("inference_engine_ir_readers") + std::string(IE_BUILD_POSTFIX));
+        FileUtils::FilePath irReadersLibraryPath = FileUtils::makeSharedLibraryName(getInferenceEngineLibraryPath(), libraryName);
+
+        if (!FileUtils::fileExist(irReadersLibraryPath)) {
+            THROW_IE_EXCEPTION << "Please, make sure that Inference Engine IR readers library "
+                << FileUtils::fromFilePath(::FileUtils::makeSharedLibraryName({}, libraryName)) << " is in "
+                << getIELibraryPath();
+        }
+        cnnReaderLoader = std::shared_ptr<InferenceEngine::details::SharedObjectLoader>(
+            new InferenceEngine::details::SharedObjectLoader(irReadersLibraryPath.c_str()));
+    });
+
+    return cnnReaderLoader;
+}
+
 IInferencePluginAPI* getInferencePluginAPIInterface(IInferencePlugin* iplugin) {
     return dynamic_cast<IInferencePluginAPI*>(iplugin);
 }
@@ -52,6 +72,11 @@ IInferencePluginAPI* getInferencePluginAPIInterface(InferencePlugin plugin) {
 
 }  // namespace
 
+CNNNetReaderPtr CreateCNNNetReaderPtr() noexcept {
+    auto loader = createCnnReaderLoader();
+    return CNNNetReaderPtr(loader);
+}
+
 IE_SUPPRESS_DEPRECATED_END
 
 DeviceIDParser::DeviceIDParser(const std::string& deviceNameWithID) {
@@ -112,6 +137,7 @@ std::vector<std::string> DeviceIDParser::getMultiDevices(std::string devicesList
 }
 
 class Core::Impl : public ICore {
+    // Fields are ordered by deletion order
     ITaskExecutor::Ptr _taskExecutor = nullptr;
 
     IE_SUPPRESS_DEPRECATED_START
@@ -124,10 +150,11 @@ class Core::Impl : public ICore {
         std::vector<FileUtils::FilePath> listOfExtentions;
     };
 
-    std::map<std::string, PluginDescriptor> pluginRegistry;
     std::unordered_set<std::string> opsetNames;
     std::vector<IExtensionPtr> extensions;
 
+    std::map<std::string, PluginDescriptor> pluginRegistry;
+
 public:
     Impl();
     ~Impl() override;
@@ -395,12 +422,18 @@ std::map<std::string, Version> Core::GetVersions(const std::string& deviceName)
 
     {
         // for compatibility with samples / demo
-        if (deviceName.find("HETERO:") == 0) {
-            deviceNames = DeviceIDParser::getHeteroDevices(deviceName.substr(7));
+        if (deviceName.find("HETERO") == 0) {
+            auto pos = deviceName.find_first_of(":");
+            if (pos != std::string::npos) {
+                deviceNames = DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1));
+            }
             deviceNames.push_back("HETERO");
         } else if (deviceName.find("MULTI") == 0) {
+            auto pos = deviceName.find_first_of(":");
+            if (pos != std::string::npos) {
+                deviceNames = DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1));
+            }
             deviceNames.push_back("MULTI");
-            deviceNames = DeviceIDParser::getMultiDevices(deviceName.substr(6));
         } else {
             deviceNames.push_back(deviceName);
         }
@@ -457,13 +490,12 @@ Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
 CNNNetwork Core::ReadNetwork(const std::string& modelPath, const std::string& binPath) const {
     IE_PROFILING_AUTO_SCOPE(Core::ReadNetwork)
     IE_SUPPRESS_DEPRECATED_START
-    auto cnnReader = std::shared_ptr<ICNNNetReader>(CreateCNNNetReader());
     ResponseDesc desc;
+    CNNNetReaderPtr cnnReader(createCnnReaderLoader());
     StatusCode rt = cnnReader->ReadNetwork(modelPath.c_str(), &desc);
     if (rt != OK) THROW_IE_EXCEPTION << desc.msg;
-    auto cnnNetReaderImpl = std::dynamic_pointer_cast<details::CNNNetReaderImpl>(cnnReader);
-    if (cnnNetReaderImpl && cnnReader->getVersion(&desc) >= 10) {
-        cnnNetReaderImpl->addExtensions(_impl->getExtensions());
+    if (cnnReader->getVersion(&desc) >= 10) {
+        cnnReader->addExtensions(_impl->getExtensions());
     }
     std::string bPath = binPath;
     if (bPath.empty()) {
@@ -491,13 +523,12 @@ CNNNetwork Core::ReadNetwork(const std::string& modelPath, const std::string& bi
 CNNNetwork Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights) const {
     IE_PROFILING_AUTO_SCOPE(Core::ReadNetwork)
     IE_SUPPRESS_DEPRECATED_START
-    auto cnnReader = std::shared_ptr<ICNNNetReader>(CreateCNNNetReader());
     ResponseDesc desc;
+    CNNNetReaderPtr cnnReader(createCnnReaderLoader());
     StatusCode rt = cnnReader->ReadNetwork(model.data(), model.length(), &desc);
     if (rt != OK) THROW_IE_EXCEPTION << desc.msg;
-    auto cnnNetReaderImpl = std::dynamic_pointer_cast<details::CNNNetReaderImpl>(cnnReader);
-    if (cnnNetReaderImpl && cnnReader->getVersion(&desc) >= 10) {
-        cnnNetReaderImpl->addExtensions(_impl->getExtensions());
+    if (cnnReader->getVersion(&desc) >= 10) {
+        cnnReader->addExtensions(_impl->getExtensions());
     }
     TBlob<uint8_t>::Ptr weights_ptr;
     if (weights) {
@@ -507,6 +538,7 @@ CNNNetwork Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights
     rt = cnnReader->SetWeights(weights_ptr, &desc);
     if (rt != OK) THROW_IE_EXCEPTION << desc.msg;
     IE_SUPPRESS_DEPRECATED_END
+
     return CNNNetwork(cnnReader);
 }
 
@@ -694,11 +726,6 @@ void Core::SetConfig(const std::map<std::string, std::string>& config, const std
             THROW_IE_EXCEPTION << "SetConfig is supported only for HETERO itself (without devices). "
                                   "You can configure the devices with SetConfig before creating the HETERO on top.";
         }
-
-        if (config.find("TARGET_FALLBACK") != config.end()) {
-            THROW_IE_EXCEPTION << "Please, specify TARGET_FALLBACK to the LoadNetwork directly, "
-                                  "as you will need to pass the same TARGET_FALLBACK anyway.";
-        }
     }
 
     // MULTI case
@@ -707,11 +734,6 @@ void Core::SetConfig(const std::map<std::string, std::string>& config, const std
             THROW_IE_EXCEPTION << "SetConfig is supported only for MULTI itself (without devices). "
                                   "You can configure the devices with SetConfig before creating the MULTI on top.";
         }
-
-        if (config.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES) != config.end()) {
-            THROW_IE_EXCEPTION << "Please, specify DEVICE_PRIORITIES to the LoadNetwork directly, "
-                                  "as you will need to pass the same DEVICE_PRIORITIES anyway.";
-        }
     }
 
     if (deviceName.empty()) {
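
GetVersions now accepts both the bare "HETERO"/"MULTI" device names and the colon-separated form with an explicit device list, and the MULTI branch no longer drops the "MULTI" entry it just added. A short sketch of the resulting behaviour (the device list is an example):

    #include <ie_core.hpp>
    #include <iostream>

    int main() {
        InferenceEngine::Core core;

        // "HETERO:GNA,CPU" is split into GNA, CPU and HETERO itself;
        // a plain "HETERO" now also works and reports only the HETERO plugin version.
        for (const auto& entry : core.GetVersions("HETERO:GNA,CPU")) {
            std::cout << entry.first << " : " << entry.second.description << std::endl;
        }
        return 0;
    }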
index dded13a..58c893c 100644 (file)
@@ -13,6 +13,7 @@
 #include <ie_parameter.hpp>
 #include <ie_iextension.h>
 #include <ie_extension.h>
+
 #include <ngraph/opsets/opset.hpp>
 
 using namespace InferenceEngine;
@@ -83,6 +84,7 @@ template struct InferenceEngine::Parameter::RealData<std::vector<std::string>>;
 template struct InferenceEngine::Parameter::RealData<std::vector<unsigned long>>;
 template struct InferenceEngine::Parameter::RealData<std::tuple<unsigned int, unsigned int>>;
 template struct InferenceEngine::Parameter::RealData<std::tuple<unsigned int, unsigned int, unsigned int>>;
+template struct InferenceEngine::Parameter::RealData<InferenceEngine::Blob::Ptr>;
 #endif  // __clang__
 //
 // ie_blob.h
index f7aed75..337fbfa 100644 (file)
@@ -71,6 +71,13 @@ bool with_cpu_x86_avx512_core() {
 #endif
 }
 
+bool with_cpu_x86_bfloat16() {
+#ifdef ENABLE_MKL_DNN
+    return cpu.has(Xbyak::util::Cpu::tAVX512_BF16);
+#else
+    return false;
+#endif
+}
 
 bool checkOpenMpEnvVars(bool includeOMPNumThreads) {
     for (auto&& var : {
index 7a4c18d..f46ccbf 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 99d6a19..7543621 100644 (file)
@@ -12,6 +12,7 @@
 namespace InferenceEngine {
 
 ITaskExecutor::Ptr ExecutorManagerImpl::getExecutor(std::string id) {
+    std::lock_guard<std::mutex> guard(taskExecutorMutex);
     auto foundEntry = executors.find(id);
     if (foundEntry == executors.end()) {
         auto newExec = std::make_shared<CPUStreamsExecutor>(IStreamsExecutor::Config{id});
@@ -22,6 +23,7 @@ ITaskExecutor::Ptr ExecutorManagerImpl::getExecutor(std::string id) {
 }
 
 IStreamsExecutor::Ptr ExecutorManagerImpl::getIdleCPUStreamsExecutor(const IStreamsExecutor::Config& config) {
+    std::lock_guard<std::mutex> guard(streamExecutorMutex);
     for (const auto& it : cpuStreamsExecutors) {
         const auto& executor = it.second;
         if (executor.use_count() != 1)
@@ -52,6 +54,8 @@ size_t ExecutorManagerImpl::getIdleCPUStreamsExecutorsNumber() {
 }
 
 void ExecutorManagerImpl::clear(const std::string& id) {
+    std::lock_guard<std::mutex> stream_guard(streamExecutorMutex);
+    std::lock_guard<std::mutex> task_guard(taskExecutorMutex);
     if (id.empty()) {
         executors.clear();
         cpuStreamsExecutors.clear();
@@ -66,8 +70,47 @@ void ExecutorManagerImpl::clear(const std::string& id) {
     }
 }
 
+std::mutex ExecutorManager::_mutex;
 ExecutorManager* ExecutorManager::_instance = nullptr;
 
+ExecutorManager* ExecutorManager::getInstance() {
+    /*
+     * 1) We do not implement the singleton via a STATIC LOCAL object like
+     *
+     *   getInstance() {
+     *       static ExecutorManager _instance;
+     *       return &_instance;
+     *   }
+     *
+     * because of the destruction order on program exit.
+     * Some IE classes, e.g. MKLDNN::Engine, use this singleton in their destructors.
+     * They have no direct dependency from the C++ runtime's point of view, so the
+     * _instance local static variable may be destroyed before MKLDNN::~Engine is
+     * called. Any further manipulation with the destroyed object leads to an
+     * exception or a crash.
+     *
+     * 2) We do not implement the singleton via a STATIC object like:
+     *
+     *   ExecutorManager ExecutorManager::_instance;
+     *   getInstance() {
+     *       return &_instance;
+     *   }
+     *
+     * because of double destruction. Some test cases link the IE module twice,
+     * via its static and dynamic versions. Both modules then carry a static object
+     * with the same export name, which leads to double construction and double
+     * destruction of that object. With some C++ compilers (e.g. gcc 5.4) this
+     * crashes with a "double free".
+     *
+     * That is why the singleton instance is allocated manually on the heap.
+     */
+    std::lock_guard<std::mutex> guard(_mutex);
+    if (_instance == nullptr) {
+        _instance = new ExecutorManager();
+    }
+    return _instance;
+}
+
 ITaskExecutor::Ptr ExecutorManager::getExecutor(std::string id) {
     return _impl.getExecutor(id);
 }
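
With _mutex guarding getInstance and the per-container mutexes guarding the executor maps, executors can be requested from several threads without external locking. A minimal sketch, assuming the plugin-side header path threading/ie_executor_manager.hpp:

    #include <threading/ie_executor_manager.hpp>
    #include <thread>

    int main() {
        auto worker = [] {
            // Lazily creates or reuses the "CPU" executor; the lookup is now mutex-protected.
            auto executor = InferenceEngine::ExecutorManager::getInstance()->getExecutor("CPU");
            executor->run([] { /* some task */ });
        };
        std::thread t1(worker), t2(worker);
        t1.join();
        t2.join();
        return 0;
    }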
index 61104c0..0e04154 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/ir_readers/CMakeLists.txt b/inference-engine/src/ir_readers/CMakeLists.txt
new file mode 100644 (file)
index 0000000..f90b2ae
--- /dev/null
@@ -0,0 +1,49 @@
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(TARGET_NAME "inference_engine_ir_readers")
+
+if(ENABLE_LTO)
+    ie_enable_lto()
+endif()
+
+set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/")
+
+file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file(GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/*.h ${PUBLIC_HEADERS_DIR}/*.hpp)
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+
+source_group("src" FILES ${LIBRARY_SRC})
+source_group("include" FILES ${PUBLIC_HEADERS})
+
+# Create shared library
+
+add_library(${TARGET_NAME} SHARED ${LIBRARY_SRC} ${PUBLIC_HEADERS})
+
+target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_API
+                                                  IMPLEMENT_INFERENCE_ENGINE_PLUGIN)
+
+target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR})
+target_include_directories(${TARGET_NAME} PRIVATE "${IE_MAIN_SOURCE_DIR}/src/inference_engine")
+
+target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_plugin_api ${NGRAPH_LIBRARIES} inference_engine)
+target_link_libraries(${TARGET_NAME} PRIVATE pugixml)
+
+# code style
+
+add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
+add_clang_format_target(${TARGET_NAME}_clang_format FOR_TARGETS ${TARGET_NAME})
+
+# developer package
+
+ie_developer_export_targets(${TARGET_NAME})
+
+# install
+
+install(TARGETS ${TARGET_NAME}
+        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
@@ -18,6 +18,7 @@
 #include "ie_format_parser.h"
 #include "ie_ir_reader.hpp"
 #include "ie_profiling.hpp"
+#include "ie_plugin.hpp"
 #include "parsers.h"
 #include "xml_parse_utils.h"
 
@@ -29,21 +30,19 @@ IE_SUPPRESS_DEPRECATED_START
 CNNNetReaderImpl::CNNNetReaderImpl(const FormatParserCreator::Ptr& _creator)
     : parseSuccess(false), _version(0), parserCreator(_creator) {}
 
+CNNNetReaderImpl::~CNNNetReaderImpl() { }
+
 StatusCode CNNNetReaderImpl::SetWeights(const TBlob<uint8_t>::Ptr& weights, ResponseDesc* desc) noexcept {
     if (!_parser && _version < 10) {
         return DescriptionBuffer(desc) << "network must be read first";
     }
     try {
         if (_version == 10) {
-#if defined(ENABLE_IR_READER)
             // It's time to perform actual reading of V10 network and instantiate CNNNetworkNGraphImpl
             IRReader v10Reader(extensions);
             std::stringstream model;
             xmlDoc->save(model);
             network = std::make_shared<CNNNetworkNGraphImpl>(v10Reader.read(model.str(), weights));
-#else
-            return DescriptionBuffer(desc) << "Please, recompile Inference Engine with the ENABLE_IR_READER=ON Cmake option";
-#endif
         } else {
             _parser->SetWeights(weights);
         }
@@ -173,15 +172,13 @@ void CNNNetReaderImpl::addExtensions(const std::vector<InferenceEngine::IExtensi
 }
 
 std::shared_ptr<IFormatParser> V2FormatParserCreator::create(size_t version) {
-#ifdef ENABLE_IR_READER
     return std::make_shared<FormatParser>(version);
-#else
-    THROW_IE_EXCEPTION << "Please, recompile Inference Engine library with the ENABLE_IR_READER=ON Cmake option";
-    return nullptr;
-#endif
 }
 
-InferenceEngine::ICNNNetReader* InferenceEngine::CreateCNNNetReader() noexcept {
-    return new CNNNetReaderImpl(std::make_shared<V2FormatParserCreator>());
+INFERENCE_PLUGIN_API(InferenceEngine::StatusCode)
+CreateICNNNetReader(ICNNNetReader *& data, ResponseDesc *resp) noexcept {
+    data = new CNNNetReaderImpl(std::make_shared<V2FormatParserCreator>());
+    return StatusCode::OK;
 }
+
 IE_SUPPRESS_DEPRECATED_END
 #include <vector>
 
 #include "cnn_network_impl.hpp"
-#include "ie_icnn_net_reader.h"
 #include "ie_memcpy.h"
 #include "ie_profiling.hpp"
 #include "parsers.h"
+#include "ie_util_internal.hpp"
 
 namespace pugi {
 class xml_node;
@@ -31,14 +31,14 @@ struct FormatParserCreator {
     virtual ~FormatParserCreator() = default;
 };
 
-struct V2FormatParserCreator : public FormatParserCreator {
+struct INFERENCE_ENGINE_API_CLASS(V2FormatParserCreator) : public FormatParserCreator {
     std::shared_ptr<IFormatParser> create(size_t version) override;
 };
 
 IE_SUPPRESS_DEPRECATED_START
-class CNNNetReaderImpl : public ICNNNetReader {
+class INFERENCE_ENGINE_API_CLASS(CNNNetReaderImpl) : public ICNNNetReader {
 public:
-    explicit CNNNetReaderImpl(const FormatParserCreator::Ptr& _parserCreator);
+    explicit CNNNetReaderImpl(const FormatParserCreator::Ptr& _creator);
 
     StatusCode ReadNetwork(const char* filepath, ResponseDesc* resp) noexcept override;
 
@@ -78,7 +78,9 @@ public:
         delete this;
     }
 
-    void addExtensions(const std::vector<InferenceEngine::IExtensionPtr>& ext);
+    void addExtensions(const std::vector<InferenceEngine::IExtensionPtr>& ext) override;
+
+    ~CNNNetReaderImpl() override;
 
 private:
     std::shared_ptr<InferenceEngine::details::IFormatParser> _parser;
@@ -96,6 +98,7 @@ private:
     std::shared_ptr<pugi::xml_document> xmlDoc;
     std::vector<InferenceEngine::IExtensionPtr> extensions;
 };
+
 IE_SUPPRESS_DEPRECATED_END
 
 }  // namespace details
@@ -267,9 +267,10 @@ FormatParser::FormatParser(size_t version): _version(version) {
                 std::make_shared<LayerCreator<TopKLayer>>("TopK"),
                 std::make_shared<LayerCreator<UniqueLayer>>("Unique"),
                 std::make_shared<LayerCreator<NonMaxSuppressionLayer>>("NonMaxSuppression"),
-                std::make_shared<LayerCreator<ScatterLayer>>("ScatterUpdate"),
+                std::make_shared<LayerCreator<ScatterUpdateLayer>>("ScatterUpdate"),
                 std::make_shared<LayerCreator<ExperimentalDetectronPriorGridGeneratorLayer>>("ExperimentalDetectronPriorGridGenerator"),
-                std::make_shared<LayerCreator<ExperimentalDetectronGenerateProposalsSingleImageLayer>>("ExperimentalDetectronGenerateProposalsSingleImage")};
+                std::make_shared<LayerCreator<ExperimentalDetectronGenerateProposalsSingleImageLayer>>("ExperimentalDetectronGenerateProposalsSingleImage"),
+                std::make_shared<LayerCreator<ExperimentalDetectronTopKROIs>>("ExperimentalDetectronTopKROIs")};
     creators.emplace_back(_version < 6 ? std::make_shared<LayerCreator<QuantizeLayer>>("Quantize")
                                        : std::make_shared<LayerCreator<QuantizeLayer>>("FakeQuantize"));
 }
@@ -71,11 +71,7 @@ public:
     }
 };
 
-#ifdef ENABLE_IR_READER
 class INFERENCE_ENGINE_API_CLASS(FormatParser): public IFormatParser {
-#else
-class FormatParser : public IFormatParser {
-#endif
 public:
     explicit FormatParser(size_t version);
 
@@ -187,7 +187,7 @@ V10Parser::GenericLayerParams V10Parser::parseGenericParams(const pugi::xml_node
         // Input port hasn't precision
         if (!input) {
             const std::string& preStr = GetStrAttr(parentNode, "precision");
-            type = InferenceEngine::details::ngraph::convertPrecision(preStr);
+            type = InferenceEngine::details::convertPrecision(preStr);
         }
         port.precision = type;
         return port;
@@ -413,7 +413,7 @@ std::shared_ptr<ngraph::Node> V10Parser::createNode(const std::vector<ngraph::Ou
         for (const auto& port : params.outputPorts) {
             ngraph::op::GenericIE::PortIE iePort;
             iePort.dims = port.dims;
-            iePort.precision = InferenceEngine::details::ngraph::convertPrecision(port.precision);
+            iePort.precision = InferenceEngine::details::convertPrecision(port.precision);
             outputs.emplace_back(iePort);
         }
 
@@ -767,7 +767,7 @@ std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::Convert>::crea
         THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name;
 
     return std::make_shared<ngraph::op::Convert>(inputs[0],
-                                                 details::ngraph::convertPrecision(GetStrAttr(dn, "destination_type")));
+                                                 details::convertPrecision(GetStrAttr(dn, "destination_type")));
 }
 
 // LSTMCell layer
@@ -192,7 +192,7 @@ private:
             std::string val;
             if (!getStrAttribute(node.child("data"), name, val)) return;
             if (auto a = ngraph::as_type<ngraph::AttributeAdapter<ngraph::element::Type>>(&adapter)) {
-                static_cast<ngraph::element::Type&>(*a) = details::ngraph::convertPrecision(val);
+                static_cast<ngraph::element::Type&>(*a) = details::convertPrecision(val);
             } else if (auto a = ngraph::as_type<ngraph::AttributeAdapter<ngraph::PartialShape>>(&adapter)) {
                 std::vector<int64_t> shape;
                 std::vector<ngraph::Dimension> dims;
@@ -36,11 +36,7 @@ namespace InferenceEngine {
  * All methods here do not throw exceptions and return a StatusCode and ResponseDesc object.
  * Alternatively, to use methods that throw exceptions, refer to the CNNNetReader wrapper class.
  */
-#ifdef ENABLE_IR_READER
 class INFERENCE_ENGINE_API_CLASS(IRReader) {
-#else
-class IRReader {
-#endif
 public:
     IRReader() = default;
     explicit IRReader(const std::vector<IExtensionPtr>& exts): extensions(exts) {}
index a03a5f2..daae95a 100644 (file)
@@ -32,6 +32,8 @@ set_ie_threading_interface_for(${TARGET_NAME}_obj)
 target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
 
 target_include_directories(${TARGET_NAME}_obj PRIVATE ${PUBLIC_HEADERS_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src
+    ${IE_MAIN_SOURCE_DIR}/src/inference_engine                                          # For CNNNetworkNGraphImpl
+    $<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
     $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
     $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES>
     $<TARGET_PROPERTY:pugixml,INTERFACE_INCLUDE_DIRECTORIES>)
@@ -51,10 +53,9 @@ add_library(${TARGET_NAME} SHARED
 
 set_ie_threading_interface_for(${TARGET_NAME})
 
-target_link_libraries(${TARGET_NAME} PRIVATE ${NGRAPH_LIBRARIES} pugixml)
+target_link_libraries(${TARGET_NAME} PRIVATE ${NGRAPH_LIBRARIES} inference_engine_transformations pugixml)
 
 add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
-add_clang_format_target(${TARGET_NAME}_clang_format FOR_TARGETS ${TARGET_NAME})
 
 # export targets
 
@@ -67,6 +68,6 @@ ie_developer_export_targets(${TARGET_NAME})
 # install
 
 install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
-        ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
+        ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
         LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
index 617df27..fe3bd5e 100644 (file)
@@ -40,6 +40,10 @@ public:
         precision = prec;
     }
 
+    std::shared_ptr<::ngraph::Function> getFunction() noexcept override {
+        return nullptr;
+    }
+
     std::shared_ptr<const ::ngraph::Function> getFunction() const noexcept override {
         return nullptr;
     }
@@ -4,13 +4,18 @@
 
 #pragma once
 
-#include "cnn_network_ngraph_impl.hpp"
+#include "cnn_network_impl.hpp"
+#include <ngraph/attribute_visitor.hpp>
 
 #include <memory>
+#include <string>
+#include <vector>
 
 namespace InferenceEngine {
 namespace details {
+
 INFERENCE_ENGINE_API_CPP(std::shared_ptr<CNNNetworkImpl>)
-convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function>& graph, const CNNNetworkNGraphImpl &nGraphImpl);
+convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function>& graph, const ICNNNetwork &network);
+
 }  // namespace details
 }  // namespace InferenceEngine
index c96aa11..7de32ff 100644 (file)
@@ -24,6 +24,7 @@ namespace InferenceEngine {
  */
 class INFERENCE_ENGINE_API_CLASS(ConstTransformer) {
 public:
+    explicit ConstTransformer(ICNNNetwork* _network);
     explicit ConstTransformer(details::CNNNetworkImpl* _network);
     explicit ConstTransformer(std::vector<DataPtr> &_inputs, std::vector<DataPtr> &_outputs);
 
@@ -11,7 +11,6 @@
 
 namespace InferenceEngine {
 namespace details {
-namespace ngraph {
 
 inline ::ngraph::element::Type convertPrecision(const Precision& precision) {
     Precision::ePrecision pType = precision;
@@ -22,6 +21,8 @@ inline ::ngraph::element::Type convertPrecision(const Precision& precision) {
         return ::ngraph::element::Type(::ngraph::element::Type_t::f32);
     case Precision::FP16:
         return ::ngraph::element::Type(::ngraph::element::Type_t::f16);
+    case Precision::BF16:
+        return ::ngraph::element::Type(::ngraph::element::Type_t::bf16);
     case Precision::U8:
         return ::ngraph::element::Type(::ngraph::element::Type_t::u8);
     case Precision::I8:
@@ -53,6 +54,8 @@ inline ::ngraph::element::Type convertPrecision(const std::string& precision) {
         return ::ngraph::element::Type(::ngraph::element::Type_t::f16);
     } else if (precision == "f32" || precision == "FP32") {
         return ::ngraph::element::Type(::ngraph::element::Type_t::f32);
+    } else if (precision == "bf16" || precision == "BF16") {
+        return ::ngraph::element::Type(::ngraph::element::Type_t::bf16);
     } else if (precision == "f64" || precision == "FP64") {
         return ::ngraph::element::Type(::ngraph::element::Type_t::f64);
     } else if (precision == "i8" || precision == "I8") {
@@ -90,6 +93,8 @@ inline Precision convertPrecision(const ::ngraph::element::Type& precision) {
         return Precision(Precision::FP16);
     case ::ngraph::element::Type_t::f32:
         return Precision(Precision::FP32);
+    case ::ngraph::element::Type_t::bf16:
+        return Precision(Precision::BF16);
     case ::ngraph::element::Type_t::i8:
         return Precision(Precision::I8);
     case ::ngraph::element::Type_t::i16:
@@ -113,6 +118,5 @@ inline Precision convertPrecision(const ::ngraph::element::Type& precision) {
     }
 }
 
-}  // namespace ngraph
 }  // namespace details
 }  // namespace InferenceEngine
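
The BF16 branches added above extend all three convertPrecision overloads consistently. A minimal round-trip check, sketched under the assumption that the helpers live in InferenceEngine::details and that the header name used here is only illustrative:

    #include <cassert>
    #include <ie_precision.hpp>
    #include <ngraph/type/element_type.hpp>
    #include "ie_ngraph_utils.hpp"  // assumed name for the header that defines convertPrecision

    void check_bf16_round_trip() {
        using InferenceEngine::Precision;
        namespace details = InferenceEngine::details;

        // IE precision -> nGraph element type
        const ::ngraph::element::Type bf16 = details::convertPrecision(Precision(Precision::BF16));
        assert(bf16 == ::ngraph::element::Type(::ngraph::element::Type_t::bf16));

        // Both string spellings resolve to the same element type
        assert(details::convertPrecision("bf16") == bf16);
        assert(details::convertPrecision("BF16") == bf16);

        // nGraph element type -> IE precision
        assert(details::convertPrecision(bf16) == Precision(Precision::BF16));
    }
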
index 8eaf449..c22e41a 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <cpp/ie_cnn_network.h>
 
+#include <ie_icnn_network.hpp>
 #include <cnn_network_impl.hpp>
 #include <file_utils.h>
 #include <deque>
@@ -50,6 +51,15 @@ cloneNet(const std::vector<InferenceEngine::CNNLayerPtr>& layers, const ICNNNetw
 IE_SUPPRESS_DEPRECATED_END
 
 /**
+ * @brief Clones the whole network without conversion to CNNNetworkImpl. All layers and data objects will be cloned
+ * @note Blobs inside layers are reused
+ * @param network A network to clone
+ * @return A cloned object
+ */
+INFERENCE_ENGINE_API_CPP(std::shared_ptr<InferenceEngine::ICNNNetwork>)
+cloneNetwork(const InferenceEngine::ICNNNetwork& network);
+
+/**
  * @brief Clones the whole network. All layers and data objects will be cloned
  * @note Blobs inside layers are reused
  * @param network A network to clone
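
For reference, a minimal sketch of calling the new cloneNetwork() declared above, assuming it sits in namespace InferenceEngine like the neighbouring cloneNet helpers; the header name is an assumption:

    #include <memory>
    #include <ie_icnn_network.hpp>
    #include "ie_util_internal.hpp"  // assumed location of the cloneNetwork() declaration

    std::shared_ptr<InferenceEngine::ICNNNetwork> duplicate(const InferenceEngine::ICNNNetwork& network) {
        // No conversion to CNNNetworkImpl happens here; blobs inside layers are reused,
        // as the @note above states.
        return InferenceEngine::cloneNetwork(network);
    }
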
index 3759bb3..cd2e6c3 100644 (file)
@@ -33,8 +33,8 @@ using AllLayers =
                ReshapeLayer*, TileLayer*, ScaleShiftLayer*, PReLULayer*, PowerLayer*, BatchNormalizationLayer*,
                ClampLayer*, TensorIterator*, LSTMCell*, GRUCell*, RNNCell*, RNNSequenceLayer*, QuantizeLayer*,
                BinaryConvolutionLayer*, WeightableLayer*, OneHotLayer*, MathLayer*, ReduceLayer*, UniqueLayer*,
-               NonMaxSuppressionLayer*, ScatterLayer*, ExperimentalDetectronPriorGridGeneratorLayer*,
-               ExperimentalDetectronGenerateProposalsSingleImageLayer*, CNNLayer*>;
+               NonMaxSuppressionLayer*, ScatterUpdateLayer*, ExperimentalDetectronPriorGridGeneratorLayer*,
+               ExperimentalDetectronGenerateProposalsSingleImageLayer*, ExperimentalDetectronTopKROIs*, CNNLayer*>;
 
 template <class Visitor, std::size_t I = 0, typename... Tp>
 inline typename std::enable_if<I == sizeof...(Tp), void>::type visitActualLayer(std::tuple<Tp...>&& t,
index f01b647..04830f3 100644 (file)
@@ -1000,7 +1000,9 @@ void CNNNetworkInt8Normalizer::QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr
                 }
                 prev = *it;
             }
-            symQuant = *(intervals.begin());
+            if (!intervals.empty()) {
+                symQuant = *(intervals.begin());
+            }
             std::set<double> divs;
             prev = 0.f;
             for (auto it = individualsG.begin(); it != individualsG.end(); it++) {
@@ -9,6 +9,7 @@
 #include <vector>
 #include <unordered_set>
 
+#include <cnn_network_ngraph_impl.hpp>
 #include "ngraph_ops/convolution_ie.hpp"
 #include "ngraph_ops/deconvolution_ie.hpp"
 #include "ngraph_ops/eltwise.hpp"
 #include "ie_profiling.hpp"
 #include "ie_cnn_layer_builder_ngraph.h"
 
+#include <debug.h>
 #include "transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp"
 #include "transformations/utils/utils.hpp"
 
 namespace InferenceEngine {
 namespace details {
-std::shared_ptr<CNNNetworkImpl> convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function>& graph, const CNNNetworkNGraphImpl &nGraphImpl) {
+
+/**
+ * @brief Creator for CNNLayer from nGraph op
+ */
+class CNNLayerCreator : public ::ngraph::AttributeVisitor {
+public:
+    using CreatorFor = std::function<CNNLayerPtr(const std::shared_ptr<::ngraph::Node>& node,
+                                                 const std::map<std::string, std::string> param)>;
+    explicit CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node);
+
+    CNNLayerPtr create();
+
+    void on_attribute(const std::string& name, std::string& value) override {
+        params[name] = value;
+    }
+
+    void on_attribute(const std::string& name, bool& value) override {
+        params[name] = value ? "true" : "false";
+    }
+
+    void addSpecificCreator(const std::vector<std::string>& forTypes, const CreatorFor& creator) {
+        for (const auto& type : forTypes) {
+            creators[type] = creator;
+        }
+    }
+
+    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<std::string>& adapter) override {
+        std::string data = adapter.get();
+        std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) {
+            return std::tolower(c);
+        });
+        params[name] = data;
+    }
+
+    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override {
+        auto shape = adapter.get();
+        params[name] = joinVec(shape);
+    }
+
+    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<double>& adapter) override {
+        params[name] = std::to_string(adapter.get());
+    }
+
+    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<int64_t>& adapter) override {
+        params[name] = std::to_string(adapter.get());
+    }
+
+    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<void>& adapter) override;
+
+private:
+    std::shared_ptr<::ngraph::Node> node;
+    std::map<std::string, std::string> params;
+    std::map<std::string, CreatorFor> creators;
+};
+
+void InferenceEngine::details::CNNLayerCreator::on_adapter(const std::string& name,
+                                                           ::ngraph::ValueAccessor<void>& adapter) {
+    if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::element::Type>>(&adapter)) {
+        auto type = static_cast<::ngraph::element::Type&>(*a);
+        params[name] = details::convertPrecision(type).name();
+    } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::PartialShape>>(&adapter)) {
+        std::string dims;
+        auto shape = static_cast<::ngraph::PartialShape&>(*a);
+        for (size_t i = 0; i < shape.rank().get_length(); i++) {
+            if (!dims.empty()) dims += ",";
+            dims += std::to_string(shape[i].get_length());
+        }
+        params[name] = dims;
+    } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::Shape>>(&adapter)) {
+        auto shape = static_cast<::ngraph::Shape&>(*a);
+        params[name] = joinVec(shape);
+    } else if (auto a = ::ngraph::as_type<::ngraph::AttributeAdapter<::ngraph::Strides>>(&adapter)) {
+        auto shape = static_cast<::ngraph::Strides&>(*a);
+        params[name] = joinVec(shape);
+    }
+}
+
+InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr<::ngraph::Node>& node): node(node) {
+    addSpecificCreator({"Parameter"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                         const std::map<std::string, std::string> params) -> CNNLayerPtr {
+        LayerParams attrs = {node->get_friendly_name(), "Input",
+            details::convertPrecision(node->get_output_element_type(0))};
+        auto res = std::make_shared<CNNLayer>(attrs);
+        return res;
+    });
+    // TODO - Remove "GreaterEq" once ngraph transitions to GreaterEqual
+    addSpecificCreator({"Eltwise", "Subtract", "Power", "Maximum", "Divide", "Greater", "GreaterEqual", "FloorMod", "LogicalOr", "LogicalAnd", "LogicalXor",
+        "GreaterEq", "Less", "LessEqual", "Equal", "NotEqual", "Multiply", "Add"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                                                 const std::map<std::string, std::string> params) -> CNNLayerPtr {
+            LayerParams attrs = {node->get_friendly_name(), "Eltwise",
+                details::convertPrecision(node->get_output_element_type(0))};
+            auto res = std::make_shared<EltwiseLayer>(attrs);
+            res->params = params;
+            if (node->description() == "Maximum") {
+                res->params["operation"] = "max";
+            } else if (node->description() == "Power") {
+                res->params["operation"] = "pow";
+            } else if (node->description() == "Subtract") {
+                res->params["operation"] = "sub";
+            } else if (node->description() == "Divide") {
+                res->params["operation"] = "div";
+            } else if (node->description() == "LessEqual") {
+                res->params["operation"] = "less_equal";
+            } else if (node->description() == "Less") {
+                res->params["operation"] = "less";
+            } else if (node->description() == "Equal") {
+                res->params["operation"] = "equal";
+            } else if (node->description() == "NotEqual") {
+                res->params["operation"] = "not_equal";
+            } else if (node->description() == "FloorMod") {
+                res->params["operation"] = "floor_mod";
+            } else if (node->description() == "Multiply") {
+                res->params["operation"] = "prod";
+            } else if (node->description() == "Add") {
+                res->params["operation"] = "sum";
+            } else if (node->description() == "Greater") {
+                res->params["operation"] = "greater";
+            } else if (node->description() == "GreaterEq") {
+                res->params["operation"] = "greater_equal";
+            } else if (node->description() == "GreaterEqual") {
+                res->params["operation"] = "greater_equal";
+            } else if (node->description() == "LogicalOr") {
+                res->params["operation"] = "logical_or";
+            } else if (node->description() == "LogicalAnd") {
+                res->params["operation"] = "logical_and";
+            } else if (node->description() == "LogicalXor") {
+                res->params["operation"] = "logical_xor";
+            } else if (node->description() == "Eltwise") {
+                auto castedLayer = std::dynamic_pointer_cast<::ngraph::op::Eltwise>(node);
+                if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << attrs.type << " layer " << attrs.name;
+                std::string type;
+                switch (castedLayer->eltwise_type) {
+                case ELTWISE_TYPE::Sum:
+                    type = "sum";
+                    break;
+                case ELTWISE_TYPE::Prod:
+                    type = "prod";
+                    break;
+                default:
+                    THROW_IE_EXCEPTION << "Not supported eltwise type!";
+                }
+
+                res->params["operation"] = type;
+            }
+            return res;
+        });
+    addSpecificCreator({"Concat"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
+        LayerParams attrs = {node->get_friendly_name(), node->description(),
+            details::convertPrecision(node->get_output_element_type(0))};
+        auto res = std::make_shared<ConcatLayer>(attrs);
+        res->params = params;
+        return res;
+    });
+    addSpecificCreator({"AvgPool", "MaxPool"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                                  const std::map<std::string, std::string> params) -> CNNLayerPtr {
+        LayerParams attrs = {node->get_friendly_name(), "Pooling",
+            details::convertPrecision(node->get_output_element_type(0))};
+        auto res = std::make_shared<PoolingLayer>(attrs);
+        res->params = params;
+        if (res->params.find("auto_pad") != res->params.end() &&
+            details::CaselessEq<std::string>()(res->params["auto_pad"], "EXPLICIT"))
+            res->params.erase("auto_pad");
+
+        if (res->params.find("exclude_pad") != res->params.end()) {
+            res->params["exclude-pad"] = res->params["exclude_pad"];
+            res->params.erase("exclude_pad");
+        }
+
+        if (node->description() == "MaxPool") {
+            res->params["pool-method"] = "max";
+        } else if (node->description() == "AvgPool") {
+            res->params["pool-method"] = "avg";
+        }
+        return res;
+    });
+    addSpecificCreator({"Select"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
+        LayerParams attrs = {node->get_friendly_name(), node->description(),
+                             details::convertPrecision(node->get_output_element_type(0))};
+        auto res = std::make_shared<SelectLayer>(attrs);
+        res->params = params;
+        return res;
+    });
+    addSpecificCreator({"BinaryConvolution"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
+        LayerParams attrs = {node->get_friendly_name(), node->description(),
+                             details::convertPrecision(node->get_output_element_type(0))};
+        auto res = std::make_shared<BinaryConvolutionLayer>(attrs);
+
+        // TODO: investigate the difference between the nGraph parameters for BinaryConvolution and the implementation above;
+        // it leads to an accuracy issue in the Precollected_ONNX_ResNet50_88percentinto1bit e2e test
+        // res->params = params;
+
+        auto castedLayer = ::ngraph::as_type_ptr<::ngraph::op::v1::BinaryConvolution>(node);
+        if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << attrs.type << " layer " << attrs.name;
+
+        std::string value;
+        for (const auto& val : castedLayer->get_pads_begin()) {
+            if (!value.empty()) value += ",";
+            value += Builder::asString(val);
+        }
+        res->params["pads_begin"] = value;
+
+        value.clear();
+        for (const auto& val : castedLayer->get_pads_end()) {
+            if (!value.empty()) value += ",";
+            value += Builder::asString(val);
+        }
+        res->params["pads_end"] = value;
+
+        switch (castedLayer->get_auto_pad()) {
+            case ::ngraph::op::PadType::SAME_UPPER:
+                res->params["auto_pad"] = "same_upper";
+                break;
+            case ::ngraph::op::PadType::SAME_LOWER:
+                res->params["auto_pad"] = "same_lower";
+                break;
+            case ::ngraph::op::PadType::VALID:
+                res->params["auto_pad"] = "valid";
+                break;
+            default:
+                break;
+        }
+
+        value.clear();
+        for (const auto& val : castedLayer->get_strides()) {
+            if (!value.empty()) value += ",";
+            value += Builder::asString(val);
+        }
+        res->params["strides"] = value;
+
+        value.clear();
+        for (const auto& val : castedLayer->get_dilations()) {
+            if (!value.empty()) value += ",";
+            value += Builder::asString(val);
+        }
+        res->params["dilations"] = value;
+
+        // Restore kernel size and output
+        const auto& shape = castedLayer->get_input_shape(1);
+        res->params["output"] = Builder::asString(shape[0]);
+
+        value.clear();
+        for (size_t i = 2; i < shape.size(); i++) {
+            if (!value.empty()) value += ",";
+            value += Builder::asString(shape[i]);
+        }
+        res->params["kernel"] = value;
+
+        switch (castedLayer->get_mode()) {
+            case ::ngraph::op::v1::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT:
+                res->params["mode"] = "xnor-popcount";
+        }
+
+        auto weights_shape = castedLayer->input(1).get_source_output().get_shape();
+        res->params["input"] = Builder::asString(weights_shape[1]);
+        res->params["pad_value"] = Builder::asString(castedLayer->get_pad_value());
+
+        Builder::NodeConverter<::ngraph::op::Constant> converter;
+
+        const auto weightsNode = castedLayer->get_inputs()[1].get_output().get_node();
+        if (converter.canCreate(weightsNode)) {
+            const auto& weights = converter.createLayer(weightsNode);
+            res->blobs["weights"] = weights->blobs["custom"];
+            res->_weights = weights->blobs["custom"];
+        }
+        return res;
+    });
+
+    addSpecificCreator({"SpaceToBatch"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
+        LayerParams attrs = {node->get_friendly_name(), node->description(),
+                             details::convertPrecision(node->get_output_element_type(0))};
+        auto res = std::make_shared<SpaceToBatchLayer>(attrs);
+        res->params = params;
+        return res;
+    });
+
+    addSpecificCreator({"BatchToSpace"}, [](const std::shared_ptr<::ngraph::Node>& node,
+                                      const std::map<std::string, std::string> params) -> CNNLayerPtr {
+        LayerParams attrs = {node->get_friendly_name(), node->description(),
+                             details::convertPrecision(node->get_output_element_type(0))};
+        auto res = std::make_shared<BatchToSpaceLayer>(attrs);
+        res->params = params;
+        return res;
+    });
+}
+
+CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
+    auto one_from = [](const std::string& desc, const std::vector<std::string>& descs) -> bool {
+        for (const auto& d : descs) {
+            if (details::CaselessEq<std::string>()(d, desc)) return true;
+        }
+        return false;
+    };
+    LayerParams attrs = {node->get_friendly_name(), node->description(),
+                         details::convertPrecision(node->get_output_element_type(0))};
+    if (creators.find(node->description()) != creators.end())
+        return creators[node->description()](node, params);
+
+    auto res = std::make_shared<CNNLayer>(attrs);
+    res->params = params;
+    return res;
+}
+
+std::shared_ptr<CNNNetworkImpl> convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function>& graph, const ICNNNetwork &network) {
     IE_PROFILING_AUTO_SCOPE(convertFunctionToICNNNetwork)
     const auto createCNNLayer = [](const std::shared_ptr<::ngraph::Node> &node) -> CNNLayerPtr {
         class NGraphCNNLayer: public CNNLayer {
@@ -240,8 +547,10 @@ std::shared_ptr<CNNNetworkImpl> convertFunctionToICNNNetwork(const std::shared_p
         network->setInputInfo(info);
     };
 
+    const CNNNetworkNGraphImpl* nGraphImpl = dynamic_cast<const CNNNetworkNGraphImpl*>(&network);
+
     InputsDataMap thisInputDataMap;
-    nGraphImpl.getInputsInfo(thisInputDataMap);
+    network.getInputsInfo(thisInputDataMap);
 
     // Create network
     auto cnnNetworkImpl = std::make_shared<details::CNNNetworkImpl>();
@@ -295,25 +604,25 @@ std::shared_ptr<CNNNetworkImpl> convertFunctionToICNNNetwork(const std::shared_p
             for (const auto &dim : dims) {
                 if (!dim)
                     THROW_IE_EXCEPTION << cnnLayer->type << " layer " << cnnLayer->name
-                                       << " has incorrect dimensions in the output data " << i;
+                        << " has incorrect dimensions in the output data " << i;
             }
-
-            if (!ptr && nGraphImpl._data.find(outName) != nGraphImpl._data.end()) {
-                ptr = nGraphImpl._data.at(outName);
+            if (!ptr && nGraphImpl && nGraphImpl->_data.find(outName) != nGraphImpl->_data.end()) {
+                ptr = nGraphImpl->_data.at(outName);
                 if (auto nData = std::dynamic_pointer_cast<InferenceEngine::details::NGraphData>(ptr)) {
                     const auto layout =
-                            dims.size() == nData->getTensorDesc().getDims().size() ?
-                            nData->getTensorDesc().getLayout() :
-                            TensorDesc::getLayoutByDims(dims);
+                        dims.size() == nData->getTensorDesc().getDims().size() ?
+                        nData->getTensorDesc().getLayout() :
+                        TensorDesc::getLayoutByDims(dims);
 
                     nData->reset();
                     nData->reshape(dims, layout);
                 }
                 cnnNetworkImpl->addData(outName.c_str(), ptr);
             }
+
             if (!ptr) {
                 ptr.reset(new Data(outName,
-                                   {details::ngraph::convertPrecision(layer->get_output_element_type(i)), dims,
+                                   {details::convertPrecision(layer->get_output_element_type(i)), dims,
                                     TensorDesc::getLayoutByDims(dims)}));
             }
 
index 3b0be37..cb5afbf 100644 (file)
@@ -17,6 +17,7 @@
 #include <vector>
 #include <mutex>
 
+#include <cnn_network_ngraph_impl.hpp>
 #include "blob_factory.hpp"
 #include "cnn_network_impl.hpp"
 #include "graph_tools.hpp"
@@ -70,6 +71,19 @@ ConstTransformer::ConstTransformer(details::CNNNetworkImpl* _network)
         THROW_IE_EXCEPTION << "[ERROR]: Failed to init ConstTransformer with null pointer of network";
 }
 
+ConstTransformer::ConstTransformer(ICNNNetwork* _network) {
+    if (auto cnnNet = dynamic_cast<InferenceEngine::details::CNNNetworkImpl *>(_network)) {
+        network = cnnNet;
+    } else if (auto nGraphNet = dynamic_cast<InferenceEngine::details::CNNNetworkNGraphImpl *>(_network)) {
+        if (auto cnnNet = dynamic_cast<InferenceEngine::details::CNNNetworkImpl *>(nGraphNet->getCNNNetwork().get()))
+            network = cnnNet;
+    }
+    if (!network)
+        THROW_IE_EXCEPTION << "[ERROR]: Failed to init ConstTransformer with unsupported network type";
+    inputs = get_inputs(network);
+    outputs = get_outputs(network);
+}
+
 ConstTransformer::ConstTransformer(std::vector<DataPtr> &_inputs, std::vector<DataPtr> &_outputs)
         : inputs(_inputs), outputs(_outputs), network(nullptr) {
     if (inputs.empty() || outputs.empty())
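
A short usage sketch of the new ICNNNetwork* constructor implemented above; the header name and the fullTrim() entry point are assumptions taken from the existing ConstTransformer interface:

    #include <ie_icnn_network.hpp>
    #include "graph_transformer.h"  // assumed header declaring ConstTransformer

    void foldConstantSubgraphs(InferenceEngine::ICNNNetwork* network) {
        // Accepts both CNNNetworkImpl- and CNNNetworkNGraphImpl-backed networks;
        // any other implementation makes the constructor throw, as shown above.
        InferenceEngine::ConstTransformer transformer(network);
        transformer.fullTrim();  // assumed existing method: folds and removes constant subgraphs
    }
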
@@ -3,7 +3,7 @@
 //
 
 #include <ie_cnn_layer_builder_ngraph.h>
-#include "cnn_network_ngraph_impl.hpp"
+#include <cnn_network_ngraph_impl.hpp>
 #include <precision_utils.h>
 #include <cpp/ie_cnn_network.h>
 
@@ -72,7 +72,7 @@ std::string asString<float>(const float& value) {
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Abs>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Abs",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -83,7 +83,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::GenericIE>::createLayer(const std::share
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get layer " << layer->get_friendly_name();
 
     LayerParams params = {layer->get_friendly_name(), castedLayer->getType(),
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     if (castedLayer->getType() == "RNNCell")
         res = std::make_shared<InferenceEngine::RNNCell>(params);
@@ -232,7 +232,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::TensorIterator>::createLayer(const std::
 
     // Create Inference Engine representation of TensorIterator
     LayerParams params = {layer->get_friendly_name(), "TensorIterator",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::TensorIterator>(params);
 
     // Body: inputs
@@ -365,7 +365,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::TensorIterator>::createLayer(const std::
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Constant>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Const",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Constant>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -377,9 +377,9 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Constant>::createLayer(const std::shared
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Convert>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Convert",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
-    auto p = details::ngraph::convertPrecision(layer->get_output_element_type(0));
+    auto p = details::convertPrecision(layer->get_output_element_type(0));
     std::string precision_str;
     switch (p) {
     case Precision::FP16:
@@ -423,7 +423,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Convert>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Ceiling>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Ceiling",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -431,7 +431,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Ceiling>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Floor>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Floor",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -439,7 +439,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Floor>::createLayer(const std::shared_pt
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Sigmoid>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Sigmoid",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -447,7 +447,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Sigmoid>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Tanh>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "TanH",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -455,7 +455,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Tanh>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Relu>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReLU",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReLULayer>(params);
     return res;
 }
@@ -463,7 +463,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Relu>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::SeluIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Selu",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::SeluIE>(layer);
@@ -477,7 +477,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::SeluIE>::createLayer(const std::shared_p
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ReLUIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReLU",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReLULayer>(params);
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::ReLUIE>(layer);
@@ -490,7 +490,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ReLUIE>::createLayer(const std::shared_p
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Range>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Range",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -498,7 +498,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Range>::createLayer(const std::shared_pt
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Exp>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Exp",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -506,7 +506,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Exp>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::MVN>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "MVN",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::MVNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::MVN>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -529,7 +529,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::LRN>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::LRN_IE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Norm",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::NormLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::LRN_IE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -545,7 +545,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::LRN_IE>::createLayer(const std::shared_p
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::CropIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Crop",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CropLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::CropIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -577,7 +577,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::CropIE>::createLayer(const std::shared_p
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Clamp>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Clamp",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ClampLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Clamp>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -590,7 +590,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Clamp>::createLayer(const std::shared_pt
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Softmax>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "SoftMax",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::SoftMaxLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::Softmax>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -602,7 +602,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Softmax>::createLayer(const std::sha
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Subtract>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     res->params["operation"] = "sub";
     return res;
@@ -611,7 +611,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Subtract>::createLayer(const std::shared
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Power>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     res->params["operation"] = "pow";
     return res;
@@ -620,7 +620,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Power>::createLayer(const std::share
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Maximum>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     res->params["operation"] = "max";
     return res;
@@ -634,7 +634,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Minimum>::createLayer(const std::sha
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Divide>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     res->params["operation"] = "div";
     return res;
@@ -643,7 +643,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Divide>::createLayer(const std::shar
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Multiply>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     res->params["operation"] = "prod";
     return res;
@@ -652,7 +652,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Multiply>::createLayer(const std::sh
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Add>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     res->params["operation"] = "sum";
     return res;
@@ -673,7 +673,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::BatchNormInference>::createLayer(
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Squeeze>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Squeeze",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Squeeze>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -684,7 +684,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Squeeze>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Unsqueeze>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Unsqueeze",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Unsqueeze>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -695,7 +695,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Unsqueeze>::createLayer(const std::share
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::FakeQuantize>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "FakeQuantize",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::QuantizeLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::FakeQuantize>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -707,7 +707,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ConvolutionIE>::createLayer(
         const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Convolution",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ConvolutionLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::ConvolutionIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -793,7 +793,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::DeconvolutionIE>::createLayer(
         const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Deconvolution",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::DeconvolutionLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::DeconvolutionIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -861,7 +861,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::DeformableConvolution>::createLayer(
         const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "DeformableConvolution",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::DeformableConvolutionLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::DeformableConvolution>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -935,7 +935,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::DeformableConvolution>::createLayer(
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::AvgPool>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Pooling",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::PoolingLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::AvgPool>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1001,7 +1001,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::AvgPool>::createLayer(const std::sha
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::MaxPool>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Pooling",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::PoolingLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::MaxPool>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1066,7 +1066,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::MaxPool>::createLayer(const std::sha
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ROIPooling>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ROIPooling",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::ROIPooling>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1082,7 +1082,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ROIPooling>::createLayer(const std::shar
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::PSROIPooling>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "PSROIPooling",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::PSROIPooling>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1101,7 +1101,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::DeformablePSROIPooling>::createLayer(
         const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "PSROIPooling",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::DeformablePSROIPooling>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1125,7 +1125,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::DeformablePSROIPooling>::createLayer
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::PRelu>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "PReLU",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::PReLULayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::PRelu>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1154,7 +1154,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::PRelu>::createLayer(const std::shared_pt
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Split>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Split",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::SplitLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::Split>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1175,7 +1175,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Split>::createLayer(const std::share
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::VariadicSplit>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Split",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::SplitLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::VariadicSplit>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1196,7 +1196,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::VariadicSplit>::createLayer(const std::s
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Concat>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Concat",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ConcatLayer>(params);
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Concat>(layer);
@@ -1210,7 +1210,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Concat>::createLayer(const std::shared_p
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::GatherIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Gather",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::GatherLayer>(params);
 
     auto castedLayer = std::dynamic_pointer_cast<ngraph::op::GatherIE>(layer);
@@ -1229,14 +1229,14 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::GatherTree>::createLayer(const std::
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::GatherTreeIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "GatherTree",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
 
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ReverseSequence>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
-    LayerParams params = {layer->get_friendly_name(), "ReverseSequence", details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+    LayerParams params = {layer->get_friendly_name(), "ReverseSequence", details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReverseSequenceLayer>(params);
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::ReverseSequence>(layer);
@@ -1252,7 +1252,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ReverseSequence>::createLayer(const std:
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Reshape>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Reshape",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReshapeLayer>(params);
     return res;
 }
@@ -1260,7 +1260,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Reshape>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ShapeOf>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ShapeOf",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -1268,7 +1268,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ShapeOf>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::Reshape>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Reshape",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::Reshape>(layer);
     if (castedLayer == nullptr)
@@ -1293,7 +1293,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::Reshape>::createLayer(const std::sha
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::PadIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Pad",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::PadLayer>(params);
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::PadIE>(layer);
@@ -1333,7 +1333,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::PadIE>::createLayer(const std::shared_pt
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ScaleShiftIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ScaleShift",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ScaleShiftLayer>(params);
 
     NodeConverter<ngraph::op::Constant> converter;
@@ -1357,7 +1357,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ScaleShiftIE>::createLayer(const std::sh
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Elu>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "elu",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Elu>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1370,7 +1370,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Elu>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::SquaredDifference>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     res->params["operation"] = "squared_diff";
     return res;
@@ -1380,7 +1380,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::DetectionOutput>::createLayer(
     const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "DetectionOutput",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::DetectionOutput>(layer);
@@ -1416,7 +1416,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::DetectionOutput>::createLayer(
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Transpose>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Permute",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
 
     NodeConverter<ngraph::op::Constant> converter;
@@ -1444,7 +1444,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Proposal>::createLayer(const std::shared
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ProposalIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Proposal",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::ProposalIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1484,7 +1484,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::PriorBoxClusteredIE>::createLayer(
     const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "PriorBoxClustered",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::PriorBoxClusteredIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1533,7 +1533,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::PriorBoxClustered>::createLayer(
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::PriorBoxIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "PriorBox",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::PriorBoxIE>(layer);
     auto layer_info = params.type + " layer " + params.name;
@@ -1613,7 +1613,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::PriorBox>::createLayer(const std::shared
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::PowerIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Power",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::PowerLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::PowerIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1628,7 +1628,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::PowerIE>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::TopK>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "TopK",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::TopKLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::TopK>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1672,7 +1672,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::TopK>::createLayer(const std::shared
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::TopKIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "TopK",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::TopKLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::TopKIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1687,7 +1687,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::TopKIE>::createLayer(const std::shared_p
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Eltwise>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Eltwise",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::EltwiseLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Eltwise>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1712,7 +1712,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Eltwise>::createLayer(const std::shared_
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::TileIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Tile",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::TileLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::TileIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1725,7 +1725,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::TileIE>::createLayer(const std::shared_p
 
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ResampleV2>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
-    LayerParams params = {layer->get_friendly_name(), "Resample", details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+    LayerParams params = {layer->get_friendly_name(), "Resample", details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::ResampleV2>(layer);
     if (castedLayer == nullptr)
@@ -1752,7 +1752,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ResampleV2>::createLayer(const std::shar
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Interp>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Resample",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::Interp>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
 
@@ -1766,7 +1766,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Interp>::createLayer(const std::shared_p
     }
 
     params = {layer->get_friendly_name(), "Interp",
-              details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+              details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
 
     res->params["height"] = asString(attrs.height);
@@ -1786,7 +1786,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Interpolate>::createLayer(const std::sha
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::FullyConnected>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "FullyConnected",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::FullyConnected>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1824,7 +1824,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::LSTMCell>::createLayer(const std::shared
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::LSTMCellIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "LSTMCell",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::LSTMCellIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
 
@@ -1872,7 +1872,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::LSTMCellIE>::createLayer(const std::shar
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::MatMul>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Gemm",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
 
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::MatMul>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1887,7 +1887,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::MatMul>::createLayer(const std::shared_p
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::RegionYolo>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "RegionYolo",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::RegionYolo>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1920,7 +1920,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::RegionYolo>::createLayer(const std::shar
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::ReorgYolo>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReorgYolo",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::ReorgYolo>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1938,7 +1938,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::ReorgYolo>::createLayer(const std::share
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceMin>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReduceMin",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReduceLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::ReduceMin>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1950,7 +1950,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceMin>::createLayer(const std::s
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceMax>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReduceMax",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReduceLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::ReduceMax>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1962,7 +1962,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceMax>::createLayer(const std::s
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceMean>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReduceMean",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReduceLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::ReduceMean>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1974,7 +1974,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceMean>::createLayer(const std::
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceProd>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReduceProd",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReduceLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::ReduceProd>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -1986,7 +1986,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceProd>::createLayer(const std::
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceSum>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "ReduceSum",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReduceLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::ReduceSum>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -2003,7 +2003,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::NormalizeL2>::createLayer(const std::sha
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Log>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Log",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2011,7 +2011,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Log>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::NormalizeIE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Normalize",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::NormLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::NormalizeIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -2025,6 +2025,8 @@ CNNLayer::Ptr NodeConverter<ngraph::op::NormalizeIE>::createLayer(const std::sha
     if (converter.canCreate(weightsNode)) {
         const auto& weights = converter.createLayer(weightsNode);
         res->blobs["weights"] = weights->blobs["custom"];
+    } else {
+        THROW_IE_EXCEPTION << "Cannot convert weight node for NormalizeIE op";
     }
 
     return res;
@@ -2034,7 +2036,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::CTCGreedyDecoder>::createLayer(
     const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "CTCGreedyDecoder",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = ngraph::as_type_ptr<ngraph::op::CTCGreedyDecoder>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -2046,7 +2048,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::CTCGreedyDecoder>::createLayer(
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Erf>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Erf",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2054,7 +2056,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Erf>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Sign>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Sign",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2062,7 +2064,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Sign>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Sin>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Sin",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2070,7 +2072,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Sin>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Sinh>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Sinh",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2078,7 +2080,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Sinh>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Asin>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Asin",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2086,7 +2088,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Asin>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Cos>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Cos",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2094,7 +2096,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Cos>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Cosh>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Cosh",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2102,7 +2104,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Cosh>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Acos>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Acos",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2110,7 +2112,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Acos>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Tan>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Tan",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2118,7 +2120,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Tan>::createLayer(const std::shared_ptr<
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Atan>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Atan",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2126,7 +2128,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::Atan>::createLayer(const std::shared_ptr
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::Sqrt>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "Sqrt",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     return res;
 }
@@ -2142,7 +2144,7 @@ template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::StridedSliceIE>::createLayer(
         const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "StridedSlice",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::StridedSliceLayer>(params);
     auto castedLayer = std::dynamic_pointer_cast<ngraph::op::StridedSliceIE>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
@@ -2209,7 +2211,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::HardSigmoid>::createLayer(const std::sha
 
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::HardSigmoid_IE>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
-    LayerParams params = { layer->get_friendly_name(), "HardSigmoid", details::ngraph::convertPrecision(layer->get_output_element_type(0)) };
+    LayerParams params = { layer->get_friendly_name(), "HardSigmoid", details::convertPrecision(layer->get_output_element_type(0)) };
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     auto castedLayer = std::dynamic_pointer_cast<ngraph::op::HardSigmoid_IE>(layer);
     if (castedLayer == nullptr)
@@ -2223,7 +2225,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::HardSigmoid_IE>::createLayer(const std::
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::GRN>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
     LayerParams params = {layer->get_friendly_name(), "GRN",
-                          details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+                          details::convertPrecision(layer->get_output_element_type(0))};
     auto castedLayer = std::dynamic_pointer_cast<ngraph::op::GRN>(layer);
     if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
 
@@ -2234,7 +2236,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::GRN>::createLayer(const std::shared_ptr<
 
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::LogicalNot>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
-    LayerParams params = {layer->get_friendly_name(), "Activation", details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+    LayerParams params = {layer->get_friendly_name(), "Activation", details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
     res->params["type"] = "not";
     return res;
@@ -2242,7 +2244,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::LogicalNot>::createLayer(const std::
 
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceLogicalAnd>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
-    LayerParams params = {layer->get_friendly_name(), "ReduceAnd", details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+    LayerParams params = {layer->get_friendly_name(), "ReduceAnd", details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReduceLayer>(params);
 
     auto castedLayer = std::dynamic_pointer_cast<ngraph::op::v1::ReduceLogicalAnd>(layer);
@@ -2254,7 +2256,7 @@ CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceLogicalAnd>::createLayer(const
 
 template <>
 CNNLayer::Ptr NodeConverter<ngraph::op::v1::ReduceLogicalOr>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
-    LayerParams params = {layer->get_friendly_name(), "ReduceOr", details::ngraph::convertPrecision(layer->get_output_element_type(0))};
+    LayerParams params = {layer->get_friendly_name(), "ReduceOr", details::convertPrecision(layer->get_output_element_type(0))};
     auto res = std::make_shared<InferenceEngine::ReduceLayer>(params);
 
     auto castedLayer = std::dynamic_pointer_cast<ngraph::op::v1::ReduceLogicalOr>(layer);
@@ -93,7 +93,7 @@ private:
 
     Blob::Ptr shareWeights(const std::shared_ptr<ngraph::op::Constant>& constLayer) const {
         if (!constLayer) THROW_IE_EXCEPTION << "Cannot share weights! Constant operation is empty!";
-        auto dataPrecision = details::ngraph::convertPrecision(constLayer->get_element_type());
+        auto dataPrecision = details::convertPrecision(constLayer->get_element_type());
 
         size_t shapeSize = ngraph::shape_size(constLayer->get_shape());
         if (dataPrecision == Precision::BIN) {
index ccf9f74..9c38fe6 100644 (file)
@@ -2447,7 +2447,9 @@ void PriorBoxClusteredValidator::checkShapes(const CNNLayer* layer, const std::v
 PriorBoxClusteredValidator::PriorBoxClusteredValidator(const std::string& _type): LayerValidator(_type) {}
 
 void ProposalValidator::parseParams(CNNLayer* layer) {
-    layer->params["num_outputs"] = std::to_string(layer->outData.size());
+    if (layer->params.find("num_outputs") == layer->params.end()) {
+        layer->params["num_outputs"] = std::to_string(layer->outData.size());
+    }
 }
 
 void ProposalValidator::checkParams(const CNNLayer* layer) {
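[Editorial note on the hunk above] The guard keeps a num_outputs value that was already parsed from the IR instead of unconditionally overwriting it with the current output count. A minimal hedged sketch of the behavioral difference (the layer name and values are hypothetical, not taken from the patch):

    // Sketch: a Proposal layer whose IR already specifies num_outputs=2
    // while only one output Data is registered at validation time.
    CNNLayer proposal(LayerParams{"proposal", "Proposal", Precision::FP32});
    proposal.params["num_outputs"] = "2";   // value that came from the IR
    // old behavior: params["num_outputs"] = std::to_string(proposal.outData.size());  // -> "1", IR value lost
    // new behavior: the key is already present, so the IR-provided "2" is preserved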
@@ -3074,52 +3076,55 @@ void NMSValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>&
         THROW_IE_EXCEPTION << layer->name << " 'score_threshold' should be scalar";
 }
 
-ScatterValidator::ScatterValidator(const std::string& _type): LayerValidator(_type) {}
+ScatterUpdateValidator::ScatterUpdateValidator(const std::string& _type): LayerValidator(_type) {}
 
-void ScatterValidator::parseParams(CNNLayer* layer) {
-    auto casted = dynamic_cast<ScatterLayer*>(layer);
+void ScatterUpdateValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<ScatterUpdateLayer*>(layer);
     if (!casted) {
-        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ScatterLayer class";
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ScatterUpdateLayer class";
     }
-
-    casted->axis = casted->GetParamAsInt("axis", 0);
 }
 
-void ScatterValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
-    auto casted = dynamic_cast<const ScatterLayer*>(layer);
+void ScatterUpdateValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const ScatterUpdateLayer*>(layer);
     if (!casted) {
-        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ScatterLayer class";
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ScatterUpdateLayer class";
     }
 
     size_t numInputs = inShapes.size();
-    if (numInputs != 3)
-        THROW_IE_EXCEPTION << layer->name << " Scatter can take only 3 inputs, but actually it has: " << numInputs;
+    if (numInputs != 4)
+        THROW_IE_EXCEPTION << layer->name << " Scatter can take only 4 inputs, but actually it has: " << numInputs;
 
-    if (!(-static_cast<int>(inShapes[0].size()) <= casted->axis && casted->axis < static_cast<int>(inShapes[0].size())))
-        THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
+    static constexpr int DATA = 0;
+    static constexpr int INDICES = 1;
+    static constexpr int UPDATES = 2;
+    static constexpr int AXIS = 3;
 
-    if (inShapes[0].size() == 0 || (inShapes[0].size() == 1 && inShapes[0][0] == 1))
-        THROW_IE_EXCEPTION << layer->name << " 'Data' tensor rank should be >= 1";
+    if (inShapes[DATA].size() < 1)
+        THROW_IE_EXCEPTION << layer->name << " 'Data' tensor rank must be >= 1";
 
-    if (inShapes[1].size() == 0 || (inShapes[1].size() == 1 && inShapes[1][0] == 1))
-        THROW_IE_EXCEPTION << layer->name << " 'Indexes' tensor rank should be >= 1";
+    if (inShapes[INDICES].size() < 1)
+        THROW_IE_EXCEPTION << layer->name << " 'Indices' tensor rank must be >= 1";
 
-    if (inShapes[1].size() == 0 || (inShapes[1].size() == 1 && inShapes[1][0] == 1))
-        THROW_IE_EXCEPTION << layer->name << " 'Updates' tensor rank should be >= 1";
+    if (inShapes[UPDATES].size() < 1)
+        THROW_IE_EXCEPTION << layer->name << " 'Updates' tensor rank must be >= 1";
 
-    if (inShapes[1] != inShapes[2])
-        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'indexes' and 'updates' tensors dimension";
+    if (!(inShapes[AXIS].size() == 1 && inShapes[AXIS][0] == 1))
+        THROW_IE_EXCEPTION << layer->name << " 'Axis' tensor must be 1D array of 1 element";
 
-    const size_t SCATTER_DATA = 0;
-    const size_t SCATTER_INDEXES = 1;
-    const size_t SCATTER_UPDATES = 2;
+    if (inShapes[UPDATES].size() != inShapes[INDICES].size() + inShapes[DATA].size() - 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'indexes' and 'updates' tensors dimension";
 
-    Precision inIdxPrecision = layer->insData[SCATTER_INDEXES].lock()->getTensorDesc().getPrecision();
+    Precision inIdxPrecision = layer->insData[INDICES].lock()->getTensorDesc().getPrecision();
     if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32)
-        THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indexes' precision. Only FP32 or I32 are supported!";
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Indices' precision. Only FP32 or I32 are supported!";
+
+    Precision inAxisPrecision = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
+    if (inAxisPrecision != Precision::FP32 && inAxisPrecision != Precision::I32)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input 'Axis' precision. Only FP32 or I32 are supported!";
 
-    if (layer->insData[SCATTER_DATA].lock()->getTensorDesc().getPrecision() !=
-        layer->insData[SCATTER_UPDATES].lock()->getTensorDesc().getPrecision())
+    if (layer->insData[DATA].lock()->getTensorDesc().getPrecision() !=
+        layer->insData[UPDATES].lock()->getTensorDesc().getPrecision())
         THROW_IE_EXCEPTION << layer->name << " Precision should be equal for input tensors 'Data' and 'Updates'";
 }
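[Editorial note] A worked example of the new rank constraint, for orientation only (the concrete shapes are made up, not taken from the patch): with data [1000, 256, 7, 7], indices [125, 20] and a 1-element 1D axis input, the 'Updates' tensor must have rank 2 + 4 - 1 = 5, e.g. [125, 20, 256, 7, 7].

    // Sketch of shapes that satisfy ScatterUpdateValidator::checkShapes:
    std::vector<InferenceEngine::SizeVector> inShapes = {
        {1000, 256, 7, 7},      // DATA
        {125, 20},              // INDICES
        {125, 20, 256, 7, 7},   // UPDATES
        {1}                     // AXIS (1D tensor of one element)
    };
    size_t expectedUpdatesRank = inShapes[1].size() + inShapes[0].size() - 1;  // 2 + 4 - 1 = 5
    // inShapes[2].size() == expectedUpdatesRank, so the rank check above passes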
 
@@ -3248,7 +3253,7 @@ LayerValidators::LayerValidators() {
     REG_LAYER_VALIDATOR_FOR_TYPE(TopKValidator, TopK);
     REG_LAYER_VALIDATOR_FOR_TYPE(UniqueValidator, Unique);
     REG_LAYER_VALIDATOR_FOR_TYPE(NMSValidator, NonMaxSuppression);
-    REG_LAYER_VALIDATOR_FOR_TYPE(ScatterValidator, ScatterUpdate);
+    REG_LAYER_VALIDATOR_FOR_TYPE(ScatterUpdateValidator, ScatterUpdate);
 }
 
 }  // namespace InferenceEngine
index 0ac1c97..6cbd18d 100644 (file)
@@ -969,9 +969,9 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class ScatterValidator : public LayerValidator {
+class ScatterUpdateValidator : public LayerValidator {
 public:
-    explicit ScatterValidator(const std::string& _type);
+    explicit ScatterUpdateValidator(const std::string& _type);
 
     void parseParams(CNNLayer* layer) override;
 
index f561ddc..e58e562 100644 (file)
@@ -67,6 +67,7 @@ ReduceLayer::~ReduceLayer() {}
 TopKLayer::~TopKLayer() {}
 UniqueLayer::~UniqueLayer() {}
 NonMaxSuppressionLayer::~NonMaxSuppressionLayer() {}
-ScatterLayer::~ScatterLayer() {}
+ScatterUpdateLayer::~ScatterUpdateLayer() {}
 ExperimentalDetectronPriorGridGeneratorLayer::~ExperimentalDetectronPriorGridGeneratorLayer() {}
 ExperimentalDetectronGenerateProposalsSingleImageLayer::~ExperimentalDetectronGenerateProposalsSingleImageLayer() {}
+ExperimentalDetectronTopKROIs::~ExperimentalDetectronTopKROIs() {}
index 93f0317..8fb75b4 100644 (file)
@@ -77,9 +77,10 @@ CNNLayerPtr layerCloneImpl<TensorIterator>(const CNNLayer* source) {
 CNNLayerPtr clonelayer(const CNNLayer& source) {
     using fptr = CNNLayerPtr (*)(const CNNLayer*);
     // Most derived layers must go first in this list
-    static const fptr cloners[] = {&layerCloneImpl<ExperimentalDetectronGenerateProposalsSingleImageLayer>,
+    static const fptr cloners[] = {&layerCloneImpl<ExperimentalDetectronTopKROIs>,
+                                   &layerCloneImpl<ExperimentalDetectronGenerateProposalsSingleImageLayer>,
                                    &layerCloneImpl<ExperimentalDetectronPriorGridGeneratorLayer>,
-                                   &layerCloneImpl<ScatterLayer>,
+                                   &layerCloneImpl<ScatterUpdateLayer>,
                                    &layerCloneImpl<NonMaxSuppressionLayer>,
                                    &layerCloneImpl<SelectLayer>,
                                    &layerCloneImpl<BatchNormalizationLayer>,
@@ -145,6 +146,35 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
     return nullptr;  // Silence "control may reach end of non-void function" warning
 }
 
+std::shared_ptr<ICNNNetwork> cloneNetwork(const ICNNNetwork& network) {
+    if (auto func = network.getFunction()) {
+        CNNNetwork net(func);
+
+        InputsDataMap originInputs;
+        OutputsDataMap originOutputs;
+        network.getInputsInfo(originInputs);
+        network.getOutputsInfo(originOutputs);
+        InputsDataMap clonedInputs = net.getInputsInfo();
+        OutputsDataMap clonedOutputs = net.getOutputsInfo();
+
+        for (const auto& outputInfo : originOutputs) {
+            if (clonedOutputs.find(outputInfo.first) == clonedOutputs.end())
+                THROW_IE_EXCEPTION << "Cannot clone network! Cloned network doesn't contain all outputs";
+            clonedOutputs[outputInfo.first]->setPrecision(outputInfo.second->getPrecision());
+            clonedOutputs[outputInfo.first]->setLayout(outputInfo.second->getLayout());
+        }
+        for (const auto& inputInfo : originInputs) {
+            if (clonedInputs.find(inputInfo.first) == clonedInputs.end())
+                THROW_IE_EXCEPTION << "Cannot clone network! Cloned network doesn't contain all inputs";
+            clonedInputs[inputInfo.first]->setPrecision(inputInfo.second->getPrecision());
+            clonedInputs[inputInfo.first]->setLayout(inputInfo.second->getLayout());
+            clonedInputs[inputInfo.first]->getPreProcess() = inputInfo.second->getPreProcess();
+        }
+        return net;
+    }
+
+    return cloneNet(network);
+}
 details::CNNNetworkImplPtr cloneNet(const ICNNNetwork& network) {
     std::vector<CNNLayerPtr> layers;
     details::CNNNetworkIterator i(&network);
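[Editorial note] A brief hedged usage sketch of the new cloneNetwork() helper (only the function itself comes from the hunk above; the surrounding application code is illustrative): for ngraph-based networks it clones through CNNNetwork(func) and copies input/output precisions, layouts and preprocessing, otherwise it falls back to the legacy cloneNet().

    // Sketch: clone first, then adjust the copy without touching the original network.
    std::shared_ptr<ICNNNetwork> copy = cloneNetwork(network);   // `network` is an existing ICNNNetwork
    InputsDataMap clonedInputs;
    copy->getInputsInfo(clonedInputs);
    for (auto& in : clonedInputs) {
        in.second->setPrecision(Precision::U8);                  // original inputs keep their precision
    }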
index 0cce4e1..294d574 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index f36d535..442dc5f 100644 (file)
@@ -487,6 +487,7 @@ std::size_t FillXmlDoc(const InferenceEngine::ICNNNetwork& network, pugi::xml_do
         if (dumpWeights && !node->blobs.empty()) {
             auto blobsNode = layer.append_child("blobs");
             for (const auto& dataIt : node->blobs) {
+                if (!dataIt.second) continue;
                 size_t dataSize = dataIt.second->byteSize();
                 pugi::xml_node data = blobsNode.append_child(dataIt.first.c_str());
                 data.append_attribute("offset").set_value(dataOffset);
@@ -546,11 +547,12 @@ void SerializeBlobs(std::ostream& stream, const InferenceEngine::ICNNNetwork& ne
     for (auto&& node : ordered) {
         if (!node->blobs.empty()) {
             for (const auto& dataIt : node->blobs) {
+                if (!dataIt.second) continue;
                 const char* dataPtr = dataIt.second->buffer().as<char*>();
                 size_t dataSize = dataIt.second->byteSize();
                 stream.write(dataPtr, dataSize);
                 if (!stream.good()) {
-                    THROW_IE_EXCEPTION << "Error during writing blob waights";
+                    THROW_IE_EXCEPTION << "Error during writing blob weights";
                 }
             }
         }
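[Editorial note] A hedged illustration of the case the two `if (!dataIt.second) continue;` guards above protect against (the layer and blob names here are hypothetical): a blob key can be registered on a layer without ever being allocated, and serialization would previously dereference the null pointer.

    // Sketch: a layer whose optional "biases" blob slot was created but left empty.
    CNNLayer fc(LayerParams{"fc1", "FullyConnected", Precision::FP32});
    fc.blobs["weights"] = make_shared_blob<float>(TensorDesc(Precision::FP32, {16, 8}, Layout::NC));
    fc.blobs["weights"]->allocate();
    fc.blobs["biases"] = nullptr;   // byteSize()/buffer() on this entry would crash the dump without the guard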
index 1ae2e61..8dd5294 100644 (file)
@@ -248,7 +248,7 @@ REG_SHAPE_INFER_FOR_TYPE(GatherTreeShapeProp, GatherTree);
 REG_SHAPE_INFER_FOR_TYPE(TopKShapeProp, TopK);
 REG_SHAPE_INFER_FOR_TYPE(UniqueShapeProp, Unique);
 REG_SHAPE_INFER_FOR_TYPE(NMSShapeProp, NonMaxSuppression);
-REG_SHAPE_INFER_FOR_TYPE(ScatterShapeProp, Scatter);
+REG_SHAPE_INFER_FOR_TYPE(ScatterUpdateShapeProp, ScatterUpdate);
 
 }  // namespace ShapeInfer
 }  // namespace InferenceEngine
index 19cfbb8..04b7f6a 100644 (file)
@@ -15,19 +15,19 @@ namespace InferenceEngine {
 namespace ShapeInfer {
 
 /**
- *@brief Implementation of Shape inference for Scatter layer
+ *@brief Implementation of Shape inference for ScatterUpdate layer
  */
-class ScatterShapeProp : public BuiltInShapeInferImpl {
+class ScatterUpdateShapeProp : public BuiltInShapeInferImpl {
 public:
-    explicit ScatterShapeProp(const std::string& type): BuiltInShapeInferImpl(type) {}
+    explicit ScatterUpdateShapeProp(const std::string& type): BuiltInShapeInferImpl(type) {}
 
     void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs, const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs, std::vector<SizeVector>& outShapes) override {
         LayerParams lp {};
-        ScatterLayer scatterLayer(lp);
-        scatterLayer.params = params;
-        scatterLayer.type = _type;
-        validate(&scatterLayer, inBlobs, params, blobs);
+        ScatterUpdateLayer scatterUpdateLayer(lp);
+        scatterUpdateLayer.params = params;
+        scatterUpdateLayer.type = _type;
+        validate(&scatterUpdateLayer, inBlobs, params, blobs);
 
         outShapes = {inShapes[0]};
     }
index aa27efe..1067869 100644 (file)
@@ -33,7 +33,6 @@ target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR}
        $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
 
 add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
-add_clang_format_target(${TARGET_NAME}_clang_format FOR_TARGETS ${TARGET_NAME})
 
 # developer package
 
@@ -42,6 +41,6 @@ ie_developer_export_targets(${TARGET_NAME})
 # install
 
 install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
-        ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
+        ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
         LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
diff --git a/inference-engine/src/mkldnn_plugin/bf16transformer.cpp b/inference-engine/src/mkldnn_plugin/bf16transformer.cpp
new file mode 100644 (file)
index 0000000..0558bd1
--- /dev/null
@@ -0,0 +1,238 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bf16transformer.h"
+#include <string>
+#include <vector>
+#include <fstream>
+#include <utility>
+#include <set>
+#include <chrono>
+#include "details/ie_cnn_network_tools.h"
+#include "ie_util_internal.hpp"
+#include "ngraph/type/bfloat16.hpp"
+
+using namespace MKLDNNPlugin;
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+void precisionColoringBF16(const CNNLayerPtr layer,
+                           ordered_properties &printed_properties,
+                           ordered_properties &node_properties) {
+    if (layer && !layer->insData.empty() && layer->input()) {
+        printed_properties.insert(printed_properties.begin(),
+                                  std::pair<std::string, std::string>("Precision",
+                                                                      layer->input()->getPrecision() == Precision::FP32 ? "FP32" : "BF16"));
+
+        if (layer->input()->getPrecision() == Precision::FP32) {
+            node_properties.emplace_back("fillcolor", "#5A5DF0");
+        } else {
+            node_properties.emplace_back("fillcolor", "#20F608");
+        }
+    }
+}
+
+void BF16Transformer::convertToFloat(InferenceEngine::CNNNetwork &network) {
+    // go over all edges and mark those having BF16 precision as FP32
+    std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
+    InputsDataMap inputs = network.getInputsInfo();
+    OutputsDataMap outputs = network.getOutputsInfo();
+    for (auto iter : sortedLayers) {
+        for (size_t o = 0; o < iter->outData.size(); o++) {
+            if (inputs.find(iter->outData[o]->getName()) == inputs.end()
+                && outputs.find(iter->outData[o]->getName()) == outputs.end()
+                && iter->outData[o]->getPrecision() == Precision::BF16) {
+                iter->outData[o]->setPrecision(Precision::FP32);
+            }
+        }
+    }
+}
+
+void BF16Transformer::convertToBFloat16(InferenceEngine::CNNNetwork &network) {
+    // go over all edges and mark those having FP32 precision as BF16
+    std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
+    InputsDataMap inputs = network.getInputsInfo();
+    OutputsDataMap outputs = network.getOutputsInfo();
+    for (auto iter : sortedLayers) {
+        for (size_t o = 0; o < iter->outData.size(); o++) {
+            if (inputs.find(iter->outData[o]->getName()) == inputs.end()
+                && outputs.find(iter->outData[o]->getName()) == outputs.end()
+                && iter->outData[o]->getPrecision() == Precision::FP32) {
+                iter->outData[o]->setPrecision(Precision::BF16);
+            }
+        }
+    }
+
+    // convert all edges back to FP32 on demand
+    optimizeToFloat(network);
+}
+
+void BF16Transformer::optimizeToFloat(InferenceEngine::CNNNetwork &network) {
+    std::set<DataPtr> toAnalyzeTensors;
+    std::set<DataPtr> immutable;
+    bool hasBF16Tensor = false;
+    std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
+    // 1. Check whether the network has any bf16 tensors at all - if not, return early since there is nothing to revert to FP32
+    for (auto iter : sortedLayers) {
+        for (size_t i = 0; i < iter->insData.size(); i++) {
+            if (iter->insData[i].lock()->getTensorDesc().getPrecision() == Precision::BF16) {
+                hasBF16Tensor = true;
+            }
+        }
+        for (size_t o = 0; o < iter->outData.size(); o++) {
+            if (iter->outData[o]->getTensorDesc().getPrecision() == Precision::BF16) {
+                hasBF16Tensor = true;
+            }
+        }
+    }
+    if (!hasBF16Tensor) {
+        return;
+    }
+    // 2a. go over all inputs and outputs and put them to the toAnalyzeTensors
+    InputsDataMap inputs = network.getInputsInfo();
+    for (auto input : inputs) {
+        immutable.insert(input.second->getInputData());
+        if (input.second->getInputData()->getTensorDesc().getPrecision() != Precision::BF16) {
+            toAnalyzeTensors.insert(input.second->getInputData());
+        }
+    }
+
+    OutputsDataMap outputs = network.getOutputsInfo();
+    for (auto output : outputs) {
+        immutable.insert(output.second);
+        if (output.second->getTensorDesc().getPrecision() != Precision::BF16) {
+            toAnalyzeTensors.insert(output.second);
+        }
+    }
+
+    // 2b. go over all unknown layers for this algo and mark them as fp32 and add to the toAnalyzeTensors
+    // 2c. go over all inputs to _initbf16 and if they are fp32 - add them to the toAnalyzeTensors
+    for (auto iter : sortedLayers) {
+        if (_initbf16.find(iter->type) == _initbf16.end()
+            && _complementbf16.find(iter->type) == _complementbf16.end()
+            && _multiinput.find(iter->type) == _multiinput.end()) {
+            // try to mark inputs of the unknown layer
+            for (size_t i = 0; i < iter->insData.size(); i++) {
+                if (iter->insData[i].lock()->getPrecision() == Precision::BF16) {
+                    bool marked = tryToMarkFP32(iter->insData[i].lock(), immutable);
+                    if (marked) {
+                        toAnalyzeTensors.insert(iter->insData[i].lock());
+                    }
+                }
+            }
+            // try to mark outputs of the unknown layer
+            for (size_t o = 0; o < iter->outData.size(); o++) {
+                if (iter->outData[o]->getPrecision() == Precision::BF16) {
+                    bool marked = tryToMarkFP32(iter->outData[o], immutable);
+                    if (marked) {
+                        toAnalyzeTensors.insert(iter->outData[o]);
+                    }
+                }
+            }
+        }
+        if (_initbf16.find(iter->type) != _initbf16.end()) {
+            // verify if input activation tensor is not bf16 - add to toAnalyzeTensors as well
+            // we are assuming here that _initbf16 contain only layers having one dynamic input
+            // in other case algorithm should be changed to care about two dynamic input tensors
+            // and take into account case of different precision if they are
+            if (iter->insData[0].lock()->getTensorDesc().getPrecision() != Precision::BF16) {
+                toAnalyzeTensors.insert(iter->insData[0].lock());
+                // output tensor for FP32 convolution/FC layers should be FP32 as well
+                for (size_t o = 0; o < iter->outData.size(); o++) {
+                    if (iter->outData[o]->getPrecision() == Precision::BF16) {
+                        bool marked = tryToMarkFP32(iter->outData[o], immutable);
+                        if (marked) {
+                            toAnalyzeTensors.insert(iter->outData[o]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // 3 - while toAnalyzeTensors is not empty look at the layers dealing with tensors mentioned in toAnalyzeTensors
+    while (!toAnalyzeTensors.empty()) {
+        DataPtr tensor = *toAnalyzeTensors.begin();
+        toAnalyzeTensors.erase(tensor);
+        // look into producer of the tensor
+        auto layer = tensor->getCreatorLayer().lock();
+        // if this layer is not from _initbf16 - analyze inputs
+        if (_initbf16.find(layer->type) == _initbf16.end()) {
+            // for all inputs investigate and modify tensor precision if required
+            for (size_t i = 0; i < layer->insData.size(); i++) {
+                bool marked = tryToMarkFP32(layer->insData[i].lock(), immutable);
+                if (marked) {
+                    toAnalyzeTensors.insert(layer->insData[i].lock());
+                }
+            }
+        }
+
+        // mark all produced tensors to FP32 if they are BF16 and if they do not go _only_ to the toAnalyzeTensors
+        // TODO: when we enable greedy mode and start to produce bf16 tensor even if one consumer accepts it,
+        // this place should be changed.
+        // Instead of "if they do not go _only_ to the toAnalyzeTensors" we have to apply "if they do not go at least to one of _initbf16"
+        // TODO: add test input1->pooling1->conv1 and the same pooling1->relu. for example. now convolution should be returned to fp32
+        // after greedy mode, it should be fp32.
+        for (auto inputTo : tensor->getInputTo()) {
+            for (size_t o = 0; o < inputTo.second->outData.size(); o++) {
+                if (inputTo.second->outData[o]->getTensorDesc().getPrecision() == Precision::BF16) {
+                    bool marked = tryToMarkFP32(inputTo.second->outData[o], immutable);
+                    if (marked) {
+                        toAnalyzeTensors.insert(inputTo.second->outData[o]);
+                    }
+                }
+            }
+        }
+    }
+
+#ifndef NDEBUG
+    {
+        std::ofstream file("bf16_icnnnetwork.dot");
+        saveGraphToDot(network, file, precisionColoringBF16);
+    }
+#endif
+}
+
+bool BF16Transformer::tryToMarkFP32(InferenceEngine::DataPtr data, const std::set<InferenceEngine::DataPtr>& immutable) {
+    bool marked = false;
+    if (immutable.find(data) == immutable.end() && data->getPrecision() == Precision::BF16) {
+        // we treat one consumer and many in different ways
+        // if there is one consumer, we can mark its input as float if it does not belong to the list of initial layers
+        // in other cases we mark a tensor that is passed to several layers as FP32 only if at least one consumer
+        // produces data in FP32, i.e. there should be a way of getting FP32 from the output data to this point
+        if (data->getInputTo().size() == 1) {
+            if (_initbf16.find(data->getInputTo().begin()->second->type) == _initbf16.end()) {
+                marked = true;
+            }
+        } else {
+            // get all consumers
+            for (auto o : data->getInputTo()) {
+                // if tensor goes to several layers, we will mark it by FP32 only if one of the layer is unknown
+                if (_initbf16.find(o.second->type) == _initbf16.end() &&
+                    _complementbf16.find(o.second->type) == _complementbf16.end() &&
+                    _multiinput.find(o.second->type) == _multiinput.end()) {
+                    marked = true;
+                }
+            }
+        }
+        if (marked) {
+            data->setPrecision(Precision::FP32);
+        }
+    }
+    return marked;
+}
+
+InferenceEngine::MemoryBlob::Ptr BF16Transformer::convertBF16ToFloat(InferenceEngine::MemoryBlob::Ptr tweights) {
+    TensorDesc td(Precision::FP32, tweights->getTensorDesc().getDims(), tweights->getTensorDesc().getLayout());
+    MemoryBlob::Ptr weightsFP32 = make_shared_blob<float>(td);
+    weightsFP32->allocate();
+    auto lmbf16 = tweights->rmap();
+    short *bf16data = lmbf16.as<short *>();
+    auto lmfp32 = weightsFP32->wmap();
+    float *fp32data = lmfp32.as<float *>();
+    for (size_t i = 0; i < weightsFP32->size(); i++) {
+        fp32data[i] = ngraph::bfloat16::from_bits(bf16data[i]);
+    }
+    return weightsFP32;
+}
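[Editorial note, not part of the patch] bfloat16 keeps the fp32 sign bit and 8-bit exponent and truncates the mantissa to 7 bits, which is why the MKLDNN extension utils later in this patch report 2 bytes for bf16 and why convertBF16ToFloat() above can rebuild fp32 values via ngraph::bfloat16::from_bits(). A minimal round-trip sketch, assuming the same ngraph helper type:

    #include "ngraph/type/bfloat16.hpp"

    // Sketch: fp32 -> bf16 -> fp32 round trip; precision drops to roughly 3 significant decimal digits.
    float roundTrip(float original) {
        ngraph::bfloat16 narrowed(original);                          // keeps sign, exponent and the top 7 mantissa bits
        return ngraph::bfloat16::from_bits(narrowed.to_bits());       // same per-element step as convertBF16ToFloat()
    }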
diff --git a/inference-engine/src/mkldnn_plugin/bf16transformer.h b/inference-engine/src/mkldnn_plugin/bf16transformer.h
new file mode 100644 (file)
index 0000000..22becc6
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <details/caseless.hpp>
+#include <string>
+#include <set>
+#include "inference_engine.hpp"
+
+namespace MKLDNNPlugin {
+
+class BF16Transformer {
+    const InferenceEngine::details::caseless_set<std::string> _initbf16 =
+        { "convolution", "fullyconnected", "innerproduct" };
+    const InferenceEngine::details::caseless_set<std::string> _complementbf16 =
+        { "relu", "pooling", "norm", "gather" };
+    const InferenceEngine::details::caseless_set<std::string> _multiinput =
+        { "concat", "eltwise" };
+
+    /**
+    * Tries to mark a tensor as FP32 by analyzing its local consumers. Does not mark the tensor if
+    *
+    * 1. the tensor goes to an init layer (conv or fc)
+    * 2. it goes to layers which can work with BF16
+    *
+    * if the tensor goes to a layer not supporting BF16, it will be marked as FP32
+    */
+    bool tryToMarkFP32(InferenceEngine::DataPtr data, const std::set<InferenceEngine::DataPtr> &immutable);
+
+public:
+    /**
+     * Restores floating point data types on edges which go to unsupported layers
+     *
+     * Algo:
+     * 1. Check whether the network has any bf16 tensors at all - if not, return early since there is
+     * nothing to revert
+     * 2a. go over all inputs and outputs and if the data type is not BF16, put them into toAnalyzeTensors
+     * 2b. go over all layers unknown to this algo, mark them as fp32, add their inputs and
+     * outputs to toAnalyzeTensors and try to mark them as FP32
+     * 2c. go over all inputs to _initbf16 and if they are fp32, add them to toAnalyzeTensors
+     *
+     * 3 - while toAnalyzeTensors is not empty look at the layers dealing with tensors mentioned in
+     * toAnalyzeTensors, analyze parent and children and depending on the type of the layers try to
+     * extend FP32 data type
+    */
+    void optimizeToFloat(InferenceEngine::CNNNetwork &network);
+
+    /**
+     * Converts all edges from bfloat16 to float data type. Does not touch input and output nodes
+     */
+    void convertToFloat(InferenceEngine::CNNNetwork &network);
+
+    /**
+    * Converts all fp32 edges except inputs and outputs to bf16 and then calls optimizeToFloat to restore FP32 where required
+    */
+    void convertToBFloat16(InferenceEngine::CNNNetwork &network);
+
+    InferenceEngine::MemoryBlob::Ptr convertBF16ToFloat(InferenceEngine::MemoryBlob::Ptr);
+};
+
+}  // namespace MKLDNNPlugin
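[Editorial note] A small hedged sketch of why caseless_set is used for the three layer-type lists above (illustrative only): layer type strings in IE graphs are not normalized for case, so membership tests have to ignore it.

    // Sketch: caseless_set matches independent of capitalization of the layer type.
    InferenceEngine::details::caseless_set<std::string> initbf16 = { "convolution", "fullyconnected", "innerproduct" };
    bool startsBF16Domain = initbf16.find("Convolution") != initbf16.end();   // true despite the different case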
index 694b6d0..3ff1172 100644 (file)
@@ -72,6 +72,12 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
             dumpQuantizedGraphToDot = val;
         } else if (key.compare(PluginConfigParams::KEY_DUMP_QUANTIZED_GRAPH_AS_IR) == 0) {
             dumpQuantizedGraphToIr = val;
+        } else if (key == PluginConfigParams::KEY_ENFORCE_BF16) {
+            if (val == PluginConfigParams::YES) enforceBF16 = true;
+            else if (val == PluginConfigParams::NO) enforceBF16 = false;
+            else
+                THROW_IE_EXCEPTION << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
+                    << ". Expected only YES/NO";
         } else {
             THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property " << key << " by CPU plugin";
         }
@@ -112,6 +118,10 @@ void Config::updateProperties() {
         _config.insert({ PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, std::to_string(streamExecutorConfig._streams) });
         _config.insert({ PluginConfigParams::KEY_CPU_THREADS_NUM, std::to_string(streamExecutorConfig._threads) });
         _config.insert({ PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, dumpToDot });
+        if (enforceBF16)
+            _config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
+        else
+            _config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO });
     }
 }
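
On the application side the new key is set like any other CPU plugin option. A hedged usage sketch, assuming KEY_ENFORCE_BF16 is exposed through PluginConfigParams like the other CPU keys; the model path is a placeholder:

    #include <inference_engine.hpp>
    #include <map>
    #include <string>

    int main() {
        InferenceEngine::Core ie;
        auto network = ie.ReadNetwork("model.xml");  // placeholder IR path

        // Request BF16 execution explicitly; any value other than YES/NO makes the plugin throw.
        std::map<std::string, std::string> config = {
            { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16,
              InferenceEngine::PluginConfigParams::YES }
        };
        auto execNetwork = ie.LoadNetwork(network, "CPU", config);
        (void)execNetwork;
        return 0;
    }
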
 
index 0007bc5..2444f00 100644 (file)
@@ -32,6 +32,7 @@ struct Config {
     std::string dumpQuantizedGraphToDot = "";
     std::string dumpQuantizedGraphToIr = "";
     int batchLimit = 0;
+    bool enforceBF16 = false;
     InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
 
 #if defined(__arm__) || defined(__aarch64__)
index 65eaec7..f444ff5 100644 (file)
@@ -10,6 +10,7 @@
 #include "mkldnn_async_infer_request.h"
 #include "mkldnn_infer_request.h"
 #include "mkldnn_memory_state.h"
+#include "bf16transformer.h"
 #include <ie_util_internal.hpp>
 #include <graph_tools.hpp>
 #include <cnn_network_int8_normalizer.hpp>
@@ -100,6 +101,19 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
                     LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
                     "ScaleShift"));
             transformer.transform(*_clonedNetwork);
+            if (with_cpu_x86_bfloat16()) {
+                BF16Transformer bf16Transformer;
+                CNNNetwork cnnetwork(_clonedNetwork);
+                if (cfg.enforceBF16 == true) {
+                    bf16Transformer.convertToBFloat16(cnnetwork);
+                } else {
+                    bf16Transformer.optimizeToFloat(cnnetwork);
+                }
+            } else {
+                BF16Transformer bf16Transformer;
+                CNNNetwork cnnetwork(_clonedNetwork);
+                bf16Transformer.convertToFloat(cnnetwork);
+            }
         }
     }
 
index 494a00d..0ab3e0a 100644 (file)
@@ -17,6 +17,8 @@ uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType)
         return 4;
     case mkldnn::memory::data_type::s16:
         return 2;
+    case mkldnn::memory::data_type::bf16:
+        return 2;
     case mkldnn::memory::data_type::s8:
         return 1;
     case mkldnn::memory::data_type::u8:
@@ -39,6 +41,8 @@ memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::P
             return memory::s32;
         case InferenceEngine::Precision::I16:
             return memory::s16;
+        case InferenceEngine::Precision::BF16:
+            return memory::bf16;
         case InferenceEngine::Precision::I8:
             return memory::s8;
         case InferenceEngine::Precision::U8:
@@ -61,6 +65,8 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d
             return InferenceEngine::Precision::I32;
         case memory::s16:
             return InferenceEngine::Precision::I16;
+        case memory::bf16:
+            return InferenceEngine::Precision::BF16;
         case memory::s8:
             return InferenceEngine::Precision::I8;
         case memory::u8:
index 8cac262..8d72160 100644 (file)
@@ -701,6 +701,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
         return activationNode &&
             (activationNode->getAlgorithm() == eltwise_relu ||
             (conv->getCnnLayer()->precision == Precision::FP32 &&
+             conv->getCnnLayer()->insData[0].lock()->getPrecision() != Precision::BF16 &&
              isOneOf(activationNode->getAlgorithm(), {eltwise_elu, eltwise_logistic, eltwise_bounded_relu, eltwise_clamp, eltwise_swish})));
     };
 
@@ -774,6 +775,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
 
     auto isSutableParentNode = [](MKLDNNNodePtr node) {
         return node->getType() == FullyConnected &&
+               node->getCnnLayer()->insData[0].lock()->getPrecision() != Precision::BF16 &&
                node->getChildEdges().size() == 1;
     };
 
@@ -845,7 +847,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
         bool isSutableConv = (node->getType() == Convolution) &&
                              node->getCnnLayer()->precision == Precision::FP32;
         bool isSutableBinConv = node->getType() == BinaryConvolution;
-        return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
+        return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1 &&
+               !(node->getCnnLayer()->insData[0].lock()->getPrecision() == Precision::BF16 &&
+                 node->getCnnLayer()->outData[0]->getPrecision() == Precision::FP32);
     };
 
     auto isSutableChildNode = [](MKLDNNNodePtr node) {
@@ -1118,7 +1122,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)
     auto isSutableParentNode = [](MKLDNNNodePtr node) {
         return node->getType() == Convolution &&
                node->getChildEdges().size() == 1 &&
-               node->getCnnLayer()->precision == Precision::FP32;
+               node->getCnnLayer()->precision == Precision::FP32 &&
+             !(node->getCnnLayer()->insData[0].lock()->getPrecision() == Precision::BF16 &&
+               node->getCnnLayer()->outData[0]->getPrecision() == Precision::FP32);
     };
 
     auto isSutableChildNode = [&](MKLDNNNodePtr node) {
@@ -2233,4 +2239,4 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) {
             graph.DropNode(parent);
         }
     }
-}
\ No newline at end of file
+}
index e30f1c4..ad1bc55 100644 (file)
@@ -128,7 +128,8 @@ void MKLDNNMemory::SetData(const MKLDNNMemory& memory, bool ftz) const {
     mkldnn::reorder reorderPrim(memory.GetPrimitive(), GetPrimitive());
     mkldnn::stream(stream::kind::eager).submit({reorderPrim});
 
-    if (ftz && memory.GetDataType() == mkldnn::memory::f32 && GetFormat() != mkldnn::memory::wino_fmt) {
+    if (ftz && memory.GetDataType() == mkldnn::memory::f32 && GetFormat() != mkldnn::memory::wino_fmt &&
+        GetDataType() != mkldnn::memory::bf16) {
         // Internal blobs haven't strides yet.
         auto *memData = static_cast<float *>(GetData());
         memData += prim->get_primitive_desc().desc().data.layout_desc.blocking.offset_padding;
@@ -540,6 +541,9 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
         case mkldnn_bin:
             precision = Precision::BIN;
             break;
+        case mkldnn_bf16:
+            precision = Precision::BF16;
+            break;
         default:
             THROW_IE_EXCEPTION << "Cannot cast to TensorDesc. Unsupported precision!";
     }
@@ -984,6 +988,9 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
         case Precision::BOOL:
             data_type = mkldnn::memory::data_type::u8;
             break;
+        case Precision::BF16:
+            data_type = mkldnn::memory::data_type::bf16;
+            break;
         default:
             THROW_IE_EXCEPTION << "Cannot create MKLDNNMemoryDesc from TensorDesc. Unsupported precision!";
     }
index 99aaf86..e1c0920 100644 (file)
@@ -662,6 +662,8 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
         internalBlob = InferenceEngine::make_shared_blob<int8_t>(desc);
     } else if (blb->getTensorDesc().getPrecision() == Precision::I32) {
         internalBlob = InferenceEngine::make_shared_blob<int32_t>(desc);
+    } else if (blb->getTensorDesc().getPrecision() == Precision::BF16) {
+        internalBlob = InferenceEngine::make_shared_blob<int16_t>(desc);
     } else {
         internalBlob = InferenceEngine::make_shared_blob<float>(desc);
     }
index 27ec34b..df73e8f 100644 (file)
@@ -15,7 +15,6 @@
 #include <ie_system_conf.h>
 #include <generic_ie.hpp>
 
-#include "cnn_network_ngraph_impl.hpp"
 #include "convert_function_to_cnn_network.hpp"
 #include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
 #include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
@@ -87,24 +86,21 @@ Engine::LoadExeNetworkImpl(const ICore * /*core*/, const InferenceEngine::ICNNNe
 
     std::shared_ptr<ICNNNetwork> clonedNetwork(nullptr);
 
-    if (auto networkNGraph = dynamic_cast<const CNNNetworkNGraphImpl*>(&network)) {
-        auto nGraphNetwork = networkNGraph->cloneNGraphImpl();
-        if (!nGraphNetwork->getFunction()) {
-            clonedNetwork = nGraphNetwork->getCNNNetwork();
-        } else {
-            const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
-                return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
-                       std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
-                       std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
-            };
-            // Disable shape inference (WA for generic operations)
-            ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphNetwork->getFunction());
-
-            // Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
-            ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphNetwork->getFunction());
-            ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphNetwork->getFunction());
-            clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphNetwork->getFunction(), *nGraphNetwork.get());
-        }
+    if (network.getFunction()) {
+        const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
+            return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
+                std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
+                std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
+        };
+        CNNNetwork net(network.getFunction());
+        auto nGraphFunc = net.getFunction();
+        // Disable shape inference (WA for generic operations)
+        ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
+
+        // Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
+        ngraph::pass::ConvertOpSet2ToOpSet1(transformations_callback).run_on_function(nGraphFunc);
+        ngraph::pass::ConvertOpSet1ToLegacy(transformations_callback).run_on_function(nGraphFunc);
+        clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, network);
     } else {
         clonedNetwork = cloneNet(network);
     }
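
As the in-line note says, a plugin may assemble its own pass pipeline instead of running the full conversion. A hedged sketch of a reduced pipeline over a standalone ngraph::Function, reusing the two passes from this change and keeping Gelu for later plugin-specific handling (the opset2 include path is assumed):

    #include <memory>
    #include <ngraph/function.hpp>
    #include <ngraph/opsets/opset2.hpp>  // path assumed
    #include <transformations/convert_opset2_to_opset1/convert_opset2_to_opset1.hpp>
    #include <transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>

    // Illustrative reduced pipeline: run the same two conversion passes, but skip
    // decomposition of Gelu so the plugin can map it to its own primitive later.
    void runReducedPipeline(const std::shared_ptr<ngraph::Function>& func) {
        const auto keepGelu = [](const std::shared_ptr<const ::ngraph::Node>& node) -> bool {
            return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) != nullptr;
        };
        ngraph::pass::ConvertOpSet2ToOpSet1(keepGelu).run_on_function(func);
        ngraph::pass::ConvertOpSet1ToLegacy(keepGelu).run_on_function(func);
    }
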
index 41f1185..c514164 100644 (file)
@@ -128,7 +128,13 @@ protected:
             }
 
             // All extension layers support only FP32 precision!
+            // Force FP32 where BF16 is requested - extension layers natively support only FP32.
+            // If we see BF16 here, it is another floating point format that will be handled by a reorder
+            // inserted by the mkl-dnn CPU plugin when it detects a data type mismatch on an edge.
             InferenceEngine::Precision precision = data_desc.getPrecision();
+            if (precision == Precision::BF16) {
+                precision = Precision::FP32;
+            }
             if (conf.layout == ConfLayout::ANY) {
                 dataConfig.desc = TensorDesc(precision, data_dims, InferenceEngine::Layout::ANY);
             } else {
index 252c8c0..0213857 100644 (file)
@@ -159,11 +159,12 @@ public:
                 THROW_IE_EXCEPTION << "Interp supports only 4d blobs!";
 
             auto src_precision = inData->getTensorDesc().getPrecision();
-            if (src_precision != Precision::FP32 && src_precision != Precision::U8)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. Only U8 or FP32 are supported!";
+            if (src_precision != Precision::FP32 && src_precision != Precision::U8 && src_precision != Precision::BF16)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. Only U8, FP32 or BF16 are supported!";
 
-            if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect output data tensor precision. Only FP32 is supported!";
+            auto dst_precision = layer->outData[0]->getTensorDesc().getPrecision();
+            if (dst_precision != Precision::FP32 && dst_precision != Precision::BF16)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect output data tensor precision. Only FP32 or BF16 are supported!";
 
             // We don't read other parameters since they are needed only for dst reshape in caffe
             pad_beg = layer->GetParamAsInt("pad_beg");
@@ -197,14 +198,16 @@ public:
                 if (mayiuse(avx512_common)) {
                     blk_layout = ConfLayout::BLK16;
                     interp_kernel.reset(new jit_uni_interp_kernel_f32<avx512_common>());
+                    addConfig(layer, { DataConfigurator(blk_layout) }, { DataConfigurator(blk_layout) });
                 } else if (mayiuse(avx2)) {
                     blk_layout = ConfLayout::BLK8;
                     interp_kernel.reset(new jit_uni_interp_kernel_f32<avx2>());
+                    addConfig(layer, { DataConfigurator(blk_layout) }, { DataConfigurator(blk_layout) });
                 } else {
                     blk_layout = ConfLayout::BLK8;
                     interp_kernel.reset(new jit_uni_interp_kernel_f32<sse42>());
+                    addConfig(layer, { DataConfigurator(blk_layout) }, { DataConfigurator(blk_layout) });
                 }
-                addConfig(layer, { DataConfigurator(blk_layout) }, { DataConfigurator(blk_layout) });
             }
         } catch (InferenceEngine::details::InferenceEngineException &ex) {
             errorMsg = ex.what();
@@ -258,8 +261,10 @@ public:
         case Precision::FP32:
         {
             const float* src_data = inputs[0]->cbuffer().as<const float *>() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
-            size_t IC = inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1] *
-                        inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[4];
+            size_t IC = (inputs[0]->getTensorDesc().getLayout() == Layout::BLOCKED)
+                ? inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1] *
+                inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[4]
+                : inputs[0]->getTensorDesc().getDims()[1];
             interpolate(IN, IC, src_data,
                 -pad_beg, -pad_beg, IH_pad, IW_pad, IH, IW, dst_data, 0, 0, OH, OW, OH, OW);
         }
@@ -312,10 +317,12 @@ private:
         }
 
         int block_size = 1;
-        if (mayiuse(avx512_common)) {
-            block_size = 16;
-        } else {
-            block_size = 8;
+        if (interp_kernel) {
+            if (mayiuse(avx512_common)) {
+                block_size = 16;
+            } else {
+                block_size = 8;
+            }
         }
 
         // Align channel number to block size to deal with channels padding in IE with multiple blobs
@@ -358,14 +365,21 @@ private:
 
                         float *pdst = pdst_h + w * block_size;
 
-                        arg.src00 = psrc00;
-                        arg.src01 = psrc01;
-                        arg.src10 = psrc10;
-                        arg.src11 = psrc11;
-                        arg.dst = pdst;
-                        arg.w_lambda0 = static_cast<float*>(&w_lambda0);
-                        arg.w_lambda1 = static_cast<float*>(&w_lambda1);
-                        (*interp_kernel)(&arg);
+                        if (interp_kernel) {
+                            arg.src00 = psrc00;
+                            arg.src01 = psrc01;
+                            arg.src10 = psrc10;
+                            arg.src11 = psrc11;
+                            arg.dst = pdst;
+                            arg.w_lambda0 = static_cast<float*>(&w_lambda0);
+                            arg.w_lambda1 = static_cast<float*>(&w_lambda1);
+                            (*interp_kernel)(&arg);
+                        } else {
+                            for (int c = 0; c < block_size; ++c) {
+                                pdst[c] = h_lambda1 * (w_lambda1 * psrc00[c] + w_lambda0 * psrc01[c]) +
+                                    h_lambda0 * (w_lambda1 * psrc10[c] + w_lambda0 * psrc11[c]);
+                            }
+                        }
                     }
         });
     }
index 4520d70..58b60af 100644 (file)
@@ -91,6 +91,17 @@ bool MKLDNNConvolutionNode::canBeExecutedInInt8() {
     }
 }
 
+InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex) {
+    InferenceEngine::Precision eltwisePrecision;
+    auto parent0 = eltwiseNode->getCnnLayer()->insData[0].lock()->getCreatorLayer().lock();
+    auto parent1 = eltwiseNode->getCnnLayer()->insData[1].lock()->getCreatorLayer().lock();
+
+    auto fusedParent = findex != 0 ? fusedWith[findex - 1].get()->getCnnLayer() : this->getCnnLayer();
+    eltwisePrecision = fusedParent == parent0 ? eltwiseNode->getCnnLayer()->insData[1].lock()->getPrecision() :
+        eltwiseNode->getCnnLayer()->insData[0].lock()->getPrecision();
+    return eltwisePrecision;
+}
+
 void MKLDNNConvolutionNode::getSupportedDescriptors() {
     if (!descs.empty())
         return;
@@ -116,17 +127,11 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
 
         // We need to make sure that convolution output and second input of fused Eltwise operation
         // have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32.
-        if (outputDataType != memory::f32 && isFusedWith(Eltwise)) {
+        if (outputDataType != memory::f32 && outputDataType != memory::bf16 && isFusedWith(Eltwise)) {
             for (int i = 0; i < fusedWith.size(); i++) {
                 auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
                 if (eltwiseNode) {
-                    auto parent0 = eltwiseNode->getCnnLayer()->insData[0].lock()->getCreatorLayer().lock();
-                    auto parent1 = eltwiseNode->getCnnLayer()->insData[1].lock()->getCreatorLayer().lock();
-
-                    auto fusedParent = i != 0 ? fusedWith[i-1].get()->getCnnLayer() : this->getCnnLayer();
-                    eltwisePrecision = fusedParent == parent0 ? eltwiseNode->getCnnLayer()->insData[1].lock()->getPrecision() :
-                                                                eltwiseNode->getCnnLayer()->insData[0].lock()->getPrecision();
-
+                    eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
                     if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) {
                         eltwisePrecision = Precision::FP32;
                         outputDataType = memory::f32;
@@ -274,10 +279,29 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
                 getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::ndhwc : memory::nhwc);
         createDescriptor({in_candidate}, {out_candidate});
     } else {
-        // If the weights aren't quantized, the only precision we support is FP32
-        inputDataType = memory::f32;
-        outputDataType = memory::f32;
+        inputDataType = convLayer->input()->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
+        outputDataType = convLayer->outData[0]->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
         eltwisePrecision = Precision::FP32;
+        for (int i = 0; i < fusedWith.size(); i++) {
+            auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
+            if (eltwiseNode) {
+                eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
+                // TODO(amalyshe): there might be a situation when the convolution can be executed in BF16,
+                // the output is required in FP32, but the eltwise in-place tensor is in BF16.
+                // Currently we force the output to BF16, which adds a reorder after the node.
+                // Another option is to mark the output as FP32 and set eltwisePrecision (which stands
+                // for the precision of the in-place input tensor) to FP32. That adds a reorder for the in-place
+                // tensor before the fused convolution. This behaviour might be more correct with respect to the
+                // expected markup of the graph, but the performance of the two approaches might differ. Needs verification.
+                outputDataType = eltwisePrecision == Precision::BF16 ? memory::bf16 : memory::f32;
+            }
+        }
+        // correction for FP32 input - there is no FP32 convolution that supports BF16 output
+        if (inputDataType == memory::f32
+            && (outputDataType == memory::bf16 || eltwisePrecision == Precision::BF16)) {
+            outputDataType = memory::f32;
+            eltwisePrecision = Precision::FP32;
+        }
 
         Layout layout = convLayer->input()->getLayout();
 
@@ -628,6 +652,9 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::
 
     mkldnn::memory::data_type wdt = precisionToDataType(inDesc.getPrecision());
     mkldnn::memory::data_type bdt = precisionToDataType(inDesc.getPrecision());
+    if (inDesc.getPrecision() == Precision::BF16) {
+        bdt = mkldnn::memory::data_type::f32;
+    }
 
     if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) {
         wdt = memory::s8;
@@ -739,7 +766,8 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c
     // Works only for FP32 convolutions for now.
     bool isStridedBlobsSupported = true;
     for (auto &insData : getCnnLayer()->insData) {
-        if (insData.lock()->getPrecision() != InferenceEngine::Precision::FP32) {
+        if (insData.lock()->getPrecision() != InferenceEngine::Precision::FP32
+            && insData.lock()->getPrecision() != InferenceEngine::Precision::BF16) {
             isStridedBlobsSupported = false;
             break;
         }
index 827223e..232803b 100644 (file)
@@ -12,6 +12,8 @@
 
 namespace MKLDNNPlugin {
 
+class MKLDNNEltwiseNode;
+
 class MKLDNNConvolutionNode : public MKLDNNNode {
 public:
     MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, int socket);
@@ -50,6 +52,7 @@ public:
 
 protected:
     void addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const;
+    InferenceEngine::Precision fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex);
 
 private:
     mkldnn::memory::data_type precisionToDataType(InferenceEngine::Precision prec);
index d129ab5..a5ddce2 100644 (file)
@@ -511,6 +511,11 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
                 inputDT = MKLDNNExtensionUtils::IEPrecisionToDataType(in_prec);
             }
 
+            if (inputDT == memory::bf16 || outputDT == memory::bf16) {
+                inputDT = memory::f32;
+                outputDT = memory::f32;
+            }
+
             auto impl_desc = initDesc(inputDT, outputDT, format);
 
             if (impl_desc.getImplementationType() != impl_desc_type::undef) {
index cfe8a33..9748ae6 100644 (file)
@@ -78,7 +78,8 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() {
         }
         auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision());
 
-        if (inputDataType != memory::u8 || weightsDataType != memory::s8) {
+        // TODO(amalyse): what are the cases when we have non-I8 weights and have to override the precisions?
+        if ((inputDataType != memory::u8 || weightsDataType != memory::s8) && inputDataType != memory::bf16) {
             inputDataType = memory::f32;
             outputDataType = memory::f32;
         }
@@ -355,6 +356,9 @@ void MKLDNNFullyConnectedNode::createDescriptor(const std::vector<InferenceEngin
     TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
     mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
     mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
+    if (inDesc.getPrecision() == Precision::BF16) {
+        bdt = mkldnn::memory::data_type::f32;
+    }
 
     if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) {
         wdt = memory::s8;
index d89f800..4c56bd0 100644 (file)
@@ -48,8 +48,9 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() {
     memory::format outFormat = mkldnn::memory::format_undef;
     if (getType() == Input || getType() == MemoryInput) {
         precision = getCnnLayer()->outData[0]->getPrecision();
-        if (precision == InferenceEngine::Precision::U16 || isMeanImage)
+        if (precision == InferenceEngine::Precision::U16 || isMeanImage) {
             precision = InferenceEngine::Precision::FP32;
+        }
         auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
         InferenceEngine::DataConfig dataConfig;
         dataConfig.inPlace = -1;
index 88721cc..f1d0cb0 100644 (file)
@@ -19,7 +19,7 @@ void MKLDNNLrnNode::getSupportedDescriptors() {
     if (!descs.empty())
         return;
     InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
-    if (precision != InferenceEngine::Precision::FP32)
+    if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16)
         precision = InferenceEngine::Precision::FP32;
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
     auto * lrnLayer = dynamic_cast<NormLayer*>(getCnnLayer().get());
index 5ab8352..870a8f6 100644 (file)
@@ -41,7 +41,8 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
     inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision();
     outputPrecision = getCnnLayer()->outData[0]->getPrecision();
     // Dirty WA to support stat based quantization approach
-    if (this->getCnnLayer()->precision != Precision::I8) {
+    if (this->getCnnLayer()->precision != Precision::I8
+        && inputPrecision != Precision::BF16) {
         if (type == PoolingLayer::MAX) {
             // MKLDNN supports only equal precisions for input and output
             outputPrecision = inputPrecision;
@@ -49,6 +50,9 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
             outputPrecision = Precision::FP32;
         }
     }
+    if (inputPrecision == Precision::BF16) {
+        outputPrecision = inputPrecision;
+    }
 
     if (!fusedWith.empty()) {
         auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer();
@@ -84,6 +88,10 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
         MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format::ndhwc : memory::format::nhwc};
         MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format::ndhwc : memory::format::nhwc};
         createDescriptor({ in_candidate }, { out_candidate });
+    } else if ((parentDims.ndims() == 4 || parentDims.ndims() == 5) && (inputDataType == memory::bf16 || outputDataType == memory::bf16)) {
+        MKLDNNMemoryDesc in_candidate{ parentDims, memory::bf16, parentDims.ndims() == 5 ? memory::format::nCdhw16c : memory::format::nChw16c};
+        MKLDNNMemoryDesc out_candidate{ childDims, memory::bf16, parentDims.ndims() == 5 ? memory::format::nCdhw16c : memory::format::nChw16c};
+        createDescriptor({ in_candidate }, { out_candidate });
     } else if ((parentDims.ndims() == 4 || parentDims.ndims() == 5) && parentDims[1] == 1) {
         inputDataType = memory::f32;
         outputDataType = memory::f32;
@@ -92,8 +100,10 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
         MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format::ncdhw : memory::format::nchw};
         createDescriptor({ in_candidate }, { out_candidate });
     } else {
-        inputDataType = memory::f32;
-        outputDataType = memory::f32;
+        if (inputDataType != memory::bf16) {
+            inputDataType = memory::f32;
+            outputDataType = memory::f32;
+        }
         // It doesn't support any format
         for (auto format : getAvailableFormatsForDims(parentDims)) {
             MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, format};
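
The BF16 branch above requests the channel-blocked nChw16c/nCdhw16c formats. As a reference, element addressing for nChw16c can be sketched as follows (assuming the channel dimension is padded up to a multiple of 16, as mkl-dnn blocked layouts do):

    #include <cstddef>

    // Reference sketch of nChw16c addressing: dims are laid out as n, C/16, h, w, 16,
    // with the innermost block iterating over 16 consecutive channels.
    static size_t offset_nChw16c(size_t n, size_t c, size_t h, size_t w,
                                 size_t C, size_t H, size_t W) {
        const size_t block = 16;
        const size_t Cblocks = (C + block - 1) / block;
        return ((((n * Cblocks + c / block) * H + h) * W + w) * block) + c % block;
    }
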
index e49c4fc..5ef5eef 100644 (file)
@@ -13,6 +13,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include "bf16transformer.h"
 
 using namespace mkldnn::impl::cpu;
 using namespace mkldnn::impl::utils;
@@ -333,9 +334,22 @@ public:
                 THROW_IE_EXCEPTION << "Normalize supports from 2D to 4D blobs!";
             }
 
-            weights = std::dynamic_pointer_cast<TBlob<float>>(layer->blobs.at("weights"));
-            if (!weights)
-                THROW_IE_EXCEPTION << layer->name << " weights is empty!";
+            MemoryBlob::Ptr tweights = as<MemoryBlob>(layer->blobs.at("weights"));
+            if (!tweights) {
+                THROW_IE_EXCEPTION << "Weights are not initialized or cannot be cast to MemoryBlob for Normalize layer with name '"
+                    << layer->name << "'";
+            }
+
+            if (tweights->getTensorDesc().getPrecision() == Precision::FP32) {
+                weights = tweights;
+            } else if (tweights->getTensorDesc().getPrecision() == Precision::BF16) {
+                MKLDNNPlugin::BF16Transformer transformer;
+                weights = transformer.convertBF16ToFloat(tweights);
+            } else {
+                // Unknown, unsupported data type - report an error
+                THROW_IE_EXCEPTION << "Weights for Normalize layer with name '" << layer->name <<
+                    "' have unsupported data type " << tweights->getTensorDesc().getPrecision();
+            }
             across_spatial = layer->GetParamAsBool("across_spatial", false);
             channel_shared = layer->GetParamAsBool("channel_shared", false);
             eps = layer->GetParamAsFloat("eps");
@@ -514,7 +528,7 @@ private:
     std::shared_ptr<jit_uni_normalize_across_spatial_kernel> normalize_across_spatial_kernel;
     std::shared_ptr<jit_uni_sqr_sum_kernel> sqr_sum_kernel;
 
-    TBlob<float>::Ptr weights;
+    MemoryBlob::Ptr weights;
     bool across_spatial = true;
     bool channel_shared = true;
     float eps = 1e-10f;
index 9a9a4e5..04aa8be 100644 (file)
@@ -31,21 +31,26 @@ public:
             if (layer->outData.size() != 1 && layer->outData.size() != 2)
                 THROW_IE_EXCEPTION << layer->name << " Incorrect number of output edges!";
 
-            if (layer->insData[TOPK_DATA].lock()->getTensorDesc().getPrecision() != Precision::FP32 ||
+            // DataConfigurator::addConfig will automatically change the BF16 data type to FP32.
+            // It can be changed back explicitly, e.g. confs.back().outConfs[i].desc.setPrecision(Precision::BF16),
+            // if the current layer supports BF16 natively. Usually it does not, so nothing special is required.
+            if ((layer->insData[TOPK_DATA].lock()->getTensorDesc().getPrecision() != Precision::FP32 &&
+                layer->insData[TOPK_DATA].lock()->getTensorDesc().getPrecision() != Precision::BF16) ||
                 layer->insData[TOPK_K].lock()->getTensorDesc().getPrecision() != Precision::I32)
-                THROW_IE_EXCEPTION << layer->name << " Incorrect input data/index values precision.";
+                THROW_IE_EXCEPTION << layer->name << " TopKImpl - Incorrect input data/index values precision.";
 
             if (layer->insData[TOPK_K].lock()->getTensorDesc().getDims().size() > 1)
-                THROW_IE_EXCEPTION << layer->name << " Index vector should be 1 dimension";
+                THROW_IE_EXCEPTION << layer->name << " TopKImpl - Index vector should be 1 dimension";
 
             SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
             SizeVector src_data_dims = layer->insData[TOPK_DATA].lock()->getTensorDesc().getDims();
             if (src_data_dims.size() != dst_dims.size())
-                THROW_IE_EXCEPTION << layer->name << " Incorrect input/output tensor dimension sizes";
+                THROW_IE_EXCEPTION << layer->name << " TopKImpl - Incorrect input/output tensor dimension sizes";
 
             if (layer->outData.size() == 2) {
-                if (layer->outData[TOPK_VALUE]->getTensorDesc().getPrecision() != Precision::FP32)
-                    THROW_IE_EXCEPTION << layer->name << " Incorrect output data tensor precision. Only FP32 is supported!";
+                if (layer->outData[TOPK_VALUE]->getTensorDesc().getPrecision() != Precision::FP32 &&
+                    layer->outData[TOPK_VALUE]->getTensorDesc().getPrecision() != Precision::BF16)
+                    THROW_IE_EXCEPTION << layer->name << " TopKImpl - Incorrect output data tensor precision. Only floating point data types are supported!";
 
                 SizeVector dst_idx_dims = layer->outData[TOPK_INDEX]->getTensorDesc().getDims();
                 if (dst_dims.size() != dst_idx_dims.size())
index c6b360f..786f1d7 100644 (file)
@@ -122,11 +122,11 @@ static Blob::Ptr prepare_plain_data(Blob::Ptr blob) {
             break;
         }
         case Precision::I16:
-        case Precision::U16: {
-            auto *pln_blob_ptr = pln_blob->buffer().as<int16_t*>();
+        case Precision::U16:
+        case Precision::BF16: {
+            auto *pln_blob_ptr = pln_blob->buffer().as<int16_t *>();
             auto *blob_ptr = blob->buffer().as<int16_t *>();
-            for (size_t i = 0; i < data_size; i++)
-                pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
+            for (size_t i = 0; i < data_size; i++) pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)];
             break;
         }
         case Precision::I8:
@@ -187,7 +187,8 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) {
            << dims.size() << "D "
            << "shape: ";
     for (size_t d : dims) stream << d << " ";
-    stream << "(" << _blob->size() << ")" <<std::endl;
+    stream << "(" << _blob->size() << ")" <<
+    " by address 0x" << std::hex << _blob->buffer().as<long long>() << std::dec <<std::endl;
 
     // Dump data
     MKLDNNMemoryDesc mdesc(_blob->getTensorDesc());
@@ -202,6 +203,17 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) {
                 stream << blob_ptr[blob_wrp.off_l(i)] << std::endl;
             break;
         }
+        case Precision::BF16:
+        {
+            auto *blob_ptr = _blob->buffer().as<int16_t *>();
+            for (size_t i = 0; i < data_size; i++) {
+                int i16n = blob_ptr[blob_wrp.off_l(i)];
+                i16n = i16n << 16;
+                float fn = *(reinterpret_cast<float *>(&i16n));
+                stream << fn << std::endl;
+            }
+            break;
+        }
         case Precision::I32: {
             auto *blob_ptr = _blob->buffer().as<int32_t*>();
             for (size_t i = 0; i < data_size; i++)
index b184940..bcddeab 100644 (file)
@@ -117,6 +117,7 @@ InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision p
         USE_FACTORY(I64);
         USE_FACTORY(U64);
         USE_FACTORY(BIN);
+        USE_FACTORY(BF16);
         USE_FACTORY(BOOL);
     default:
         THROW_IE_EXCEPTION << "cannot locate blob for precision: " << precision;
index c53fbc6..db3ea04 100644 (file)
@@ -40,6 +40,21 @@ namespace InferenceEngine {
  * @snippet example_async_infer_request.cpp async_infer_request:define_pipeline
  */
 class AsyncInferRequestThreadSafeDefault : public AsyncInferRequestThreadSafeInternal {
+    using AtomicCallback = std::atomic<IInferRequest::CompletionCallback>;
+    using Futures = std::vector<std::shared_future<void>>;
+    using Promise = std::shared_ptr<std::promise<void>>;
+    enum Stage_e : std::uint8_t { executor, task };
+    struct DisableCallbackGuard{
+        explicit DisableCallbackGuard(AtomicCallback& callback)
+            : _callbackRef(callback), _callback(callback.exchange(nullptr)) {}
+        ~DisableCallbackGuard() {
+            _callbackRef = _callback;
+        }
+        AtomicCallback& _callbackRef;
+        IInferRequest::CompletionCallback _callback;
+    };
+    InferRequestInternal::Ptr _syncRequest;
+
 public:
     /**
      * @brief A shared pointer to AsyncInferRequestThreadSafeDefault
@@ -47,7 +62,7 @@ public:
     using Ptr = std::shared_ptr<AsyncInferRequestThreadSafeDefault>;
 
     /**
-     * @brief      Wraps a InferRequestInternal::Ptr implementation and constructs a 
+     * @brief      Wraps a InferRequestInternal::Ptr implementation and constructs a
      * AsyncInferRequestThreadSafeDefault::_pipeline where `taskExecutor` is used to run InferRequestInternal::Infer
      * asynchronously.
      *
@@ -58,12 +73,11 @@ public:
     AsyncInferRequestThreadSafeDefault(const InferRequestInternal::Ptr& request,
                                        const ITaskExecutor::Ptr& taskExecutor,
                                        const ITaskExecutor::Ptr& callbackExecutor)
-        : _requestExecutor {taskExecutor},
+        : _syncRequest {request},
+          _requestExecutor {taskExecutor},
           _callbackExecutor {callbackExecutor},
-          _syncRequest {request} {
-        _pipeline = {
-            { _requestExecutor, [this] { _syncRequest->Infer(); } }
-        };
+          _pipeline {{taskExecutor, [this] {_syncRequest->Infer();}}},
+          _syncPipeline{{std::make_shared<ImmediateExecutor>(), [this] {_syncRequest->Infer();}}} {
     }
 
     /**
@@ -140,9 +154,12 @@ protected:
      * @brief Creates and run the first stage task. If destructor was not called add a new std::future to the
      * AsyncInferRequestThreadSafeDefault::_futures list that would be used to wait
      * AsyncInferRequestThreadSafeDefault::_pipeline finish
+     * @param[in]  itBeginStage Iterator to the first stage of the pipeline
+     * @param[in]  itEndStage End iterator of the pipeline
+     * @param[in]  callbackExecutor Executor for the final or error stage
      */
-    void RunFirstStage() {
-        _itStage = _pipeline.begin();
+    void RunFirstStage(const Pipeline::iterator itBeginStage, const Pipeline::iterator itEndStage,
+                       const ITaskExecutor::Ptr callbackExecutor = {}) {
         _promise = {};
         bool stop = [&] {
             std::lock_guard<std::mutex> lock(_mutex);
@@ -165,9 +182,9 @@ protected:
 
         if (!stop) {
             try {
-                auto& firstStageExecutor = std::get<Stage_e::executor>(*_itStage);
+                auto& firstStageExecutor = std::get<Stage_e::executor>(*itBeginStage);
                 IE_ASSERT(nullptr != firstStageExecutor);
-                firstStageExecutor->run(MakeNextStageTask());
+                firstStageExecutor->run(MakeNextStageTask(itBeginStage, itEndStage, std::move(callbackExecutor)));
             } catch (...) {
                 _promise.set_exception(std::current_exception());
                 throw;
@@ -199,31 +216,34 @@ protected:
      * @brief Implements Infer() using StartAsync() and Wait()
      */
     void InferUsingAsync() {
-        struct CallbackStorage {
-            explicit CallbackStorage(AtomicCallback& callback)
-                : _callbackRef(callback), _callback(callback.exchange(nullptr)) {}
-            ~CallbackStorage() {
-                _callbackRef = _callback;
-            }
-            AtomicCallback& _callbackRef;
-            IInferRequest::CompletionCallback _callback;
-        } storage {_callback};
+        DisableCallbackGuard disableCallbackGuard{_callback};
         StartAsync_ThreadUnsafe();
         Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
     }
 
-    ITaskExecutor::Ptr _requestExecutor;  //!< Used to run inference CPU tasks
-    ITaskExecutor::Ptr _callbackExecutor;  //!< Used to run post inference callback
+    /**
+     * @brief Implements Infer() using synchronous pipeline and Wait()
+     */
+    void InferUsingSync() {
+        DisableCallbackGuard disableCallbackGuard{_callback};
+        _syncRequest->checkBlobs();
+        RunFirstStage(_syncPipeline.begin(), _syncPipeline.end(), _syncCallbackExecutor);
+        Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
+    }
+
+    ITaskExecutor::Ptr _requestExecutor;  //!< Used to run inference CPU tasks.
+    ITaskExecutor::Ptr _callbackExecutor;  //!< Used to run post inference callback in asynchronous pipeline
+    ITaskExecutor::Ptr _syncCallbackExecutor;  //!< Used to run post inference callback in synchronous pipeline
     Pipeline _pipeline;  //!< Pipeline variable that should be filled by inherited class.
+    Pipeline _syncPipeline;  //!< Synchronous pipeline variable that should be filled by inherited class.
 
     void StartAsync_ThreadUnsafe() override {
         _syncRequest->checkBlobs();
-        RunFirstStage();
+        RunFirstStage(_pipeline.begin(), _pipeline.end(), _callbackExecutor);
     }
 
     void Infer_ThreadUnsafe() override {
-        _syncRequest->checkBlobs();
-        _syncRequest->InferImpl();
+        InferUsingSync();
     }
 
     void GetPerformanceCounts_ThreadUnsafe(std::map<std::string, InferenceEngineProfileInfo>& perfMap) const override {
@@ -264,38 +284,35 @@ protected:
     }
 
 private:
-    using AtomicCallback = std::atomic<IInferRequest::CompletionCallback>;
-    using Futures = std::vector<std::shared_future<void>>;
-    using Promise = std::shared_ptr<std::promise<void>>;
-    enum Stage_e : std::uint8_t { executor, task };
-
     /**
      * @brief Create a task with next pipeline stage.
-     *        Each call to MakeNextStageTask() generates `InferenceEngine::Task` objects for each stage.
-     *        When stage task is called it increments
-     *        `_stage` counter, call `_pipeline` task for this stage and generates next stage task using
-     * MakeNextStageTask() and pass it to executor. On last stage or if the exception is raised from `_pipeline` task
+     * Each call to MakeNextStageTask() generates @ref Task objects for each stage.
+     * On the last stage, or if an exception is raised from a `_pipeline` task,
      * the last stage task is called or passed to callback executor if it is presented. The last stage task call the
      * callback, if it is presented, capture the `_promise` member and use it to forward completion or exception to the
      * one of `_futures` member
+     * @param[in]  itStage Iterator to the next stage of the pipeline
+     * @param[in]  itEndStage End iterator of the pipeline
+     * @param[in]  callbackExecutor Executor that will run final stage with callback call
      * @return A next stage task
      */
-    Task MakeNextStageTask() {
-        return [this]() mutable {
+    Task MakeNextStageTask(const Pipeline::iterator itStage, const Pipeline::iterator itEndStage,
+                           const ITaskExecutor::Ptr callbackExecutor) {
+        return std::bind([this, itStage, itEndStage](ITaskExecutor::Ptr& callbackExecutor) mutable {
             StatusCode requestStatus = StatusCode::OK;
             std::exception_ptr localCurrentException = nullptr;
-            auto& thisStage = *_itStage;
-            auto copyItStage = ++_itStage;
+            auto& thisStage = *itStage;
+            auto itNextStage = itStage + 1;
 
             try {
                 auto& stageTask = std::get<Stage_e::task>(thisStage);
                 IE_ASSERT(nullptr != stageTask);
                 stageTask();
-                if (_pipeline.end() != _itStage) {
-                    auto nextStage = *_itStage;
+                if (itEndStage != itNextStage) {
+                    auto& nextStage = *itNextStage;
                     auto& nextStageExecutor = std::get<Stage_e::executor>(nextStage);
                     IE_ASSERT(nullptr != nextStageExecutor);
-                    nextStageExecutor->run(MakeNextStageTask());
+                    nextStageExecutor->run(MakeNextStageTask(itNextStage, itEndStage, std::move(callbackExecutor)));
                 }
             } catch (InferenceEngine::details::InferenceEngineException& ie_ex) {
                 requestStatus = ie_ex.hasStatus() ? ie_ex.getStatus() : StatusCode::GENERAL_ERROR;
@@ -305,7 +322,7 @@ private:
                 localCurrentException = std::current_exception();
             }
 
-            if ((_pipeline.end() == copyItStage) || (nullptr != localCurrentException)) {
+            if ((itEndStage == itNextStage) || (nullptr != localCurrentException)) {
                 auto lastStageTask = [this, requestStatus, localCurrentException]() mutable {
                     auto promise = std::move(_promise);
                     auto callback = _callback.load();
@@ -327,20 +344,18 @@ private:
                     }
                 };
 
-                if (nullptr == _callbackExecutor) {
+                if (nullptr == callbackExecutor) {
                     lastStageTask();
                 } else {
-                    _callbackExecutor->run(std::move(lastStageTask));
+                    callbackExecutor->run(std::move(lastStageTask));
                 }
             }
-        };
+        }, std::move(callbackExecutor));
     }
 
-    InferRequestInternal::Ptr _syncRequest;
     void* _userData = nullptr;
     AtomicCallback _callback = {nullptr};
     IInferRequest::Ptr _publicInterface;
-    Pipeline::iterator _itStage;
     std::promise<void> _promise;
     mutable std::mutex _mutex;
     Futures _futures;
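
A plugin-side subclass is expected to fill _pipeline (and, after this change, optionally _syncPipeline) with its own stages. A hedged sketch with an illustrative two-stage pipeline; the class name, the wait stage and the include path are assumptions, not the actual MKLDNN implementation:

    #include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>  // path assumed

    // Illustrative subclass: a two-stage asynchronous pipeline (submit, then wait),
    // for a hypothetical device that separates kernel submission from result readiness.
    class MyAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
    public:
        MyAsyncInferRequest(const InferenceEngine::InferRequestInternal::Ptr& request,
                            const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
                            const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
                            const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
            : AsyncInferRequestThreadSafeDefault(request, taskExecutor, callbackExecutor) {
            // Replace the default single-stage pipeline with two stages.
            _pipeline = {
                { taskExecutor, [request] { request->InferImpl(); } },                    // stage 1: submit
                { waitExecutor, [] { /* hypothetical: wait for device completion */ } }   // stage 2: sync
            };
        }
    };
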
index aa42a5d..41427d1 100644 (file)
@@ -72,4 +72,11 @@ INFERENCE_ENGINE_API_CPP(bool) with_cpu_x86_avx512f();
  */
 INFERENCE_ENGINE_API_CPP(bool) with_cpu_x86_avx512_core();
 
+/**
+ * @brief      Checks whether the CPU supports the BFloat16 capability
+ * @ingroup    ie_dev_api_system_conf
+ * @return     `true` if AVX512_BF16 instructions are available, `false` otherwise
+ */
+INFERENCE_ENGINE_API_CPP(bool) with_cpu_x86_bfloat16();
+
 }  // namespace InferenceEngine
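
A hedged usage sketch of the new capability check, mirroring the dispatch done by the MKLDNN plugin earlier in this change:

    #include <ie_system_conf.h>
    #include <iostream>

    int main() {
        // Pick the numeric path at runtime depending on hardware support.
        if (InferenceEngine::with_cpu_x86_bfloat16()) {
            std::cout << "AVX512_BF16 available: BF16 kernels can run natively" << std::endl;
        } else {
            std::cout << "No AVX512_BF16: BF16 networks are converted back to FP32" << std::endl;
        }
        return 0;
    }
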
index b2fb1a4..6f1525b 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 5fa4f27..f6b8898 100644 (file)
@@ -13,6 +13,7 @@
 #include <unordered_map>
 #include <vector>
 #include <utility>
+#include <mutex>
 
 #include "threading/ie_itask_executor.hpp"
 #include "threading/ie_istreams_executor.hpp"
@@ -40,6 +41,8 @@ public:
 private:
     std::unordered_map<std::string, ITaskExecutor::Ptr> executors;
     std::vector<std::pair<IStreamsExecutor::Config, IStreamsExecutor::Ptr> > cpuStreamsExecutors;
+    std::mutex streamExecutorMutex;
+    std::mutex taskExecutorMutex;
 };
 
 /**
@@ -59,13 +62,7 @@ public:
      * @brief      Returns a global instance of ExecutorManager
      * @return     The instance.
      */
-    static ExecutorManager* getInstance() {
-        if (!_instance) {
-            _instance = new ExecutorManager();
-        }
-
-        return _instance;
-    }
+    static ExecutorManager* getInstance();
 
     /**
      * @brief A deleted copy constructor
@@ -92,9 +89,6 @@ public:
      */
     size_t getExecutorsNumber();
 
-    /**
-     * @cond
-     */
     size_t getIdleCPUStreamsExecutorsNumber();
 
     void clear(const std::string& id = {});
@@ -106,7 +100,9 @@ private:
     ExecutorManager() {}
 
     ExecutorManagerImpl _impl;
-    static ExecutorManager* _instance;
+
+    static std::mutex _mutex;
+    static ExecutorManager *_instance;
 };
 
 }  // namespace InferenceEngine
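
The accessor definition now lives out of line together with the new static mutex. A hedged sketch of a matching definition (inside namespace InferenceEngine; the actual ie_executor_manager.cpp may differ):

    #include "threading/ie_executor_manager.hpp"  // header path assumed

    // Possible out-of-line definitions for the now thread-safe singleton accessor.
    std::mutex ExecutorManager::_mutex;
    ExecutorManager* ExecutorManager::_instance = nullptr;

    ExecutorManager* ExecutorManager::getInstance() {
        std::lock_guard<std::mutex> lock(_mutex);  // serialize first-time construction
        if (_instance == nullptr) {
            _instance = new ExecutorManager();
        }
        return _instance;
    }
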
index 628ea6a..d53c6e4 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 8ad5e97..a2163f9 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 3acee43..2a5da46 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index c3ad1e5..5943b29 100644 (file)
@@ -189,6 +189,6 @@ ie_developer_export_targets(${TARGET_NAME})
 # install
 
 install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
-        ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
+        ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
         LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
index db46c1b..cf121f4 100644 (file)
@@ -851,139 +851,17 @@ void calcRowLinear_32F(float *dst[],
 }
 
 //------------------------------------------------------------------------------
-namespace calcRowArea {
-// vertical pass
-template<typename T, typename A, typename I, typename W>
-static inline void downy(const T *src[], int inWidth, const MapperUnit<A, I>& ymap, A yalpha,
-                         W vbuf[]) {
-    int y_1st = ymap.index0;
-    int ylast = ymap.index1 - 1;
-
-    // yratio > 1, so at least 2 rows
-    GAPI_DbgAssert(y_1st < ylast);
-
-    // 1st and last rows
-    {
-        int w = 0;
-
-    #if CV_SIMD128
-        if (std::is_same<T, uint8_t>::value) {
-            for (; w <= inWidth - 8; w += 8) {
-                v_uint16x8 vsrc0 = v_load_expand(reinterpret_cast<const uint8_t*>(& src[0][w]));
-                v_uint16x8 vsrc1 = v_load_expand(reinterpret_cast<const uint8_t*>(& src[ylast - y_1st][w]));
-                v_uint16x8 vres = v_mulhi(vsrc0 << 8, static_cast<Q0_16>(ymap.alpha0)) +
-                                  v_mulhi(vsrc1 << 8, static_cast<Q0_16>(ymap.alpha1));
-                v_store(reinterpret_cast<Q8_8*>(& vbuf[w]), vres);
-            }
-        }
-    #endif
-
-        for (; w < inWidth; w++) {
-            vbuf[w] = mulas(ymap.alpha0, src[0][w])
-                    + mulas(ymap.alpha1, src[ylast - y_1st][w]);
-        }
-    }
-
-    // inner rows (if any)
-    for (int i = 1; i < ylast - y_1st; i++) {
-        int w = 0;
-
-    #if CV_SIMD128
-        if (std::is_same<T, uint8_t>::value) {
-            for (; w <= inWidth - 8; w += 8) {
-                v_uint16x8 vsrc = v_load_expand(reinterpret_cast<const uint8_t*>(& src[i][w]));
-                v_uint16x8 vres = v_load(reinterpret_cast<Q8_8*>(& vbuf[w]));
-                vres = vres + v_mulhi(vsrc << 8, static_cast<Q0_16>(yalpha));
-                v_store(reinterpret_cast<Q8_8*>(& vbuf[w]), vres);
-            }
-        }
-    #endif
-
-        for (; w < inWidth; w++) {
-            vbuf[w] += mulas(yalpha, src[i][w]);
-        }
-    }
-}
-
-// horizontal pass
-template<typename T, typename A, typename I, typename W>
-static inline void downx(T dst[], int outWidth, int xmaxdf, const I xindex[], const A xalpha[],
-                         const W vbuf[]) {
-#define HSUM(xmaxdf) \
-    for (int x = 0; x < outWidth; x++) { \
-        int      index =  xindex[x]; \
-        const A *alpha = &xalpha[x * xmaxdf]; \
-\
-        W sum = 0; \
-        for (int i = 0; i < xmaxdf; i++) { \
-            sum += mulaw(alpha[i], vbuf[index + i]); \
-        } \
-\
-        dst[x] = convert_cast<T>(sum); \
-    }
-
-    if (2 == xmaxdf) {
-        HSUM(2);
-    } else if (3 == xmaxdf) {
-        HSUM(3);
-    } else if (4 == xmaxdf) {
-        HSUM(4);
-    } else if (5 == xmaxdf) {
-        HSUM(5);
-    } else if (6 == xmaxdf) {
-        HSUM(6);
-    } else if (7 == xmaxdf) {
-        HSUM(7);
-    } else if (8 == xmaxdf) {
-        HSUM(8);
-    } else {
-        HSUM(xmaxdf);
-    }
-#undef HSUM
-}
-}  // namespace calcRowArea
-
-template<typename T, typename A, typename I, typename W>
-static void calcRowArea_impl_sse4(T dst[], const T *src[], const Size& inSz, const Size& outSz,
-    A yalpha, const MapperUnit<A, I>& ymap, int xmaxdf, const I xindex[], const A xalpha[],
-    W vbuf[]) {
-    bool xRatioEq1 = inSz.width  == outSz.width;
-    bool yRatioEq1 = inSz.height == outSz.height;
-
-    if (!yRatioEq1 && !xRatioEq1) {
-        calcRowArea::downy(src, inSz.width, ymap, yalpha, vbuf);
-        calcRowArea::downx(dst, outSz.width, xmaxdf, xindex, xalpha, vbuf);
-
-    } else if (!yRatioEq1) {
-        GAPI_DbgAssert(xRatioEq1);
-        calcRowArea::downy(src, inSz.width, ymap, yalpha, vbuf);
-        for (int x = 0; x < outSz.width; x++) {
-            dst[x] = convert_cast<T>(vbuf[x]);
-        }
-
-    } else if (!xRatioEq1) {
-        GAPI_DbgAssert(yRatioEq1);
-        for (int w = 0; w < inSz.width; w++) {
-            vbuf[w] = convert_cast<W>(src[0][w]);
-        }
-        calcRowArea::downx(dst, outSz.width, xmaxdf, xindex, xalpha, vbuf);
-
-    } else {
-        GAPI_DbgAssert(xRatioEq1 && yRatioEq1);
-        memcpy(dst, src[0], outSz.width * sizeof(T));
-    }
-}
 
 void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz, const Size& outSz,
     Q0_16 yalpha, const MapperUnit8U &ymap, int xmaxdf, const short xindex[], const Q0_16 xalpha[],
     Q8_8 vbuf[]) {
-    calcRowArea_impl_sse4(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
+    calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
 }
 
 void calcRowArea_32F(float dst[], const float *src[], const Size& inSz, const Size& outSz,
     float yalpha, const MapperUnit32F& ymap, int xmaxdf, const int xindex[], const float xalpha[],
     float vbuf[]) {
-    calcRowArea_impl_sse4(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
+    calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf);
 }
 
 //------------------------------------------------------------------------------
index a5513e0..667e9d6 100644 (file)
@@ -1244,7 +1244,67 @@ static void calcAreaRow(const cv::gapi::fluid::View& in, cv::gapi::fluid::Buffer
 
         auto dst = out.OutLine<T>(l);
 
-    #ifdef HAVE_SSE
+        #ifdef HAVE_AVX512
+        if (with_cpu_x86_avx512f()) {
+            if (std::is_same<T, uchar>::value) {
+                avx512::calcRowArea_8U(reinterpret_cast<uchar*>(dst),
+                                       reinterpret_cast<const uchar**>(src),
+                                       inSz, outSz,
+                                       static_cast<Q0_16>(ymapper.alpha),
+                                       reinterpret_cast<const MapperUnit8U&>(ymap),
+                                       xmaxdf[0],
+                                       reinterpret_cast<const short*>(xindex),
+                                       reinterpret_cast<const Q0_16*>(xalpha),
+                                       reinterpret_cast<Q8_8*>(vbuf));
+                continue;  // next l = 0, ..., lpi-1
+            }
+
+            if (std::is_same<T, float>::value) {
+                avx512::calcRowArea_32F(reinterpret_cast<float*>(dst),
+                                        reinterpret_cast<const float**>(src),
+                                        inSz, outSz,
+                                        static_cast<float>(ymapper.alpha),
+                                        reinterpret_cast<const MapperUnit32F&>(ymap),
+                                        xmaxdf[0],
+                                        reinterpret_cast<const int*>(xindex),
+                                        reinterpret_cast<const float*>(xalpha),
+                                        reinterpret_cast<float*>(vbuf));
+                continue;
+            }
+        }
+        #endif  // HAVE_AVX512
+
+        #ifdef HAVE_AVX2
+        if (with_cpu_x86_avx2()) {
+            if (std::is_same<T, uchar>::value) {
+                avx::calcRowArea_8U(reinterpret_cast<uchar*>(dst),
+                                    reinterpret_cast<const uchar**>(src),
+                                    inSz, outSz,
+                                    static_cast<Q0_16>(ymapper.alpha),
+                                    reinterpret_cast<const MapperUnit8U&>(ymap),
+                                    xmaxdf[0],
+                                    reinterpret_cast<const short*>(xindex),
+                                    reinterpret_cast<const Q0_16*>(xalpha),
+                                    reinterpret_cast<Q8_8*>(vbuf));
+                continue;  // next l = 0, ..., lpi-1
+            }
+
+            if (std::is_same<T, float>::value) {
+                avx::calcRowArea_32F(reinterpret_cast<float*>(dst),
+                                     reinterpret_cast<const float**>(src),
+                                     inSz, outSz,
+                                     static_cast<float>(ymapper.alpha),
+                                     reinterpret_cast<const MapperUnit32F&>(ymap),
+                                     xmaxdf[0],
+                                     reinterpret_cast<const int*>(xindex),
+                                     reinterpret_cast<const float*>(xalpha),
+                                     reinterpret_cast<float*>(vbuf));
+                continue;
+            }
+        }
+        #endif  // HAVE_AVX2
+
+        #ifdef HAVE_SSE
         if (with_cpu_x86_sse42()) {
             if (std::is_same<T, uchar>::value) {
                 calcRowArea_8U(reinterpret_cast<uchar*>(dst),
@@ -1272,7 +1332,7 @@ static void calcAreaRow(const cv::gapi::fluid::View& in, cv::gapi::fluid::Buffer
                 continue;
             }
         }
-    #endif  // HAVE_SSE
+        #endif  // HAVE_SSE
 
         // vertical pass
         int y_1st = ymap.index0;
index fd2509e..a5c806b 100644 (file)
@@ -1,7 +1,3 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
 #ifndef IE_PREPROCESS_GAPI_KERNELS_SIMD_IMPL_H
 #define IE_PREPROCESS_GAPI_KERNELS_SIMD_IMPL_H
 
index d9b54a7..f7f5f85 100644 (file)
@@ -33,7 +33,6 @@ target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR})
 target_include_directories(${TARGET_NAME} PUBLIC ${IE_MAIN_SOURCE_DIR}/include)
 
 add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
-add_clang_format_target(${TARGET_NAME}_clang_format FOR_TARGETS ${TARGET_NAME})
 
 # developer package
 
@@ -42,6 +41,6 @@ ie_developer_export_targets(${TARGET_NAME})
 # install
 
 install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
-        ARCHIVE DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
+        ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core
         LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core)
index 3b0e095..f03c05c 100644 (file)
@@ -39,7 +39,7 @@ void ngraph::pass::PullTransposeThroughFQUp::pull_transpose_through_fq() {
 
         auto input_shape = fq->input(0).get_source_output().get_shape();
 
-        std::vector<std::shared_ptr<ngraph::Node> > fq_inputs;
+        ngraph::OutputVector fq_inputs;
         for (size_t i = 0; i < fq->inputs().size(); ++i) {
             std::shared_ptr<ngraph::Node> fq_input;
             fq_input = fq->input(i).get_source_output().get_node_shared_ptr();
@@ -52,11 +52,11 @@ void ngraph::pass::PullTransposeThroughFQUp::pull_transpose_through_fq() {
                 fq_input = std::make_shared<ngraph::opset1::Unsqueeze>(fq_input,
                                                                        opset1::Constant::create(element::i64, Shape{unsqueeze_axes.size()}, unsqueeze_axes));
             }
-            fq_input = transpose->copy_with_new_args({fq_input, const_order});
+            fq_input = transpose->copy_with_new_inputs({fq_input, const_order});
             fq_inputs.push_back(fq_input);
         }
 
-        auto new_fq = fq->copy_with_new_args(fq_inputs);
+        auto new_fq = fq->copy_with_new_inputs(fq_inputs);
         new_fq->set_friendly_name(fq->get_friendly_name());
         ngraph::replace_node(transpose, new_fq);
 
@@ -65,4 +65,4 @@ void ngraph::pass::PullTransposeThroughFQUp::pull_transpose_through_fq() {
 
     auto m = std::make_shared<ngraph::pattern::Matcher>(transpose, "PullTransposeThroughFQUp");
     this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE);
-}
\ No newline at end of file
+}
index 161e9f3..6c9d04f 100644 (file)
@@ -23,6 +23,8 @@ bool get_single_value(const std::shared_ptr<op::Constant>& const_node, float& va
         return util::normalize_single_value(const_node->get_vector<float16>(), value);
     case element::Type_t::f32:
         return util::normalize_single_value(const_node->get_vector<float>(), value);
+    case element::Type_t::bf16:
+        return util::normalize_single_value(const_node->get_vector<bfloat16>(), value);
     case element::Type_t::f64:
         return util::normalize_single_value(const_node->get_vector<double>(), value);
     case element::Type_t::i8:
index 6521529..43e9308 100644 (file)
@@ -47,6 +47,8 @@ function(add_common_target TARGET_NAME STATIC_IE)
 
         ie_developer_export_targets(${TARGET_NAME})
     endif()
+
+    target_link_libraries(${TARGET_NAME} PUBLIC ${NGRAPH_LIBRARIES} inference_engine_transformations)
 endfunction()
 
 add_common_target("vpu_common_lib" FALSE)
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/operations/dynamic_shape_resolver.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/operations/dynamic_shape_resolver.hpp
new file mode 100644 (file)
index 0000000..50c8dfc
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/op/op.hpp"
+
+#include <memory>
+
+namespace ngraph { namespace op {
+
+class DynamicShapeResolver : public Op {
+public:
+    static constexpr NodeTypeInfo type_info{"DynamicShapeResolver", 1};
+    const NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    DynamicShapeResolver(const Output<Node>& tensorWithData, const Output<Node>& tensorWithDims);
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
+
+    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+};
+
+}  // namespace op
+}  // namespace ngraph
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/operations/static_shape_nonzero.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/operations/static_shape_nonzero.hpp
new file mode 100644 (file)
index 0000000..8d8dcb5
--- /dev/null
@@ -0,0 +1,30 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/node.hpp>
+#include <ngraph/op/op.hpp>
+
+#include <memory>
+#include <vector>
+
+namespace ngraph {
+namespace op {
+
+class StaticShapeNonZero : public Op {
+public:
+    static constexpr NodeTypeInfo type_info{"StaticShapeNonZero", 1};
+    const NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    explicit StaticShapeNonZero(const Output<ngraph::Node>& input);
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
+
+    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
+};
+}  // namespace op
+}  // namespace ngraph
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape.hpp
new file mode 100644 (file)
index 0000000..855cbcf
--- /dev/null
@@ -0,0 +1,26 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+#include <vector>
+#include <memory>
+
+namespace ngraph {
+namespace pass {
+
+class DynamicToStaticShape : public FunctionPass {
+public:
+    DynamicToStaticShape() = default;
+
+    bool run_on_function(std::shared_ptr<ngraph::Function> function) override;
+
+private:
+    bool validateStaticShapes(std::shared_ptr<ngraph::Function> function) const;
+};
+
+}  // namespace pass
+}  // namespace ngraph
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp
new file mode 100644 (file)
index 0000000..15a7f53
--- /dev/null
@@ -0,0 +1,21 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+#include <vector>
+#include <memory>
+
+namespace ngraph {
+namespace pass {
+
+class DynamicToStaticShapeNonZero : public GraphRewrite {
+public:
+    DynamicToStaticShapeNonZero();
+};
+
+}  // namespace pass
+}  // namespace ngraph
index 8179535..ff8c921 100644 (file)
@@ -36,19 +36,19 @@ void throwFormat(const char* fileName, int lineNumber, const char* messageFormat
 }  // namespace details
 
 #define VPU_THROW_FORMAT(...)                                                         \
-    vpu::details::throwFormat<details::VPUException>(__FILE__, __LINE__, __VA_ARGS__)
+    vpu::details::throwFormat<vpu::details::VPUException>(__FILE__, __LINE__, __VA_ARGS__)
 
 #define VPU_THROW_UNLESS(condition, ...)                                                       \
     do {                                                                                       \
         if (!(condition)) {                                                                    \
-            vpu::details::throwFormat<details::VPUException>(__FILE__, __LINE__, __VA_ARGS__); \
+            vpu::details::throwFormat<vpu::details::VPUException>(__FILE__, __LINE__, __VA_ARGS__); \
         }                                                                                      \
     } while (false)
 
 #define VPU_THROW_UNSUPPORTED_UNLESS(condition, ...)                                                        \
     do {                                                                                                    \
         if (!(condition)) {                                                                                 \
-            vpu::details::throwFormat<details::UnsupportedLayerException>(__FILE__, __LINE__, __VA_ARGS__); \
+            vpu::details::throwFormat<vpu::details::UnsupportedLayerException>(__FILE__, __LINE__, __VA_ARGS__); \
         }                                                                                                   \
     } while (false)
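
These macros are now spelled with the fully qualified vpu::details:: namespace, so they keep working when expanded from code that is not itself inside the vpu namespace. A minimal usage sketch follows (the caller namespace and function are illustrative; the include path is the one used elsewhere in this commit):

    #include <vpu/utils/error.hpp>

    namespace my_plugin {

    void checkStageCount(int numStages) {
        // Expands to vpu::details::throwFormat<vpu::details::VPUException>(__FILE__, __LINE__, ...)
        // regardless of which namespace this function lives in.
        VPU_THROW_UNLESS(numStages > 0, "model contains no executable stages");
    }

    }  // namespace my_plugin
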
 
index 270966e..1564bab 100644 (file)
@@ -25,7 +25,7 @@ VPU_DECLARE_ENUM(LayoutPreference,
 InferenceEngine::Layout deviceLayout(InferenceEngine::Layout const& layout,
                                      LayoutPreference const& layoutPreference);
 
-ie::Blob::Ptr getBlobFP16(const ie::Blob::Ptr& in);
+ie::Blob::Ptr convertBlobFP32toFP16(const ie::Blob::CPtr& in);
 
 ie::Blob::Ptr copyBlob(const ie::Blob::Ptr& original);
 ie::Blob::Ptr copyBlob(const ie::Blob::Ptr& in, ie::Layout outLayout, void* ptr = nullptr);
diff --git a/inference-engine/src/vpu/common/src/ngraph/operations/dynamic_shape_resolver.cpp b/inference-engine/src/vpu/common/src/ngraph/operations/dynamic_shape_resolver.cpp
new file mode 100644 (file)
index 0000000..ebe0a62
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+namespace ngraph { namespace op {
+
+constexpr NodeTypeInfo DynamicShapeResolver::type_info;
+
+DynamicShapeResolver::DynamicShapeResolver(const Output<Node>& tensorWithData, const Output<Node>& tensorWithDims)
+    : Op(OutputVector{tensorWithData, tensorWithDims}) {
+    constructor_validate_and_infer_types();
+}
+
+std::shared_ptr<Node> DynamicShapeResolver::copy_with_new_args(const NodeVector& new_args) const {
+    check_new_args_count(this, new_args);
+    return std::make_shared<DynamicShapeResolver>(new_args.at(0), new_args.at(1));
+}
+
+void DynamicShapeResolver::validate_and_infer_types() {
+    NODE_VALIDATION_CHECK(this, get_input_size() == 2, "(", get_friendly_name(), ") supports only ", 2, " inputs, but ", get_input_size(), " provided");
+    NODE_VALIDATION_CHECK(this, get_input_partial_shape(0).is_static(), "(", get_friendly_name(), ") does not support dynamic shape for data tensor");
+    NODE_VALIDATION_CHECK(this, get_input_partial_shape(1).is_static(), "(", get_friendly_name(), ") does not support dynamic shape for dims tensor");
+
+    const auto& dimsElementType = get_input_element_type(1);
+    NODE_VALIDATION_CHECK(this, dimsElementType.is_integral_number() && dimsElementType.is_static(), "(", get_friendly_name(), ") supports only integral "
+        "number type for dims tensor, but ", dimsElementType, " provided");
+
+    const auto& dataShape = get_input_shape(0);
+    const auto& dimsShape = get_input_shape(1);
+    NODE_VALIDATION_CHECK(this, dimsShape.size() == 1 && dimsShape.front() == dataShape.size(), "(", get_friendly_name(), ") inputs shapes mismatch: first "
+        "input shape = ", dataShape, " second input shape = ", dimsShape, " but ", dataShape, " and ", Shape{dataShape.size()}, " are expected");
+
+    set_output_type(0, get_input_element_type(0), get_input_shape(0));
+}
+
+bool DynamicShapeResolver::visit_attributes(ngraph::AttributeVisitor& visitor) {
+    return true;
+}
+
+}  // namespace op
+}  // namespace ngraph
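
A minimal construction sketch for the new op (the parameter shapes and the makeResolver helper are illustrative, not part of the commit): the second input must be a 1-D integer tensor whose length equals the rank of the first input, and the op's output reuses the data tensor's element type and upper-bound shape.

    #include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"

    #include <ngraph/opsets/opset3.hpp>
    #include <memory>

    std::shared_ptr<ngraph::op::DynamicShapeResolver> makeResolver() {
        auto data = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f16, ngraph::Shape{3, 4});
        auto dims = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::i64, ngraph::Shape{2});
        // Output: f16 with static shape {3, 4}, the same as the data input.
        return std::make_shared<ngraph::op::DynamicShapeResolver>(data, dims);
    }
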
diff --git a/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_nonzero.cpp b/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_nonzero.cpp
new file mode 100644 (file)
index 0000000..3400e5b
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
+
+namespace ngraph {
+namespace op {
+
+constexpr NodeTypeInfo StaticShapeNonZero::type_info;
+
+StaticShapeNonZero::StaticShapeNonZero(const Output<Node>& input)
+        : Op({input}) {
+    constructor_validate_and_infer_types();
+}
+
+void StaticShapeNonZero::validate_and_infer_types() {
+    NODE_VALIDATION_CHECK(this, get_input_size() == 1,
+                          "StaticShapeNonZero must have only 1 input, provided: ",
+                          get_input_size());
+
+    const auto& arg_shape = get_input_partial_shape(0);
+    NODE_VALIDATION_CHECK(this, arg_shape.is_static(),
+                          "StaticShapeNonZero doesn't support dynamic input shape");
+
+    const auto& input_et = get_input_element_type(0);
+    NODE_VALIDATION_CHECK(this,
+                          input_et.is_integral_number() || input_et.is_real(),
+                          "StaticShapeNonZero input data type needs to be a numeric type. Got: ",
+                          input_et);
+
+    const auto total_dim_size = Dimension(shape_size(arg_shape.to_shape()));
+    set_output_type(0, element::i64, {arg_shape.rank(), total_dim_size});
+    set_output_type(1, element::i64, {Dimension(2)});
+}
+
+std::shared_ptr<Node> StaticShapeNonZero::copy_with_new_args(
+        const NodeVector& new_args) const {
+    check_new_args_count(this, new_args);
+    return std::make_shared<StaticShapeNonZero>(new_args.at(0));
+}
+
+bool StaticShapeNonZero::visit_attributes(ngraph::AttributeVisitor& visitor) {
+    return true;
+}
+
+}  // namespace op
+}  // namespace ngraph
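
A minimal sketch of the resulting static-shape contract (the input shape and the helper name are illustrative): for a 2x3 input, output 0 is an i64 tensor of shape {2, 6}, i.e. rank by total element count, which bounds the number of non-zero positions, and output 1 is the small i64 tensor that carries the actually used extent of output 0.

    #include "vpu/ngraph/operations/static_shape_nonzero.hpp"

    #include <ngraph/opsets/opset3.hpp>
    #include <memory>

    std::shared_ptr<ngraph::op::StaticShapeNonZero> makeStaticNonZero() {
        auto input = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f16, ngraph::Shape{2, 3});
        // Outputs: i64 {2, 6} (indices buffer) and i64 {2} (used shape).
        return std::make_shared<ngraph::op::StaticShapeNonZero>(input);
    }
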
diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape.cpp
new file mode 100644 (file)
index 0000000..e7f92cf
--- /dev/null
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp"
+
+#include <vpu/utils/error.hpp>
+
+namespace ngraph {
+namespace pass {
+
+bool DynamicToStaticShape::run_on_function(std::shared_ptr<ngraph::Function> function) {
+    DynamicToStaticShapeNonZero().run_on_function(function);
+
+    return validateStaticShapes(function);
+}
+
+bool DynamicToStaticShape::validateStaticShapes(std::shared_ptr<ngraph::Function> function) const {
+    function->validate_nodes_and_infer_types();
+
+    for (const auto& node : function->get_ops()) {
+        for (const auto& output : node->get_outputs()) {
+            const auto outputPartialShape = output.get_partial_shape();
+            VPU_THROW_UNLESS(outputPartialShape.is_static(),
+                             "DynamicToStaticShape pass: after all the transformations there is "
+                             "still dynamism in the network. First met node with dynamic output: "
+                             "%s (type: %s)", node->get_friendly_name(), node->get_type_name());
+            return false;
+        }
+    }
+    return true;
+}
+
+}  // namespace pass
+}  // namespace ngraph
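
A minimal end-to-end sketch of applying the pass (the convertNonZero helper and the parameter shape are illustrative): NonZero is rewritten into the StaticShapeNonZero plus DynamicShapeResolver pair, after which validateStaticShapes expects every output shape in the function to be static.

    #include "vpu/ngraph/transformations/dynamic_to_static_shape.hpp"

    #include <ngraph/opsets/opset3.hpp>
    #include <memory>

    std::shared_ptr<ngraph::Function> convertNonZero() {
        auto param = std::make_shared<ngraph::opset3::Parameter>(ngraph::element::f16, ngraph::Shape{2, 3});
        auto nonZero = std::make_shared<ngraph::opset3::NonZero>(param);
        auto function = std::make_shared<ngraph::Function>(ngraph::NodeVector{nonZero},
                                                           ngraph::ParameterVector{param});

        // Rewrites NonZero and then checks that no dynamic shapes remain.
        ngraph::pass::DynamicToStaticShape().run_on_function(function);
        return function;
    }
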
diff --git a/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_nonzero.cpp b/inference-engine/src/vpu/common/src/ngraph/transformations/dynamic_to_static_shape_nonzero.cpp
new file mode 100644 (file)
index 0000000..604dd90
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp"
+
+#include <vpu/ngraph/operations/static_shape_nonzero.hpp>
+#include <vpu/ngraph/operations/dynamic_shape_resolver.hpp>
+
+#include <ngraph/opsets/opset3.hpp>
+
+#include <memory>
+
+namespace ngraph {
+namespace pass {
+
+DynamicToStaticShapeNonZero::DynamicToStaticShapeNonZero() {
+    // We don't set strict_mode when using the pattern Matcher,
+    // so we can set any type and shape for the input.
+    auto inputWithAnyTypeAndShape = std::make_shared<pattern::op::Label>(
+            element::dynamic, PartialShape{});
+    auto nonZeroPattern = std::make_shared<ngraph::op::NonZero>(inputWithAnyTypeAndShape);
+
+    ngraph::graph_rewrite_callback callback = [](pattern::Matcher& matcher) {
+        const auto nonZero = std::dynamic_pointer_cast<ngraph::opset3::NonZero>(matcher.get_match_root());
+        if (!nonZero) {
+            return false;
+        }
+
+        auto staticShapeNonZero = std::make_shared<ngraph::op::StaticShapeNonZero>(
+                nonZero->input(0).get_source_output());
+        staticShapeNonZero->set_friendly_name(nonZero->get_friendly_name() + "/static_shape");
+
+        auto dynamicShapeResolver = std::make_shared<ngraph::op::DynamicShapeResolver>(
+                staticShapeNonZero->output(0), staticShapeNonZero->output(1));
+        dynamicShapeResolver->set_friendly_name(nonZero->get_friendly_name() + "/resolve_shape");
+
+        ngraph::replace_node(matcher.get_match_root(), dynamicShapeResolver);
+        return true;
+    };
+
+    const auto matcher = std::make_shared<ngraph::pattern::Matcher>(
+            nonZeroPattern, "DynamicToStaticShapeNonZero");
+    this->add_matcher(matcher, callback, PassProperty::CHANGE_DYNAMIC_STATE);
+}
+
+}  // namespace pass
+}  // namespace ngraph
index 3bc19c0..dc46aea 100644 (file)
@@ -40,16 +40,13 @@ InferenceEngine::Layout deviceLayout(InferenceEngine::Layout const& layout,
     return layout;
 }
 
-ie::Blob::Ptr getBlobFP16(const ie::Blob::Ptr& in) {
-    IE_PROFILING_AUTO_SCOPE(getBlobFP16);
+ie::Blob::Ptr convertBlobFP32toFP16(const ie::Blob::CPtr& in) {
+    IE_PROFILING_AUTO_SCOPE(convertBlobFP32toFP16);
 
     auto inDesc = in->getTensorDesc();
 
     auto precision = inDesc.getPrecision();
 
-    if (precision == ie::Precision::FP16)
-        return in;
-
     if (precision != ie::Precision::FP32) {
         VPU_THROW_EXCEPTION << "Unsupported precision " << precision.name();
     }
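
With the FP16 early-return removed and the signature narrowed, the precision check moves to the call site. A caller-side sketch follows (the asFP16 helper name is illustrative, and the ie_helpers.hpp include path is an assumption since it is not shown in this hunk):

    #include <vpu/utils/ie_helpers.hpp>

    namespace vpu {

    ie::Blob::Ptr asFP16(const ie::Blob::Ptr& blob) {
        if (blob->getTensorDesc().getPrecision() == ie::Precision::FP16) {
            return blob;  // already FP16: the converter now throws on anything but FP32
        }
        return convertBlobFP32toFP16(blob);  // FP32 -> FP16 copy
    }

    }  // namespace vpu
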
index 96df041..35379a1 100644 (file)
@@ -1,6 +1,6 @@
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
-// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
+// Define if runtime supports it. MX runtime is compatible
 #define USE_MANUAL_DMA 1
 
 #if defined (USE_MANUAL_DMA)
index ab595bb..16b4dc9 100644 (file)
@@ -1,6 +1,6 @@
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
-// Define if runtime supports it. MX runtime is compatible, KMB is in WIP state
+// Define if runtime supports it. MX runtime is compatible
 #define USE_MANUAL_DMA 1
 
 // Set to 1 if only the output is zeroed before kernel execution
index 87654a8..371504a 100644 (file)
@@ -4,17 +4,18 @@
 
 #pragma once
 
-#include <memory>
+#include <vpu/graph_transformer.hpp>
+
+#include <vpu/model/model.hpp>
+#include <vpu/backend/blob_format.hpp>
+#include <ie_layers.h>
+
 #include <string>
+#include <memory>
 #include <set>
 #include <vector>
 #include <utility>
 
-#include <ie_layers.h>
-
-#include <vpu/graph_transformer.hpp>
-#include <vpu/model/model.hpp>
-
 namespace vpu {
 
 namespace ie = InferenceEngine;
@@ -38,6 +39,23 @@ private:
             std::pair<char*, size_t>& blobHeader,
             int& numActiveStages);
 
+    int serializeIOInfoSection(
+            const Model& model,
+            DataUsage dataUsage,
+            BlobSerializer& blobSerializer);
+
+    void serializeConstData(
+            const Model& model,
+            const mv_blob_header& blobHdr,
+            std::vector<char>& blob);
+
+    void serializeConstShapes(
+            const Model& model,
+            const mv_blob_header& blobHdr,
+            std::vector<char>& blob);
+
+    ElfN_Ehdr createElfHeader();
+
     void getMetaData(
             const Model& model,
             const std::vector<ie::CNNLayerPtr>& allLayers,
index 90ba3af..cc3847a 100644 (file)
@@ -146,6 +146,10 @@ public:
     void parseOneHot(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
     void parseExpPriorGridGenerator(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
     void parseExpGenerateProposals(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+    void parseScatterUpdate(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+    void parseExpTopKROIs(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+    void parseNonZero(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
+    void parseROIAlign(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const;
 
     //
     // Special layers
index 6b1919f..d3449cf 100644 (file)
@@ -184,7 +184,7 @@ std::set<std::string> getSupportedLayers(
 //
 
 const uint32_t BLOB_MAGIC_NUMBER  = 9709;
-const uint32_t BLOB_VERSION_MAJOR = 5;
+const uint32_t BLOB_VERSION_MAJOR = 6;
 const uint32_t BLOB_VERSION_MINOR = 0;
 
 }  // namespace vpu
index d6006f8..66c44d1 100644 (file)
@@ -77,6 +77,7 @@ public:
      * Allocates memory for single data node
      */
     bool allocateData(const Data& data);
+    ShapeLocation allocateConstShape(Data& data);
     void freeData(const Data& data, DeallocationMode mode = DeallocationMode::JustFree);
 
     void selfCheck();
index 8954e63..63e3e6a 100644 (file)
@@ -175,6 +175,39 @@ struct HwConvTileInfo final {
     double cost = std::numeric_limits<double>::max();
 };
 
+//
+// Structs for split
+//
+
+struct Slice {
+    int start;
+    size_t size;
+
+    Slice(int start, size_t size) :
+            start(start),
+            size(size) {}
+};
+
+struct DataSlice {
+    Data data;
+    Slice slice;
+
+    DataSlice(Data data, Slice slice) :
+            data(std::move(data)),
+            slice(slice) {}
+};
+
+using DataSlices = std::vector<DataSlice>;
+
+struct ConvTileSlice {
+    HwConvTileInfo tile;
+    Slice slice;
+
+    ConvTileSlice(HwConvTileInfo tile, Slice slice) :
+            tile(tile),
+            slice(slice) {}
+};
+
 void printTo(std::ostream& os, const HwConvTileInfo& convTiles);
 void printTo(DotLabel& lbl, const HwConvTileInfo& convTiles);
 
index 045588c..529587b 100644 (file)
@@ -131,28 +131,6 @@ HwPaddingInfo getHwPaddingInfo(
 void printTo(std::ostream& os, const HwPaddingInfo& hwPad);
 void printTo(DotLabel& lbl, const HwPaddingInfo& hwPad);
 
-
-//
-// HwWeightsContent
-//
-
-class HwWeightsContent final : public CalculatedDataContent {
-public:
-    HwWeightsContent(
-            const DataContent::Ptr& origContent,
-            const DataDesc& origWeightsDesc,
-            int numInputChannels,
-            int channelStartIndex = 0);
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override;
-
-private:
-    DataDesc _origWeightsDesc;
-    int _numInputChannels = 0;
-    int _channelStartIndex = 0;
-};
-
 //
 // calculateHwBufferSize
 //
index 023c81e..3965a8b 100644 (file)
@@ -83,18 +83,6 @@ void deconv_to_conv(const T* src, T* dst, const DataDesc& desc) {
 }
 
 //
-// DefaultSwWeightsContent
-//
-
-class DefaultSwWeightsContent final : public CalculatedDataContent {
-public:
-    explicit DefaultSwWeightsContent(const DataContent::Ptr& origContent);
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override;
-};
-
-//
 // getOneOfSingleNextStage
 //
 
index ece7c1a..e4d656f 100644 (file)
@@ -4,21 +4,22 @@
 
 #pragma once
 
-#include <memory>
-#include <string>
-#include <functional>
-#include <vector>
-
-#include <ie_data.h>
-#include <ie_blob.h>
-
 #include <vpu/model/base.hpp>
 #include <vpu/model/edges.hpp>
 #include <vpu/model/data_desc.hpp>
+#include <vpu/model/data_contents/data_content.hpp>
 #include <vpu/backend/blob_serializer.hpp>
 #include <vpu/utils/enums.hpp>
 #include <vpu/utils/func_ref.hpp>
 
+#include <ie_data.h>
+#include <ie_blob.h>
+
+#include <memory>
+#include <string>
+#include <functional>
+#include <vector>
+
 namespace vpu {
 
 namespace ie = InferenceEngine;
@@ -46,15 +47,15 @@ VPU_DECLARE_ENUM(DataUsage,
 )
 
 //
-// DataLocation
+// Location
 //
 
 //
-// Describes where Data object is located.
+// Describes where particular data or shape is located.
 //
 
 // Must be synchronized with MvTensor
-VPU_DECLARE_ENUM(DataLocation,
+VPU_DECLARE_ENUM(Location,
     None = 0,
     Input = 1,
     Output = 2,
@@ -67,75 +68,25 @@ VPU_DECLARE_ENUM(MemoryType,
     DDR,
     CMX)
 
-//
-// DataContent
-//
-
-//
-// Content of the Const Data object.
-//
-
-class DataContent {
-public:
-    using Ptr = std::shared_ptr<DataContent>;
-
-    virtual ~DataContent();
-
-    // TYPED pointer
-    template <typename T>
-    const T* get() const {
-        return static_cast<const T*>(getRaw());
-    }
-
-    const DataDesc& desc() const {
-        return _desc;
-    }
-
-private:
-    // RAW pointer
-    virtual const void* getRaw() const = 0;
-
-private:
-    DataDesc _desc;
-
-    friend ModelObj;
+struct DataLocation final {
+    Location location;
+    int offset;
 };
 
-//
-// Data content that is calculated on the fly, using lazy calculation:
-//
-//   * It performs calculation on the first call and stores it in internal buffer.
-//   * Next access will return the pointer to calculated buffer.
-//
-class CalculatedDataContent : public DataContent {
-public:
-    CalculatedDataContent() = default;
-    explicit CalculatedDataContent(const SmallVector<DataContent::Ptr, 2>& baseContents) : _baseContents(baseContents) {}
-
-private:
-    const void* getRaw() const override;
-
-    virtual size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>& baseContents) const;
-    virtual void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const = 0;
-
-private:
-    mutable SmallVector<DataContent::Ptr, 2> _baseContents;
-    mutable std::vector<uint8_t> _temp;
+static constexpr DataLocation defaultDataLocation = {
+    Location::None, 0
 };
 
-DataContent::Ptr ieBlobContent(
-        const ie::Blob::Ptr& blob,
-        int repeat = 1);
-
-DataContent::Ptr replicateContent(float val, int count);
-DataContent::Ptr replicateContent(const DataContent::Ptr& origContent, int count);
-
-DataContent::Ptr scaleContent(const DataContent::Ptr& origContent, float scale);
+struct ShapeLocation final {
+    Location dimsLocation;
+    int dimsOffset;
+    Location stridesLocation;
+    int stridesOffset;
+};
 
-// The function scales the major dimension of 4D origContent
-DataContent::Ptr scaledChannelContent(
-        const DataContent::Ptr& origContent,
-        const DataContent::Ptr& scaleContent);
+static constexpr ShapeLocation defaultShapeLocation = {
+        Location::None, 0, Location::None, 0
+};
 
 //
 // DataNode
@@ -189,8 +140,8 @@ class DataNode final :
     //
 
     VPU_MODEL_ATTRIBUTE(MemoryType, memReqs, MemoryType::DDR)
-    VPU_MODEL_ATTRIBUTE(DataLocation, location, DataLocation::None)
-    VPU_MODEL_ATTRIBUTE(int, memoryOffset, 0)
+    VPU_MODEL_ATTRIBUTE(DataLocation, dataLocation, defaultDataLocation)
+    VPU_MODEL_ATTRIBUTE(ShapeLocation, shapeLocation, defaultShapeLocation)
 
     //
     // Edges wrappers
@@ -282,19 +233,18 @@ public:
 
     void setMemReqs(MemoryType mem);
 
-    void setIOInfo(DataLocation location, int ioBufferOffset);
+    void setIOInfo(Location location, int ioBufferOffset);
 
-    void setAllocationInfo(DataLocation location, int memoryOffset);
+    void setDataAllocationInfo(const DataLocation& dataLocation);
+
+    void setShapeAllocationInfo(const ShapeLocation& shapeLocation);
 
     //
     // Backend utilities
     //
 
     // Serialize as-is for new MvTensor kernels that can work with ND data.
-    // If `newOrder` is not empty, it will be used instead of original and missing dimensions will be set to 1.
-    void serializeBuffer(
-            BlobSerializer& serializer,
-            DimsOrder newOrder = DimsOrder());
+    void serializeBuffer(BlobSerializer& serializer);
 
     void serializeIOInfo(BlobSerializer& serializer) const;
 
@@ -304,11 +254,6 @@ private:
             const DataDesc& storedDesc,
             const DimValues& storedStrides) const;
 
-    void serializeBufferImpl(
-            BlobSerializer& serializer,
-            const DataDesc& storedDesc,
-            const DimValues& storedStrides) const;
-
 private:
     inline DataNode() :
         _consumerEdges(&StageInputEdge::_posInData),
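
The single location/memoryOffset pair is split into a DataLocation for the payload and a ShapeLocation that can place dims and strides independently. A minimal sketch of how an allocator might fill them in (the placeConstInBlob helper and the offsets are illustrative, not part of the commit):

    #include <vpu/model/data.hpp>

    namespace vpu {

    // Place a constant's payload and its shape metadata into the Blob section.
    void placeConstInBlob(const Data& data, int dataOffset, int dimsOffset, int stridesOffset) {
        data->setDataAllocationInfo(DataLocation{Location::Blob, dataOffset});
        data->setShapeAllocationInfo(ShapeLocation{Location::Blob, dimsOffset,
                                                   Location::Blob, stridesOffset});
    }

    }  // namespace vpu
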
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/batch_norm_contents.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/batch_norm_contents.hpp
new file mode 100644 (file)
index 0000000..f5a474e
--- /dev/null
@@ -0,0 +1,47 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+//
+// BatchNormalizationWeightsContent
+//
+
+class BatchNormalizationWeightsContent final : public CalculatedDataContent {
+public:
+    BatchNormalizationWeightsContent(const DataContent::Ptr& origContent, float epsilon);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void* tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    float _epsilon;
+};
+
+//
+// BatchNormalizationBiasesContent
+//
+
+class BatchNormalizationBiasesContent final : public CalculatedDataContent {
+public:
+    BatchNormalizationBiasesContent(const DataContent::Ptr& origContent, const DataContent::Ptr& weightsContent);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void* tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataContent::CPtr _weightsContent;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/calculated_data_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/calculated_data_content.hpp
new file mode 100644 (file)
index 0000000..4e6d89d
--- /dev/null
@@ -0,0 +1,34 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/data_content.hpp>
+
+#include <vpu/utils/small_vector.hpp>
+#include <vpu/model/data_desc.hpp>
+
+namespace vpu {
+
+//
+// Data content that is calculated on the fly, using lazy calculation:
+//
+//   * It performs calculation on the first call and stores it in internal buffer.
+//   * Next access will return the pointer to calculated buffer.
+//
+
+class CalculatedDataContent : public DataContent {
+public:
+    CalculatedDataContent() = default;
+
+private:
+    const void* getRaw() const override;
+
+    virtual void fillTempBuf(void *tempBuf) const = 0;
+
+private:
+    mutable std::vector<uint8_t> _temp;
+};
+
+} // namespace vpu
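
With the refactored interface, a derived content only provides its size and a fill routine; the base class fills and caches the temporary buffer on the first access, as the comment above describes. A minimal sketch of a subclass (the ZeroContent class is illustrative, it is not part of the commit):

    #include <vpu/model/data_contents/calculated_data_content.hpp>

    #include <algorithm>
    #include <cstdint>

    namespace vpu {

    class ZeroContent final : public CalculatedDataContent {
    public:
        explicit ZeroContent(size_t byteSize) : _byteSize(byteSize) {}

        size_t byteSize() const override { return _byteSize; }

    protected:
        // Called once, on the first get<T>() access; the result is kept in the base class buffer.
        void fillTempBuf(void* tempBuf) const override {
            std::fill_n(static_cast<uint8_t*>(tempBuf), _byteSize, uint8_t{0});
        }

    private:
        size_t _byteSize = 0;
    };

    }  // namespace vpu
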
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/conv_weights_contents.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/conv_weights_contents.hpp
new file mode 100644 (file)
index 0000000..162f5ae
--- /dev/null
@@ -0,0 +1,65 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+//
+// ConvIm2ColWeightsContent
+//
+
+class ConvIm2ColWeightsContent final : public CalculatedDataContent {
+public:
+    explicit ConvIm2ColWeightsContent(const DataContent::Ptr& origContent, DataDesc desc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void* tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataDesc _desc;
+};
+
+//
+// Conv3x3WeightsContent
+//
+
+class Conv3x3WeightsContent final : public CalculatedDataContent {
+public:
+    explicit Conv3x3WeightsContent(const DataContent::Ptr& origContent, DataDesc desc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void* tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataDesc _desc;
+};
+
+//
+// ConvCHWWeightsContent
+//
+
+class ConvCHWWeightsContent final : public CalculatedDataContent {
+public:
+    explicit ConvCHWWeightsContent(const DataContent::Ptr& origContent, DataDesc desc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void* tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataDesc _desc;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/data_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/data_content.hpp
new file mode 100644 (file)
index 0000000..ae2a74f
--- /dev/null
@@ -0,0 +1,34 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/utils/numeric.hpp>
+
+#include <details/ie_exception.hpp>
+
+#include <memory>
+#include <cstdint>
+
+namespace vpu {
+
+class DataContent {
+public:
+    using Ptr = std::shared_ptr<DataContent>;
+    using CPtr = std::shared_ptr<const DataContent>;
+
+    virtual ~DataContent();
+
+    template<typename T>
+    const T* get() const {
+        return static_cast<const T*>(getRaw());
+    }
+
+    virtual size_t byteSize() const = 0;
+
+private:
+    virtual const void* getRaw() const = 0;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/deconvolution_contents.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/deconvolution_contents.hpp
new file mode 100644 (file)
index 0000000..321cb2a
--- /dev/null
@@ -0,0 +1,100 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+//
+// DeconvolutionToConvolutionContent
+//
+
+class DeconvolutionToConvolutionContent final : public CalculatedDataContent {
+public:
+    DeconvolutionToConvolutionContent(const DataContent::Ptr& origContent, const DataDesc& desc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataDesc _desc;
+};
+
+//
+// DepthDeconvolutionCHWWeightsContent
+//
+
+class DepthDeconvolutionCHWWeightsContent final : public CalculatedDataContent {
+public:
+    DepthDeconvolutionCHWWeightsContent(
+            const DataContent::Ptr& origContent,
+            int KX, int KY, int channels);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    int _KX;
+    int _KY;
+    int _channels;
+};
+
+//
+// DepthDeconvolutionHWCWeightsContent
+//
+
+class DepthDeconvolutionHWCWeightsContent final : public CalculatedDataContent {
+public:
+    DepthDeconvolutionHWCWeightsContent(
+            const DataContent::Ptr& origContent,
+            int KX, int KY, int channels);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    int _KX;
+    int _KY;
+    int _channels;
+};
+
+//
+// DeconvolutionWeightsContent
+//
+
+class DeconvolutionWeightsContent final : public CalculatedDataContent {
+public:
+    DeconvolutionWeightsContent(
+            const DataContent::Ptr& origContent,
+            DataDesc desc,
+            int KX, int KY,
+            int IC, int OC);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataDesc _desc;
+    DataContent::CPtr _origContent;
+    mutable std::vector<fp16_t> _intermBuf;
+    int _KX;
+    int _KY;
+    int _IC;
+    int _OC;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/default_sw_weights_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/default_sw_weights_content.hpp
new file mode 100644 (file)
index 0000000..5aee9b9
--- /dev/null
@@ -0,0 +1,25 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+class DefaultSwWeightsContent final : public CalculatedDataContent {
+public:
+    DefaultSwWeightsContent(const DataContent::Ptr& origContent, const DataDesc& desc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void* tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataDesc _desc;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/hw_const_data_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/hw_const_data_content.hpp
new file mode 100644 (file)
index 0000000..3452c61
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+#include <vpu/middleend/hw/tiling.hpp>
+
+namespace vpu {
+
+class HwConstData final : public CalculatedDataContent {
+public:
+    HwConstData(
+            const DataContent::Ptr& origContent,
+            const DataDesc& origDesc,
+            const DataDesc& resDesc,
+            const std::map<Dim, Slice> dimSlices);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *outBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataDesc _origDesc;
+    DataDesc _resDesc;
+    std::map<Dim, Slice> _dimSlices;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/hw_weights_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/hw_weights_content.hpp
new file mode 100644 (file)
index 0000000..1f16c19
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+class HwWeightsContent final : public CalculatedDataContent {
+public:
+    HwWeightsContent(
+            const DataContent::Ptr& origContent,
+            const DataDesc& origWeightsDesc,
+            const DataDesc& resDesc,
+            int numInputChannels,
+            int channelStartIndex = 0);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    DataDesc _origDesc;
+    DataDesc _resDesc;
+    int _numInputChannels = 0;
+    int _channelStartIndex = 0;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/ie_blob_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/ie_blob_content.hpp
new file mode 100644 (file)
index 0000000..ea49e17
--- /dev/null
@@ -0,0 +1,30 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/data_content.hpp>
+
+#include <vpu/model/data.hpp>
+
+namespace vpu {
+
+class IeBlobContent final : public DataContent {
+public:
+    IeBlobContent(const ie::Blob::CPtr& blob, DataType resultDataType);
+
+    size_t byteSize() const override;
+
+protected:
+    const void* getRaw() const override;
+
+private:
+    DataType _resultDataType;
+    mutable ie::Blob::CPtr _blob;
+    mutable ie::Blob::CPtr _blobFp16;
+};
+
+DataContent::Ptr ieBlobContent(const ie::Blob::CPtr& blob, DataType resultPrecision = DataType::FP16);
+
+} // namespace vpu
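
A short usage sketch of the factory (the wrapWeights helper is illustrative, and the assumption is that the blob holds FP32 or FP16 weights): the content keeps a reference to the Inference Engine blob, and the mutable members above suggest the conversion to the requested DataType, FP16 by default, happens lazily on first access.

    #include <vpu/model/data_contents/ie_blob_content.hpp>

    vpu::DataContent::Ptr wrapWeights(const InferenceEngine::Blob::CPtr& blob) {
        // Result data type defaults to DataType::FP16.
        return vpu::ieBlobContent(blob);
    }
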
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/kernel_binary_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/kernel_binary_content.hpp
new file mode 100644 (file)
index 0000000..b696f93
--- /dev/null
@@ -0,0 +1,24 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/data_content.hpp>
+
+namespace vpu {
+
+class KernelBinaryContent final : public DataContent {
+public:
+    explicit KernelBinaryContent(const std::string& blob);
+
+    size_t byteSize() const override;
+
+protected:
+    const void* getRaw() const override;
+
+private:
+    std::string _blob;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/mean_contents.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/mean_contents.hpp
new file mode 100644 (file)
index 0000000..ded2dd1
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+#include <ie_preprocess.hpp>
+
+namespace vpu {
+
+//
+// MeanImageContent
+//
+
+class MeanImageContent final : public CalculatedDataContent {
+public:
+    MeanImageContent(const ie::PreProcessInfo& info, const DataDesc& desc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataDesc _desc;
+    ie::PreProcessInfo _info;
+};
+
+//
+// MeanValueContent
+//
+
+class MeanValueContent final : public CalculatedDataContent {
+public:
+    explicit MeanValueContent(const ie::PreProcessInfo& info);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    ie::PreProcessInfo _info;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/merge_fc_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/merge_fc_content.hpp
new file mode 100644 (file)
index 0000000..7018a47
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+class MergeFullyConnectedContentsByChannels final : public CalculatedDataContent {
+public:
+    MergeFullyConnectedContentsByChannels(const std::vector<DataContent::CPtr> contents,
+                                          const std::vector<DataDesc> inDescs,
+                                          const DataDesc& resDesc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *temp) const override;
+
+private:
+    std::vector<DataContent::CPtr> _contents;
+    std::vector<DataDesc> _inDescs;
+    DataDesc _resDesc;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/mtcnn_blob_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/mtcnn_blob_content.hpp
new file mode 100644 (file)
index 0000000..625aa58
--- /dev/null
@@ -0,0 +1,24 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/data_content.hpp>
+
+namespace vpu {
+
+class MTCNNBlobContent final : public DataContent {
+public:
+    explicit MTCNNBlobContent(std::vector<char> blob);
+
+    size_t byteSize() const override;
+
+protected:
+    const void* getRaw() const override;
+
+private:
+    std::vector<char> _blob;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/prelu_blob_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/prelu_blob_content.hpp
new file mode 100644 (file)
index 0000000..4cc985e
--- /dev/null
@@ -0,0 +1,32 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/data_content.hpp>
+#include <vpu/model/data_desc.hpp>
+
+#include <ie_blob.h>
+
+namespace vpu {
+
+class PReLUBlobContent final : public DataContent {
+public:
+    PReLUBlobContent(const InferenceEngine::Blob::CPtr& blob, const DataDesc& desc, int repeat);
+
+    size_t byteSize() const override;
+
+protected:
+    const void* getRaw() const override;
+
+private:
+    InferenceEngine::Blob::CPtr _blob;
+    int _repeat = 0;
+    DataDesc _desc;
+
+    mutable InferenceEngine::Blob::CPtr _blobFp16;
+    mutable std::vector<fp16_t> _tempFp16;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/priorbox_contents.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/priorbox_contents.hpp
new file mode 100644 (file)
index 0000000..110109c
--- /dev/null
@@ -0,0 +1,59 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+//
+// PriorBoxContent
+//
+
+class PriorBoxContent final : public CalculatedDataContent {
+public:
+    PriorBoxContent(
+            const DataDesc& inDesc0,
+            const DataDesc& inDesc1,
+            const DataDesc& outDesc,
+            const ie::CNNLayerPtr &layer);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataDesc _inDesc0;
+    DataDesc _inDesc1;
+    DataDesc _outDesc;
+    ie::CNNLayerPtr _layer;
+};
+
+//
+// PriorBoxClusteredContent
+//
+
+class PriorBoxClusteredContent final : public CalculatedDataContent {
+public:
+    PriorBoxClusteredContent(
+            const DataDesc& inDesc0,
+            const DataDesc& inDesc1,
+            const DataDesc& outDesc,
+            const ie::CNNLayerPtr& layer);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataDesc _inDesc0;
+    DataDesc _inDesc1;
+    DataDesc _outDesc;
+    ie::CNNLayerPtr _layer;
+};
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/replicated_data_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/replicated_data_content.hpp
new file mode 100644 (file)
index 0000000..a6ed7de
--- /dev/null
@@ -0,0 +1,32 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+class ReplicatedContent final : public CalculatedDataContent {
+public:
+    ReplicatedContent(float val, int count, const DataDesc& desc);
+
+    ReplicatedContent(DataContent::Ptr origContent, int count, const DataDesc& desc);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent = nullptr;
+    DataDesc _desc;
+    float _factor = 1.0f;
+    int _count = 0;
+};
+
+DataContent::Ptr replicateContent(float val, int count, const DataDesc& desc);
+DataContent::Ptr replicateContent(const DataContent::Ptr& origContent, int count, const DataDesc& desc);
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/scaled_content.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/data_contents/scaled_content.hpp
new file mode 100644 (file)
index 0000000..56cd314
--- /dev/null
@@ -0,0 +1,27 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+class ScaledContent final : public CalculatedDataContent {
+public:
+    ScaledContent(const DataContent::Ptr& origContent, float scale);
+
+    size_t byteSize() const override;
+
+protected:
+    void fillTempBuf(void *tempBuf) const override;
+
+private:
+    DataContent::CPtr _origContent;
+    float _factor = 1.0f;
+};
+
+DataContent::Ptr scaleContent(const DataContent::Ptr& origContent, float scale);
+
+} // namespace vpu
index 083c380..e0f17fd 100644 (file)
@@ -567,6 +567,8 @@ public:
 
     int totalDimSize() const;
 
+    int dimsByteSize() const { return numDims() * static_cast<int>(sizeof(int32_t)); }
+
     //
     // DimsOrder
     //
index 84786c1..206cd4e 100644 (file)
@@ -144,6 +144,7 @@ VPU_DECLARE_ENUM(StageType,
     Exp = 101,
     Floor = 102,
     TopK = 104,
+    ScatterUpdate = 103,
     ReduceMin = 105,
     ExpDetectionOutput = 106,  // ExperimentalDetectronDetectionOutput
     NonMaxSuppression = 107,
@@ -160,7 +161,10 @@ VPU_DECLARE_ENUM(StageType,
     LoopStart = 119,
     LoopEnd = 120,
     ExpPriorGridGenerator = 121,
+    NonZero = 122,
+    ROIAlign = 123,
     ExpGenerateProposals = 124,
+    ExpTopKROIs = 125,
 )
 
 //
index e87d841..b1a2156 100644 (file)
@@ -265,6 +265,16 @@ public:
             const DataVector& inputs,
             const Data& output);
 
+    Stage addScatterUpdateStage(
+            const Model& model,
+            const std::string& name,
+            const ie::CNNLayerPtr& layer,
+            const Data& input,
+            const Data& output,
+            const Data& indices,
+            const Data& updates,
+            const Data& axis);
+
     Stage addLoopStartStage(
         const Model& model,
         const std::string& name,
index 8e9912d..ec3311e 100644 (file)
@@ -129,15 +129,15 @@ void BackEnd::dumpModelToDot(
             } else if (data->usage() == DataUsage::Temp) {
                 dataColor = "cyan";
             } else if (data->usage() == DataUsage::Intermediate) {
-                if (data->location() == DataLocation::BSS) {
+                if (data->dataLocation().location == Location::BSS) {
                     dataColor = "cyan";
-                } else if (data->location() == DataLocation::CMX) {
+                } else if (data->dataLocation().location == Location::CMX) {
                     dataColor = "magenta";
-                } else if (data->location() == DataLocation::Blob) {
+                } else if (data->dataLocation().location == Location::Blob) {
                     dataColor = "aquamarine";
-                } else if (data->location() == DataLocation::Input) {
+                } else if (data->dataLocation().location == Location::Input) {
                     dataColor = "green";
-                } else if (data->location() == DataLocation::Output) {
+                } else if (data->dataLocation().location == Location::Output) {
                     dataColor = "deepskyblue";
                 }
             }
@@ -179,8 +179,8 @@ void BackEnd::dumpModelToDot(
                     }
                 }
                 lbl.appendPair("memReqs", data->memReqs());
-                lbl.appendPair("location", data->location());
-                lbl.appendPair("memoryOffset", data->memoryOffset());
+                lbl.appendPair("location", data->dataLocation().location);
+                lbl.appendPair("memoryOffset", data->dataLocation().offset);
                 if (!data->attrs().empty()) {
                     lbl.appendPair("extraAttrs", data->attrs());
                 }
index a6536ed..d98e6e0 100644 (file)
@@ -4,9 +4,21 @@
 
 #include <vpu/backend/backend.hpp>
 
+#include <vpu/parsed_config.hpp>
+#include <vpu/compile_env.hpp>
+#include <vpu/utils/auto_scope.hpp>
+#include <vpu/utils/dot_io.hpp>
+#include <vpu/utils/file_system.hpp>
+#include <vpu/utils/numeric.hpp>
+
+#include <precision_utils.h>
+#include <details/caseless.hpp>
+#include <graph_tools.hpp>
+#include <description_buffer.hpp>
+#include <xml_parse_utils.h>
+
 #include <climits>
 #include <cstring>
-
 #include <string>
 #include <memory>
 #include <list>
 #include <iomanip>
 #include <atomic>
 
-#include <precision_utils.h>
-#include <details/caseless.hpp>
-#include <graph_tools.hpp>
-#include <description_buffer.hpp>
-#include <xml_parse_utils.h>
-
-#include <vpu/parsed_config.hpp>
-#include <vpu/compile_env.hpp>
-#include <vpu/backend/blob_format.hpp>
-#include <vpu/utils/auto_scope.hpp>
-#include <vpu/utils/dot_io.hpp>
-#include <vpu/utils/file_system.hpp>
-#include <vpu/utils/numeric.hpp>
-
 namespace vpu {
 
-void BackEnd::serialize(
+struct ModelStagesStat final {
+    bool hasHwStage;
+    bool hasShaveStage;
+    bool hasDmaStage;
+};
+
+int BackEnd::serializeIOInfoSection(
         const Model& model,
-        std::vector<char>& blob,
-        std::pair<char*, size_t>& blobHeader,
-        int& numActiveStages) {
-    VPU_PROFILE(serialize);
+        DataUsage dataUsage,
+        BlobSerializer& blobSerializer) {
+    VPU_INTERNAL_CHECK(dataUsage == DataUsage::Input || dataUsage == DataUsage::Output,
+        "serializeIOInfoSection was called with {} usage while only {} and {} usages are supported",
+        dataUsage, DataUsage::Input, DataUsage::Output);
 
-    const auto& env = CompileEnv::get();
+    int datasNumber = 0;
+
+    for (const auto& data : model->datas()) {
+        if (data->usage() != dataUsage) {
+            continue;
+        }
 
-    auto batchSize = model->batchSize();
-    auto usedMemory = model->attrs().get<UsedMemory>("usedMemory");
+        if (dataUsage == DataUsage::Input) {
+            VPU_INTERNAL_CHECK(data->producerEdge() == nullptr,
+                "serializeIOInfoSection failed on input data {}. Input must have no producer but actually it has: {} with type {}",
+                data->name(), data->producerEdge()->producer()->name(), data->producerEdge()->producer()->type());
+            VPU_INTERNAL_CHECK(data->numConsumers() != 0,
+                "serializeIOInfoSection failed on input data {}. Input must have at least one consumer but it doesn't ",
+                data->usage());
+        }
 
-    //
-    // Remove special stages from the stages list
-    //
+        if (dataUsage == DataUsage::Output) {
+            VPU_INTERNAL_CHECK(data->producerEdge() != nullptr,
+                "serializeIOInfoSection failed on output data {}. Output must have any producer but it doesn't",
+                data->usage());
+        }
 
-    bool hasHwStage = false;
-    bool hasShaveStage = false;
-    bool hasDmaStage = false;
+        VPU_INTERNAL_CHECK(data->parentDataEdge() == nullptr,
+            "serializeIOInfoSection failed on {} with usage {}. IO data must have no parent data but it does",
+            data->name(), data->usage());
 
-    StageVector execStages;
-    execStages.reserve(model->numStages());
+        VPU_INTERNAL_CHECK(!data->attrs().has("ioIdx"),
+            "serializeIOInfoSection failed: IO data {} with usage {} doesn't have ioIdx attribute",
+            data->name(), data->usage());
 
-    for (const auto& stage : model->getStages()) {
-        if (stage->category() == StageCategory::Special) {
-            continue;
-        }
+        data->attrs().set("ioIdx", datasNumber);
 
-        if (stage->category() == StageCategory::HW) {
-            hasHwStage = true;
-        } else if (stage->category() == StageCategory::SHAVE) {
-            hasShaveStage = true;
-        } else if (stage->category() == StageCategory::DMA) {
-            hasDmaStage = true;
-        }
+        data->serializeIOInfo(blobSerializer);
 
-        execStages.emplace_back(stage);
+        ++datasNumber;
     }
 
-    numActiveStages = execStages.size();
+    return datasNumber;
+}
 
-    //
-    // I/O info sections
-    //
+ElfN_Ehdr BackEnd::createElfHeader() {
+    ElfN_Ehdr elfHdr = {};
+    elfHdr.e_ident[0] = 0x7f;
+    elfHdr.e_ident[1] = 'e';
+    elfHdr.e_ident[2] = 'l';
+    elfHdr.e_ident[3] = 'f';
+    for (int i = 4; i < 16; i++) {
+        elfHdr.e_ident[i] = 0;
+    }
+    elfHdr.e_type = 1;
+    elfHdr.e_machine = 2;
+    elfHdr.e_version = 2;
+    elfHdr.e_entry = 0;
+    elfHdr.e_phoff = 0;
+    elfHdr.e_shoff = 0;
+    elfHdr.e_ehsize = 8 * sizeof(elfHdr);
 
-    int numInputs = 0;
-    BlobSerializer inputInfoSerializer;
+    return elfHdr;
+}
+
+void BackEnd::serializeConstData(const Model& model, const mv_blob_header& blobHdr, std::vector<char>& blob) {
     for (const auto& data : model->datas()) {
-        if (data->usage() != DataUsage::Input) {
+        if (data->usage() != DataUsage::Const) {
             continue;
         }
 
         IE_ASSERT(data->producerEdge() == nullptr);
         IE_ASSERT(data->parentDataEdge() == nullptr);
         IE_ASSERT(data->numConsumers() != 0);
+        IE_ASSERT(data->dataLocation().location == Location::Blob);
 
-        IE_ASSERT(!data->attrs().has("ioIdx"));
-        data->attrs().set("ioIdx", numInputs);
-
-        data->serializeIOInfo(inputInfoSerializer);
+        const auto content = data->content();
+        IE_ASSERT(content != nullptr);
 
-        ++numInputs;
+        std::copy_n(content->get<uint8_t>(), content->byteSize(), blob.data() + blobHdr.const_data_section_offset + data->dataLocation().offset);
     }
+}
 
-    int numOutputs = 0;
-    BlobSerializer outputInfoSerializer;
+void BackEnd::serializeConstShapes(const Model& model, const mv_blob_header& blobHdr, std::vector<char>& blob) {
     for (const auto& data : model->datas()) {
-        if (data->usage() != DataUsage::Output) {
-            continue;
-        }
+        const auto serializeToBlob = [&data, &blob, &blobHdr](const BlobSerializer& serializer, int offset) {
+            std::copy_n(serializer.data(), data->desc().numDims() * sizeof(uint32_t), blob.data() + blobHdr.const_data_section_offset + offset);
+        };
 
-        IE_ASSERT(data->producerEdge() != nullptr);
-        IE_ASSERT(data->parentDataEdge() == nullptr);
+        const auto dimsOrder = data->desc().dimsOrder();
+        const auto storedPerm = dimsOrder.toPermutation();
 
-        IE_ASSERT(!data->attrs().has("ioIdx"));
-        data->attrs().set("ioIdx", numOutputs);
+        const auto shapeLocation = data->shapeLocation();
 
-        data->serializeIOInfo(outputInfoSerializer);
+        if (shapeLocation.dimsLocation == Location::Blob) {
+            BlobSerializer dimsSerializer;
+            const auto dims = data->desc().dims();
 
-        ++numOutputs;
+            for (const auto& d : storedPerm) {
+                dimsSerializer.append(checked_cast<uint32_t>(dims[d]));
+            }
+            serializeToBlob(dimsSerializer, shapeLocation.dimsOffset);
+        }
+
+        if (shapeLocation.stridesLocation == Location::Blob) {
+            BlobSerializer stridesSerializer;
+            const auto strides = data->strides();
+
+            for (const auto& d : storedPerm) {
+                stridesSerializer.append(checked_cast<uint32_t>(strides[d]));
+            }
+            serializeToBlob(stridesSerializer, shapeLocation.stridesOffset);
+        }
     }
+}
 
-    //
-    // Stages section
-    //
+void BackEnd::serialize(
+        const Model& model,
+        std::vector<char>& blob,
+        std::pair<char*, size_t>& blobHeader,
+        int& numActiveStages) {
+    VPU_PROFILE(serialize);
 
+    const auto& env = CompileEnv::get();
 
+    BlobSerializer inputInfoSerializer;
+    BlobSerializer outputInfoSerializer;
     BlobSerializer stagesSerializer;
+
+    const auto getExecStages = [&model]() {
+        StageVector execStages;
+        execStages.reserve(model->numStages());
+
+        for (const auto& stage : model->getStages()) {
+            if (stage->category() == StageCategory::Special) {
+                continue;
+            }
+
+            execStages.emplace_back(stage);
+        }
+
+        return execStages;
+    };
+
+    const auto getModelStagesStat = [&model]() {
+        ModelStagesStat modelStagesStat{false, false, false};
+
+        for (const auto& stage : model->getStages()) {
+            if (stage->category() == StageCategory::Special) {
+                continue;
+            }
+
+            if (stage->category() == StageCategory::HW) {
+                modelStagesStat.hasHwStage = true;
+            } else if (stage->category() == StageCategory::SHAVE) {
+                modelStagesStat.hasShaveStage = true;
+            } else if (stage->category() == StageCategory::DMA) {
+                modelStagesStat.hasDmaStage = true;
+            }
+        }
+
+        return modelStagesStat;
+    };
+
+    const auto createBlobHeader = [&env, &model, &inputInfoSerializer, &outputInfoSerializer, &stagesSerializer]
+            (int numInputs, int numOutputs, const StageVector& execStages, const ModelStagesStat& modelStagesStat) {
+        const auto batchSize = model->batchSize();
+        const auto usedMemory = model->attrs().get<UsedMemory>("usedMemory");
+
+        const auto hdrSize = alignVal<int>(sizeof(ElfN_Ehdr) + sizeof(mv_blob_header), 64);
+        const auto inputInfoSecSize = alignVal(inputInfoSerializer.size(), 64);
+        const auto outputInfoSecSize = alignVal(outputInfoSerializer.size(), 64);
+        const auto stagesSecSize = alignVal(stagesSerializer.size(), 64);
+        const auto constDataSecSize = alignVal(usedMemory.blob, 64);
+
+        mv_blob_header blobHdr = {};
+        blobHdr.magic_number = BLOB_MAGIC_NUMBER;
+        blobHdr.file_size = checked_cast<uint32_t>(hdrSize + inputInfoSecSize + outputInfoSecSize + stagesSecSize + constDataSecSize);
+        blobHdr.blob_ver_major = BLOB_VERSION_MAJOR;
+        blobHdr.blob_ver_minor = BLOB_VERSION_MINOR;
+        blobHdr.inputs_count = checked_cast<uint32_t>(numInputs);
+        blobHdr.outputs_count = checked_cast<uint32_t>(numOutputs);
+        blobHdr.stages_count = checked_cast<uint32_t>(execStages.size());
+        blobHdr.inputs_size = checked_cast<uint32_t>(usedMemory.input);
+        blobHdr.outputs_size = checked_cast<uint32_t>(usedMemory.output);
+        blobHdr.batch_size = checked_cast<uint32_t>(batchSize);
+        blobHdr.bss_mem_size = checked_cast<uint32_t>(usedMemory.BSS);
+        blobHdr.number_of_cmx_slices = checked_cast<uint32_t>(env.resources.numCMXSlices);
+        blobHdr.number_of_shaves = checked_cast<uint32_t>(env.resources.numSHAVEs);
+        blobHdr.has_hw_stage = checked_cast<uint32_t>(modelStagesStat.hasHwStage);
+        blobHdr.has_shave_stage = checked_cast<uint32_t>(modelStagesStat.hasShaveStage);
+        blobHdr.has_dma_stage = checked_cast<uint32_t>(modelStagesStat.hasDmaStage);
+        blobHdr.input_info_section_offset = checked_cast<uint32_t>(hdrSize);
+        blobHdr.output_info_section_offset = checked_cast<uint32_t>(blobHdr.input_info_section_offset + inputInfoSecSize);
+        blobHdr.stage_section_offset = checked_cast<uint32_t>(blobHdr.output_info_section_offset + outputInfoSecSize);
+        blobHdr.const_data_section_offset = checked_cast<uint32_t>(blobHdr.stage_section_offset + stagesSecSize);
+
+        return blobHdr;
+    };
+
+    const int numInputs = serializeIOInfoSection(model, DataUsage::Input, inputInfoSerializer);
+    const int numOutputs = serializeIOInfoSection(model, DataUsage::Output, outputInfoSerializer);
+
+    const auto& execStages = getExecStages();
+    numActiveStages = checked_cast<int>(execStages.size());
+
     for (const auto& stage : execStages) {
         stage->serialize(stagesSerializer);
     }
 
-    //
-    // Elf header
-    //
+    const auto modelStagesStat = getModelStagesStat();
 
-    ElfN_Ehdr elfHdr = {};
-    elfHdr.e_ident[0] = 0x7f;
-    elfHdr.e_ident[1] = 'e';
-    elfHdr.e_ident[2] = 'l';
-    elfHdr.e_ident[3] = 'f';
-    for (int i = 4; i < 16; i++) {
-        elfHdr.e_ident[i] = 0;
-    }
-    elfHdr.e_type = 1;
-    elfHdr.e_machine = 2;
-    elfHdr.e_version = 2;
-    elfHdr.e_entry = 0;
-    elfHdr.e_phoff = 0;
-    elfHdr.e_shoff = 0;
-    elfHdr.e_ehsize = 8 * sizeof(elfHdr);
-
-    //
-    // Blob header
-    //
-
-    auto hdrSize = alignVal<int>(sizeof(ElfN_Ehdr) + sizeof(mv_blob_header), 64);
-    auto inputInfoSecSize = alignVal(inputInfoSerializer.size(), 64);
-    auto outputInfoSecSize = alignVal(outputInfoSerializer.size(), 64);
-    auto stagesSecSize = alignVal(stagesSerializer.size(), 64);
-    auto constDataSecSize = alignVal(usedMemory.blob, 64);
-
-    mv_blob_header blobHdr = {};
-    blobHdr.magic_number = BLOB_MAGIC_NUMBER;
-    blobHdr.file_size = checked_cast<uint32_t>(hdrSize + inputInfoSecSize + outputInfoSecSize + stagesSecSize + constDataSecSize);
-    blobHdr.blob_ver_major = BLOB_VERSION_MAJOR;
-    blobHdr.blob_ver_minor = BLOB_VERSION_MINOR;
-    blobHdr.inputs_count = checked_cast<uint32_t>(numInputs);
-    blobHdr.outputs_count = checked_cast<uint32_t>(numOutputs);
-    blobHdr.stages_count = checked_cast<uint32_t>(execStages.size());
-    blobHdr.inputs_size = checked_cast<uint32_t>(usedMemory.input);
-    blobHdr.outputs_size = checked_cast<uint32_t>(usedMemory.output);
-    blobHdr.batch_size = checked_cast<uint32_t>(batchSize);
-    blobHdr.bss_mem_size = checked_cast<uint32_t>(usedMemory.BSS);
-    blobHdr.number_of_cmx_slices = checked_cast<uint32_t>(env.resources.numCMXSlices);
-    blobHdr.number_of_shaves = checked_cast<uint32_t>(env.resources.numSHAVEs);
-    blobHdr.has_hw_stage = checked_cast<uint32_t>(hasHwStage);
-    blobHdr.has_shave_stage = checked_cast<uint32_t>(hasShaveStage);
-    blobHdr.has_dma_stage = checked_cast<uint32_t>(hasDmaStage);
-    blobHdr.input_info_section_offset = checked_cast<uint32_t>(hdrSize);
-    blobHdr.output_info_section_offset = checked_cast<uint32_t>(blobHdr.input_info_section_offset + inputInfoSecSize);
-    blobHdr.stage_section_offset = checked_cast<uint32_t>(blobHdr.output_info_section_offset + outputInfoSecSize);
-    blobHdr.const_data_section_offset = checked_cast<uint32_t>(blobHdr.stage_section_offset + stagesSecSize);
-
-    //
-    // Generate fathom blob
-    //
+    const auto elfHdr = createElfHeader();
+    const auto blobHdr = createBlobHeader(numInputs, numOutputs, execStages, modelStagesStat);
 
     blob.clear();
     blob.resize(blobHdr.file_size, 0);
@@ -198,25 +271,8 @@ void BackEnd::serialize(
     std::copy_n(outputInfoSerializer.data(), outputInfoSerializer.size(), blob.data() + blobHdr.output_info_section_offset);
     std::copy_n(stagesSerializer.data(), stagesSerializer.size(), blob.data() + blobHdr.stage_section_offset);
 
-    for (const auto& data : model->datas()) {
-        if (data->usage() != DataUsage::Const) {
-            continue;
-        }
-
-        IE_ASSERT(data->producerEdge() == nullptr);
-        IE_ASSERT(data->parentDataEdge() == nullptr);
-        IE_ASSERT(data->numConsumers() != 0);
-        IE_ASSERT(data->location() == DataLocation::Blob);
-
-        auto content = data->content();
-        IE_ASSERT(content != nullptr);
-
-        std::copy_n(content->get<uint8_t>(), data->totalByteSize(), blob.data() + blobHdr.const_data_section_offset + data->memoryOffset());
-    }
-
-    //
-    // Blob header spec begin containing elf header and blobHeader
-    //
+    serializeConstData(model, blobHdr, blob);
+    serializeConstShapes(model, blobHdr, blob);
 
     blobHeader.first = blob.data();
     blobHeader.second = sizeof(ElfN_Ehdr) + sizeof(mv_blob_header);
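
The blob assembled here is laid out as ELF header + blob header followed by 64-byte-aligned sections, and createBlobHeader chains the section offsets: each section starts where the previous aligned section ends. A standalone sketch of that arithmetic, with placeholder sizes assumed in place of the real serializer sizes and usedMemory values:

#include <cstdint>
#include <cstdio>

// Round val up to the nearest multiple of align (64 in the serializer).
static uint32_t alignUp(uint32_t val, uint32_t align) {
    return (val + align - 1) / align * align;
}

int main() {
    const uint32_t hdrSize        = alignUp(52 + 128, 64);  // ElfN_Ehdr + mv_blob_header (placeholder sizes)
    const uint32_t inputInfoSize  = alignUp(200, 64);
    const uint32_t outputInfoSize = alignUp(120, 64);
    const uint32_t stagesSize     = alignUp(4096, 64);

    const uint32_t inputInfoOffset  = hdrSize;
    const uint32_t outputInfoOffset = inputInfoOffset + inputInfoSize;
    const uint32_t stageOffset      = outputInfoOffset + outputInfoSize;
    const uint32_t constDataOffset  = stageOffset + stagesSize;

    std::printf("const data section starts at %u\n", constDataOffset);
    return 0;
}
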
index ab2546c..80b34c8 100644 (file)
@@ -65,7 +65,7 @@ void BlobReader::parse(const std::vector<char>& blob) {
         // Truncate zeros
         inputName = inputName.c_str();
 
-        auto dataType = static_cast<DataType>(readFromBlob<uint32_t>(blob, inputInfoSecOffset));
+        auto dataType = readFromBlob<DataType>(blob, inputInfoSecOffset);
         auto orderCode = readFromBlob<uint32_t>(blob, inputInfoSecOffset);
 
         auto numDims = readFromBlob<uint32_t>(blob, inputInfoSecOffset);
@@ -74,14 +74,21 @@ void BlobReader::parse(const std::vector<char>& blob) {
         auto perm = dimsOrder.toPermutation();
         IE_ASSERT(perm.size() == numDims);
 
+        auto dimsLocation = readFromBlob<Location>(blob, inputInfoSecOffset);
+        VPU_THROW_UNLESS(dimsLocation == Location::Blob,
+            "BlobReader error while parsing {} input data: only Blob location for input shape is supported, but {} was given",
+            inputName, dimsLocation);
+        auto dimsOffset = _blobHeader.const_data_section_offset + readFromBlob<uint32_t>(blob, inputInfoSecOffset);
+
+        // Skip strides' location and offset
+        inputInfoSecOffset += 2 * sizeof(uint32_t);
+
         DimValues vpuDims;
+
         for (int i = 0; i < perm.size(); ++i) {
-            vpuDims.set(perm[i], readFromBlob<uint32_t>(blob, inputInfoSecOffset));
+            vpuDims.set(perm[i], readFromBlob<uint32_t>(blob, dimsOffset));
         }
 
-        // Skip strides
-        inputInfoSecOffset += perm.size() * sizeof(uint32_t);
-
         ie::TensorDesc ieDesc = DataDesc(dataType, dimsOrder, vpuDims).toTensorDesc();
         ie::Data inputData(inputName, ieDesc);
 
@@ -108,7 +115,7 @@ void BlobReader::parse(const std::vector<char>& blob) {
         // Truncate zeros
         outputName = outputName.c_str();
 
-        auto dataType = static_cast<DataType>(readFromBlob<uint32_t>(blob, outputInfoSecOffset));
+        auto dataType = readFromBlob<DataType>(blob, outputInfoSecOffset);
         auto orderCode = readFromBlob<uint32_t>(blob, outputInfoSecOffset);
 
         auto numDims = readFromBlob<uint32_t>(blob, outputInfoSecOffset);
@@ -117,14 +124,21 @@ void BlobReader::parse(const std::vector<char>& blob) {
         auto perm = dimsOrder.toPermutation();
         IE_ASSERT(perm.size() == numDims);
 
+        auto dimsLocation = readFromBlob<Location>(blob, outputInfoSecOffset);
+        VPU_THROW_UNLESS(dimsLocation == Location::Blob,
+            "BlobReader error while parsing {} output data: only Blob location for output shape is supported, but {} was given",
+            outputName, dimsLocation);
+        auto dimsOffset = _blobHeader.const_data_section_offset + readFromBlob<uint32_t>(blob, outputInfoSecOffset);
+
+        // Skip strides' location and offset
+        outputInfoSecOffset += 2 * sizeof(uint32_t);
+
         DimValues vpuDims;
+
         for (int i = 0; i < perm.size(); ++i) {
-            vpuDims.set(perm[i], readFromBlob<uint32_t>(blob, outputInfoSecOffset));
+            vpuDims.set(perm[i], readFromBlob<uint32_t>(blob, dimsOffset));
         }
 
-        // Skip strides
-        outputInfoSecOffset += perm.size() * sizeof(uint32_t);
-
         ie::TensorDesc ieDesc = DataDesc(dataType, dimsOrder, vpuDims).toTensorDesc();
         ie::Data outputData(outputName, ieDesc);
 
index 4261bf4..c00fc96 100644 (file)
@@ -13,7 +13,6 @@
 #include <details/caseless.hpp>
 #include <details/ie_cnn_network_iterator.hpp>
 #include <cpp/ie_cnn_network.h>
-#include <cnn_network_ngraph_impl.hpp>
 #include <graph_tools.hpp>
 
 #include <ngraph/function.hpp>
@@ -39,7 +38,7 @@ void FrontEnd::detectNetworkBatch(
     auto checkForDeprecatedCnn = [&network, &env]() {
         return !network.getFunction()
                && !env.config.forceDeprecatedCnnConversion
-               && dynamic_cast<const ie::details::CNNNetworkNGraphImpl*>(&network);
+               && !dynamic_cast<const ie::details::CNNNetworkImpl*>(&network);
     };
     VPU_THROW_UNLESS(!checkForDeprecatedCnn(), "Unexpected CNNNetwork format: it was converted to deprecated format prior plugin's call");
 
index 8d2c822..03ede02 100644 (file)
@@ -5,6 +5,7 @@
 #include "vpu/frontend/frontend.hpp"
 #include "vpu/utils/profiling.hpp"
 #include "vpu/compile_env.hpp"
+#include "vpu/model/data_contents/ie_blob_content.hpp"
 
 #include "net_pass.h"
 
@@ -98,6 +99,10 @@ FrontEnd::FrontEnd(StageBuilder::Ptr stageBuilder)
         {"OneHot",                                             LAYER_PARSER(parseOneHot)},
         {"ExperimentalDetectronPriorGridGenerator",            LAYER_PARSER(parseExpPriorGridGenerator)},
         {"ExperimentalDetectronGenerateProposalsSingleImage",  LAYER_PARSER(parseExpGenerateProposals)},
+        {"ScatterUpdate",                                      LAYER_PARSER(parseScatterUpdate)},
+        {"ExperimentalDetectronTopKROIs",                      LAYER_PARSER(parseExpTopKROIs)},
+        {"StaticShapeNonZero",                                 LAYER_PARSER(parseNonZero)},
+        {"ROIAlign",                                           LAYER_PARSER(parseROIAlign)},
     }} {}
 
 ModelPtr FrontEnd::buildInitialModel(ie::ICNNNetwork& network) {
index f419b73..16c14ac 100644 (file)
@@ -4,15 +4,16 @@
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <vpu/compile_env.hpp>
+#include <vpu/utils/ie_helpers.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
 #include <memory>
 #include <algorithm>
 #include <set>
 #include <map>
 #include <string>
 
-#include <vpu/compile_env.hpp>
-#include <vpu/utils/ie_helpers.hpp>
-
 namespace vpu {
 
 void FrontEnd::parseInputAndOutputData(const Model& model) {
@@ -113,7 +114,7 @@ void FrontEnd::parseInputAndOutputData(const Model& model) {
         const auto vpuData = model->addConstData(
             ieData->getName(),
             descriptor,
-            ieBlobContent(ieBlob));
+            ieBlobContent(ieBlob, descriptor.type()));
 
         // User might ask to return the output from Const layer.
         if (const auto vpuOutData = getVpuData(ieData)) {
index 29fc10b..72185e3 100644 (file)
 
 #include <vpu/frontend/frontend.hpp>
 
-#include <vector>
-#include <memory>
-#include <string>
+#include <vpu/middleend/sw/utility.hpp>
+#include <vpu/utils/ie_helpers.hpp>
+#include <vpu/compile_env.hpp>
+#include <vpu/model/data_contents/mean_contents.hpp>
 
 #include <details/caseless.hpp>
 #include <cpp/ie_cnn_network.h>
 #include <precision_utils.h>
 #include <ie_parallel.hpp>
 
-#include <vpu/middleend/sw/utility.hpp>
-#include <vpu/utils/ie_helpers.hpp>
-#include <vpu/compile_env.hpp>
+#include <vector>
+#include <memory>
+#include <string>
 
 namespace vpu {
 
-namespace {
-
-class MeanImageContent final : public CalculatedDataContent {
-public:
-    explicit MeanImageContent(const ie::PreProcessInfo& info) : _info(info) {
-    }
-
-protected:
-    size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const override {
-        size_t countElem = checked_cast<size_t>(desc().dim(Dim::W) * desc().dim(Dim::H) * desc().dim(Dim::C));
-        if (desc().dimsOrder() == DimsOrder::NHWC || desc().dimsOrder() == DimsOrder::HWC) {
-            countElem *= 2;
-        }
-
-        return countElem * sizeof(fp16_t);
-    }
-
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>&, void* tempBuf) const override {
-        VPU_PROFILE(MeanImageContent);
-
-        const size_t numOfChannel = _info.getNumberOfChannels();
-
-        const size_t imagePixels = checked_cast<size_t>(desc().dim(Dim::W) * desc().dim(Dim::H));
-        const size_t countElem = checked_cast<size_t>(desc().dim(Dim::W) * desc().dim(Dim::H) * desc().dim(Dim::C));
-
-        const auto dstPtr = static_cast<fp16_t*>(tempBuf);
-
-        auto dstPtr2 = dstPtr;
-        if (desc().dimsOrder() == DimsOrder::NHWC || desc().dimsOrder() == DimsOrder::HWC) {
-            dstPtr2 += countElem;
-        }
-
-        ie::parallel_for(numOfChannel, [=](size_t i) {
-            const auto meanDataBlob = _info[i]->meanData;
-
-            ie::PrecisionUtils::f32tof16Arrays(
-                dstPtr2 + i * imagePixels,
-                meanDataBlob->buffer().as<const float*>(),
-                imagePixels,
-                -1.0f);
-        });
-
-        if (desc().dimsOrder() == DimsOrder::NHWC || desc().dimsOrder() == DimsOrder::HWC) {
-            kchw_to_hwck(dstPtr2, dstPtr, desc());
-        }
-    }
-
-private:
-    ie::PreProcessInfo _info;
-};
-
-class MeanValueContent final : public CalculatedDataContent {
-public:
-    explicit MeanValueContent(const ie::PreProcessInfo& info) : _info(info) {
-    }
-
-protected:
-    size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const override {
-        return _info.getNumberOfChannels() * sizeof(fp16_t);
-    }
-
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>&, void* tempBuf) const override {
-        VPU_PROFILE(MeanValueContent);
-
-        IE_ASSERT(checked_cast<size_t>(desc().totalDimSize()) == _info.getNumberOfChannels());
-
-        const auto dstPtr = static_cast<fp16_t*>(tempBuf);
-
-        ie::parallel_for(_info.getNumberOfChannels(), [dstPtr, this](size_t i) {
-            dstPtr[i] = ie::PrecisionUtils::f32tof16(-_info[i]->meanValue);
-        });
-    }
-
-private:
-    ie::PreProcessInfo _info;
-};
-
-}  // namespace
-
 void FrontEnd::addPreProcessStages(const Model& model) {
     VPU_PROFILE(addPreProcessStages);
 
@@ -131,7 +53,7 @@ void FrontEnd::addPreProcessStages(const Model& model) {
             const auto meanImage = model->addConstData(
                 input->name() + "@mean-image",
                 input->desc(),
-                std::make_shared<MeanImageContent>(preProcess));
+                std::make_shared<MeanImageContent>(preProcess, input->desc()));
 
             const auto newInput = model->duplicateData(
                 input,
index 5551c70..15b95fa 100644 (file)
@@ -7,7 +7,6 @@
 #include "graph_transformer.h"
 
 #include "cnn_network_impl.hpp"
-#include "cnn_network_ngraph_impl.hpp"
 
 namespace vpu {
 
@@ -19,16 +18,7 @@ void FrontEnd::removeConstLayers(ie::ICNNNetwork& network) {
     env.log->trace("Remove const layers");
     VPU_LOGGER_SECTION(env.log);
 
-    ie::ICNNNetwork* cnnNetwork = &network;
-    if (auto nGraphImpl = dynamic_cast<ie::details::CNNNetworkNGraphImpl*>(&network)) {
-        // NGraph implementation cannot be casted to CNNNetworkImpl directly
-        cnnNetwork = nGraphImpl->getCNNNetwork().get();
-    }
-
-    // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
-    if (auto cnnNetworkImpl = dynamic_cast<ie::details::CNNNetworkImpl*>(cnnNetwork)) {
-        ie::ConstTransformer(cnnNetworkImpl).fullTrim();
-    }
+    ie::ConstTransformer(&network).fullTrim();
 }
 
 }  // namespace vpu
index d30fb3b..a20de0c 100644 (file)
@@ -65,7 +65,7 @@ void updateChildDataAllocation(const Data& data, int offsetLimitation) {
         auto parent = edge->parent();
         auto child = edge->child();
 
-        auto memoryOffset = parent->memoryOffset();
+        auto memoryOffset = parent->dataLocation().offset;
 
         if (edge->mode() == SharedDataMode::ROI) {
             auto parentStrides = parent->strides();
@@ -86,7 +86,7 @@ void updateChildDataAllocation(const Data& data, int offsetLimitation) {
             IE_ASSERT(false) << "Unsupported enum value";
         }
 
-        child->setAllocationInfo(parent->location(), memoryOffset);
+        child->setDataAllocationInfo({parent->dataLocation().location, memoryOffset});
 
         updateChildDataAllocation(child, offsetLimitation);
     }
@@ -127,7 +127,7 @@ bool Allocator::allocateData(const Data& data) {
 
             auto finalByteSize = data->totalByteSize() * _modelBatchSize;
 
-            data->setIOInfo(DataLocation::Input, alignVal(_inputMemOffset, DATA_ALIGNMENT));
+            data->setIOInfo(Location::Input, alignVal(_inputMemOffset, DATA_ALIGNMENT));
             _inputMemOffset = alignVal(_inputMemOffset, DATA_ALIGNMENT) + finalByteSize;
 
             updateChildDataAllocation(data, DDR_MAX_SIZE);
@@ -153,7 +153,7 @@ bool Allocator::allocateData(const Data& data) {
                 finalByteSize = data->totalByteSize() * _modelBatchSize;
             }
 
-            data->setIOInfo(DataLocation::Output, alignVal(_outputMemOffset, DATA_ALIGNMENT));
+            data->setIOInfo(Location::Output, alignVal(_outputMemOffset, DATA_ALIGNMENT));
             _outputMemOffset = alignVal(_outputMemOffset, DATA_ALIGNMENT) + finalByteSize;
 
             updateChildDataAllocation(data, DDR_MAX_SIZE);
@@ -176,7 +176,7 @@ bool Allocator::allocateData(const Data& data) {
 
             auto finalByteSize = calcAllocationSize(data);
 
-            data->setAllocationInfo(DataLocation::Blob, _blobMemOffset);
+            data->setDataAllocationInfo({Location::Blob, _blobMemOffset});
             _blobMemOffset += finalByteSize;
 
             updateChildDataAllocation(data, DDR_MAX_SIZE);
@@ -257,9 +257,9 @@ bool Allocator::allocateData(const Data& data) {
     // Update data allocation info
     //
 
-    data->setAllocationInfo(chunk->memType == MemoryType::CMX ? DataLocation::CMX : DataLocation::BSS, chunk->pointer);
+    data->setDataAllocationInfo({chunk->memType == MemoryType::CMX ? Location::CMX : Location::BSS, chunk->pointer});
 
-    auto offsetLimitation = (data->location() == DataLocation::CMX) ? _maxCmxSize : DDR_MAX_SIZE;
+    auto offsetLimitation = (data->dataLocation().location == Location::CMX) ? _maxCmxSize : DDR_MAX_SIZE;
     updateChildDataAllocation(data, offsetLimitation);
 
     _memChunksPerData.emplace(data, chunk);
@@ -268,6 +268,23 @@ bool Allocator::allocateData(const Data& data) {
     return chunk->memType == memoryType;
 }
 
+ShapeLocation Allocator::allocateConstShape(Data& data) {
+    ShapeLocation shapeLocation;
+
+    shapeLocation.dimsLocation = Location::Blob;
+    shapeLocation.stridesLocation = Location::Blob;
+
+    const auto dimsByteSize = data->desc().dimsByteSize();
+
+    shapeLocation.dimsOffset = _blobMemOffset;
+    _blobMemOffset += dimsByteSize;
+
+    shapeLocation.stridesOffset = _blobMemOffset;
+    _blobMemOffset += dimsByteSize;
+
+    return shapeLocation;
+}
+
 void Allocator::freeData(const Data& data, DeallocationMode mode) {
     //
     // Release the chunk
@@ -313,7 +330,7 @@ void Allocator::freeData(const Data& data, DeallocationMode mode) {
 
             _memChunksPerData[data] = ddrChunk;
 
-            data->setAllocationInfo(DataLocation::BSS, ddrChunk->pointer);
+            data->setDataAllocationInfo({Location::BSS, ddrChunk->pointer});
             updateChildDataAllocation(data, DDR_MAX_SIZE);
 
             break;
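
allocateConstShape above reserves two equal-sized slots in the blob section, one for dims and one for strides, each dimsByteSize bytes long, advancing the running blob offset as it goes. A small sketch of that bookkeeping, with simplified placeholder types standing in for the allocator's ShapeLocation and member offsets:

#include <cstdint>

enum class Location { Blob };

// Simplified mirror of the ShapeLocation aggregate used by the allocator.
struct ShapeLocation {
    Location dimsLocation = Location::Blob;
    int dimsOffset = 0;
    Location stridesLocation = Location::Blob;
    int stridesOffset = 0;
};

// Reserve back-to-back slots for dims and strides inside the blob section.
ShapeLocation allocateConstShape(int& blobMemOffset, int numDims) {
    const int dimsByteSize = numDims * static_cast<int>(sizeof(uint32_t));

    ShapeLocation shape;
    shape.dimsOffset = blobMemOffset;
    blobMemOffset += dimsByteSize;

    shape.stridesOffset = blobMemOffset;
    blobMemOffset += dimsByteSize;

    return shape;
}
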
index d6c5e9d..40831e6 100644 (file)
@@ -4,6 +4,16 @@
 
 #include <vpu/middleend/hw/conv_tiling/hw_stage_tiler.hpp>
 
+#include <vpu/stages/stub_stage.hpp>
+#include <vpu/stages/mx_stage.hpp>
+#include <vpu/middleend/hw/tiling.hpp>
+#include <vpu/middleend/hw/utility.hpp>
+#include <vpu/utils/attributes_map.hpp>
+#include <vpu/model/data_contents/hw_weights_content.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+#include <vpu/model/data_contents/replicated_data_content.hpp>
+#include <vpu/model/data_contents/scaled_content.hpp>
+
 #include <precision_utils.h>
 #include <memory>
 #include <list>
 #include <unordered_map>
 #include <set>
 
-#include <vpu/stages/stub_stage.hpp>
-#include <vpu/stages/mx_stage.hpp>
-#include <vpu/middleend/hw/tiling.hpp>
-#include <vpu/middleend/hw/utility.hpp>
-#include <vpu/utils/attributes_map.hpp>
-
 namespace vpu {
 
 namespace {
@@ -149,7 +153,7 @@ Data HWConvStageTiler::createScales(const HwConvTilingPtr& tiling, const HWConvS
                 hwScales = _model->addConstData(
                     _original->name() + "@scales",
                     DataDesc({maxExtendedOutputDimC}),
-                    replicateContent(stageOptions.reluScale, maxExtendedOutputDimC));
+                    replicateContent(stageOptions.reluScale, maxExtendedOutputDimC, DataDesc{maxExtendedOutputDimC}));
             } else {
                 hwScales = _model->addFakeData();
             }
@@ -363,6 +367,7 @@ Data HWConvStageTiler::createConstTileWeights(const HwConvChannelTilePtr& channe
         const auto content = std::make_shared<HwWeightsContent>(
             io.origWeights->content(),
             io.origWeights->desc(),
+            descriptor,
             channelTile->numInputChannels,
             channelTile->channelStartIndex);
 
index aeb0663..2824103 100644 (file)
@@ -79,95 +79,6 @@ void printTo(DotLabel& lbl, const HwPaddingInfo& hwPad) {
     }
 }
 
-//
-// HwWeightsContent
-//
-
-HwWeightsContent::HwWeightsContent(const DataContent::Ptr& origContent,
-        const DataDesc& origWeightsDesc,
-        int numInputChannels,
-        int channelStartIndex) :
-        CalculatedDataContent({origContent}),
-        _origWeightsDesc(origWeightsDesc),
-        _numInputChannels(numInputChannels),
-        _channelStartIndex(channelStartIndex) {
-}
-
-void HwWeightsContent::fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const {
-    VPU_PROFILE(HwWeightsContent);
-
-    IE_ASSERT(desc().type() == DataType::FP16);
-    IE_ASSERT(baseContents.size() == 1);
-
-    auto KX = _origWeightsDesc.dim(Dim::W);
-    auto KY = _origWeightsDesc.dim(Dim::H);
-    auto IC = _origWeightsDesc.dim(Dim::C);
-    auto OC = _origWeightsDesc.dim(Dim::N);
-    auto origTotalSize = _origWeightsDesc.totalDimSize();
-
-    auto HW_OC_inner = desc().dim(Dim::W);
-    auto HW_OC_outer = desc().dim(Dim::N);
-    IE_ASSERT(HW_OC_outer * HW_OC_inner >= OC);
-
-    auto HW_K = desc().dim(Dim::H);
-    IE_ASSERT(HW_K == KX * KY);
-
-    IE_ASSERT(_channelStartIndex < IC);
-    auto HW_IC = desc().dim(Dim::C);
-    auto HW_IC_real = std::min(_numInputChannels, IC - _channelStartIndex);
-
-    auto srcData = baseContents[0]->get<fp16_t>();
-    IE_ASSERT(srcData != nullptr);
-
-    auto dstData = static_cast<fp16_t*>(tempBuf);
-
-    IE_ASSERT((_channelStartIndex + HW_IC_real) * HW_K + (OC - 1) * HW_K * IC - 1 < origTotalSize);
-    IE_ASSERT((OC - 1) % HW_OC_inner +
-              (HW_K - 1) * HW_OC_inner +
-              (HW_IC_real - 1) * HW_OC_inner * HW_K +
-              ((OC - 1) / 8) * HW_OC_inner * HW_K * HW_IC < desc().totalDimSize());
-
-    if (KX == 1 && KY == 1) {
-        ie::parallel_for(OC, [=](int oc) {
-            auto oc_inner = oc % HW_OC_inner;
-            auto oc_outer = oc / HW_OC_inner;
-            for (int ic = 0; ic < HW_IC_real; ++ic) {
-                auto srcInd =
-                        (_channelStartIndex + ic) +
-                        oc * IC;
-                auto dstInd =
-                        oc_inner +
-                        ic * HW_OC_inner * HW_K +
-                        oc_outer * HW_OC_inner * HW_K * HW_IC;
-
-                dstData[dstInd] = srcData[srcInd];
-            }
-        });
-    } else {
-        ie::parallel_for(OC, [=](int oc) {
-            auto oc_inner = oc % HW_OC_inner;
-            auto oc_outer = oc / HW_OC_inner;
-            for (int ic = 0; ic < HW_IC_real; ++ic) {
-                for (int ky = 0; ky < KY; ++ky) {
-                    for (int kx = 0; kx < KX; ++kx) {
-                        auto srcInd =
-                                (kx + ky * KX) +
-                                (_channelStartIndex + ic) * HW_K +
-                                oc * HW_K * IC;
-                        auto dstInd =
-                                oc_inner +
-                                (ky * KX + kx) * HW_OC_inner +
-                                ic * HW_OC_inner * HW_K +
-                                oc_outer * HW_OC_inner * HW_K * HW_IC;
-
-                        dstData[dstInd] = srcData[srcInd];
-                    }
-                }
-            }
-        });
-    }
-}
-
 int calculateHwBufferSize(const DimValues& dims, const DimsOrder& order) {
     const auto desc = DataDesc{DataType::FP16, order.empty() ? DimsOrder::fromNumDims(dims.size()) : order, dims};
     IE_ASSERT(desc.numDims() > 2 || desc.dimsOrder() == DimsOrder::NC);
index 0250216..8fb1480 100644 (file)
@@ -4,6 +4,7 @@
 
 #include "vpu/stages/iteration_rule.hpp"
 #include "vpu/middleend/pass_manager.hpp"
+#include "vpu/model/data_contents/replicated_data_content.hpp"
 
 #include <utility>
 #include <string>
index cfbcb9f..7381360 100644 (file)
@@ -358,9 +358,9 @@ void PassImpl::copyHwMisalignedInput(const Model& model) {
 
         auto inputEdge = stage->inputEdge(0);
         auto input = inputEdge->input();
-        IE_ASSERT(input->location() != DataLocation::None);
+        IE_ASSERT(input->dataLocation().location != Location::None);
 
-        if (input->memoryOffset() % 16 != 0) {
+        if (input->dataLocation().offset % 16 != 0) {
             env.log->trace("HW Stage [%s] input [%s]", stage->name(), input->name());
 
             auto newInput = model->duplicateData(
index 6a099a6..b664eb6 100644 (file)
@@ -184,6 +184,15 @@ AllocationResult runAllocator(const Model& model, bool onlyCheckCMX) {
         }
     }
 
+    //
+    // Allocate shape for all datas
+    //
+
+    for (auto data : model->datas()) {
+        const auto shapeLocation = allocator.allocateConstShape(data);
+        data->setShapeAllocationInfo(shapeLocation);
+    }
+
     return AllocationResult();
 }
 
index f65c559..d465ce2 100644 (file)
@@ -4,12 +4,13 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
-#include <vector>
-#include <memory>
+#include <vpu/middleend/sw/utility.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
 
 #include <blob_factory.hpp>
 
-#include <vpu/middleend/sw/utility.hpp>
+#include <vector>
+#include <memory>
 
 namespace vpu {
 
index 45aa93e..afbf7ed 100644 (file)
@@ -41,7 +41,7 @@ void PassImpl::run(const Model& model) {
         });
 
         if (memoryType == MemoryType::CMX) {
-            IE_ASSERT(topParent->location() == DataLocation::CMX);
+            IE_ASSERT(topParent->dataLocation().location == Location::CMX);
         }
 
         //
index 90f009a..4066905 100644 (file)
@@ -4,9 +4,6 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
-#include <precision_utils.h>
-#include <ie_parallel.hpp>
-
 #include <vpu/compile_env.hpp>
 #include <vpu/stages/stub_stage.hpp>
 #include <vpu/stages/mx_stage.hpp>
 #include <vpu/middleend/hw/utility.hpp>
 #include <vpu/middleend/hw/conv_tiling/hw_convolution_tiler.hpp>
 #include <vpu/middleend/hw/conv_tiling/hw_stage_tiler.hpp>
+#include <vpu/model/data_contents/hw_const_data_content.hpp>
+
+#include <precision_utils.h>
+#include <ie_parallel.hpp>
 
 #include <utility>
 #include <memory>
@@ -29,35 +30,6 @@ namespace vpu {
 
 namespace {
 
-struct Slice {
-    int start;
-    size_t size;
-
-    Slice(int start, size_t size) :
-        start(start),
-        size(size) {}
-};
-
-struct DataSlice {
-    Data data;
-    Slice slice;
-
-    DataSlice(Data data, Slice slice) :
-        data(std::move(data)),
-        slice(slice) {}
-};
-
-using DataSlices = std::vector<DataSlice>;
-
-struct ConvTileSlice {
-    HwConvTileInfo tile;
-    Slice slice;
-
-    ConvTileSlice(HwConvTileInfo tile, Slice slice) :
-        tile(tile),
-        slice(slice) {}
-};
-
 class PassImpl final : public Pass {
 public:
     explicit PassImpl(StageBuilder::Ptr stageBuilder) : _stageBuilder(std::move(stageBuilder)) {}
@@ -133,7 +105,7 @@ private:
             if (infoData1 != infoData2)
                 return infoData1 < infoData2;
 
-            const auto size = data1->content()->desc().totalDimSize();
+            const auto size = data1->content()->byteSize() / sizeof(fp16_t);
 
             const auto content1 = data1->content()->get<fp16_t>();
             const auto content2 = data2->content()->get<fp16_t>();
@@ -144,98 +116,6 @@ private:
     std::map<Data, DataSlices, LexicographicalCompareByData> _splitConstData;
 };
 
-class HwConstData final : public CalculatedDataContent {
-public:
-    HwConstData(
-        const DataContent::Ptr& origContent,
-        const DataDesc& origDesc,
-        const std::map<Dim, Slice> dimSlices) :
-            CalculatedDataContent({origContent}),
-            _origDesc(origDesc),
-            _dimSlices(dimSlices) {}
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* outBuf) const override {
-        VPU_PROFILE(HwConstData);
-
-        VPU_THROW_UNLESS(
-            desc().type() == DataType::FP16,
-            "Constant data has %v data type while only %v is supported",
-            desc().type(), DataType::FP16);
-
-        VPU_THROW_UNLESS(baseContents.size() == 1,
-            "Missing source buffer for constant data");
-
-        const auto srcData = baseContents[0]->get<fp16_t>();
-        auto dstData = static_cast<fp16_t*>(outBuf);
-
-        VPU_THROW_UNLESS(srcData != nullptr,
-            "Source buffer for constant data has null address");
-
-        auto getDimSlice = [this](const Dim dim) {
-            auto it = _dimSlices.find(dim);
-            if (it != _dimSlices.end()) {
-                return it->second;
-            }
-
-            const int startInd = 0;
-            const size_t size = _origDesc.dim(dim);
-
-            return Slice(startInd, size);
-        };
-
-        if (_origDesc.numDims() == 4) {
-            Slice slice = getDimSlice(Dim::N);
-
-            int startOC = slice.start;
-            size_t numOC = slice.size;
-
-            const auto IC = _origDesc.dim(Dim::C);
-            const auto K = _origDesc.dim(Dim::H);
-            const auto V = _origDesc.dim(Dim::W);
-
-            const auto kernelStride     = V;
-            const auto inChannelStride  = K * kernelStride;
-            const auto outerStride      = IC * inChannelStride;
-
-            ie::parallel_for(numOC, [=](int oc) {
-                const auto ocSlice = oc;
-                oc += startOC;
-
-                const auto ocInner = oc % V;
-                const auto ocOuter = oc / V;
-                const auto ocSliceInner = ocSlice % V;
-                const auto ocSliceOuter = ocSlice / V;
-
-                const auto ocSrc = ocInner + ocOuter * outerStride;
-                const auto ocDst = ocSliceInner + ocSliceOuter * outerStride;
-
-                for (int ic = 0; ic < IC; ++ic)
-                    for (int k = 0; k < K; ++k) {
-                        const auto srcInd = ocSrc +
-                                            k * kernelStride +
-                                            ic * inChannelStride;
-                        const auto dstInd = ocDst +
-                                            k * kernelStride +
-                                            ic * inChannelStride;
-
-                        dstData[dstInd] = srcData[srcInd];
-                    }
-            });
-        } else if (_origDesc.numDims() == 1) {
-            Slice slice = getDimSlice(Dim::C);
-
-            std::copy(srcData + slice.start, srcData + slice.start + slice.size, dstData);
-        } else {
-            THROW_IE_EXCEPTION << "Invalid number of dimensions " << _origDesc.numDims();
-        }
-    }
-
-private:
-    DataDesc _origDesc;
-    std::map<Dim, Slice> _dimSlices;
-};
-
 void PassImpl::run(const Model& model) {
     VPU_PROFILE(hwExtraSplit);
 
@@ -444,6 +324,7 @@ Data PassImpl::splitWeights(
     const auto content = std::make_shared<HwConstData>(
         weights->content(),
         weights->desc(),
+        weightsDesc,
         dimSlices);
 
     weightsDesc.setDim(Dim::N, alignVal(numChannels, 8) / vectorSize);
@@ -474,6 +355,7 @@ Data PassImpl::splitBiases(
     const auto biasesContent = std::make_shared<HwConstData>(
         biases->content(),
         biases->desc(),
+        newBiasesDesc,
         dimSlices);
     const auto newBiases = model->duplicateData(biases, postfix, newBiasesDesc, biasesContent);
 
@@ -502,6 +384,7 @@ Data PassImpl::splitScales(
     const auto scalesContent = std::make_shared<HwConstData>(
         scales->content(),
         scales->desc(),
+        newScalesDesc,
         dimSlices);
     const auto newScales = model->duplicateData(scales, postfix, newScalesDesc, scalesContent);
 
index fd1a7e0..c343b46 100644 (file)
@@ -4,8 +4,17 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
-#include <cmath>
+#include <vpu/compile_env.hpp>
+#include <vpu/stages/stub_stage.hpp>
+#include <vpu/stages/mx_stage.hpp>
+#include <vpu/middleend/hw/tiling.hpp>
+#include <vpu/middleend/hw/utility.hpp>
+#include <vpu/model/data_contents/hw_weights_content.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
+#include <precision_utils.h>
 
+#include <cmath>
 #include <tuple>
 #include <vector>
 #include <limits>
 #include <set>
 #include <array>
 
-#include <precision_utils.h>
-
-#include <vpu/compile_env.hpp>
-#include <vpu/stages/stub_stage.hpp>
-#include <vpu/stages/mx_stage.hpp>
-#include <vpu/middleend/hw/tiling.hpp>
-#include <vpu/middleend/hw/utility.hpp>
-
 namespace vpu {
 
 namespace {
@@ -190,6 +191,7 @@ Data createHWWeights(const Model& model, const Stage& original, int hwInputDimC,
 
         const auto& content = std::make_shared<HwWeightsContent>(
             origWeights->content(),
+            dataDescriptor,
             contentDescriptor,
             extendedHWInputDimC);
 
index 514a23e..571c690 100644 (file)
@@ -2,62 +2,20 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <memory>
-#include <utility>
-#include <vector>
+#include <vpu/middleend/pass_manager.hpp>
+#include <vpu/stages/stub_stage.hpp>
+#include <vpu/model/data_contents/merge_fc_content.hpp>
 
 #include <ie_parallel.hpp>
 
-#include <vpu/middleend/pass_manager.hpp>
-#include <vpu/stages/stub_stage.hpp>
+#include <memory>
+#include <utility>
+#include <vector>
 
 namespace vpu {
 
 namespace {
 
-class MergeFullyConnectedContentsByChannels final : public CalculatedDataContent {
-public:
-    explicit MergeFullyConnectedContentsByChannels(const SmallVector<DataContent::Ptr, 2>& contents) :
-        CalculatedDataContent(contents) {}
-
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& contents, void* temp) const override {
-        IE_ASSERT(!contents.empty());
-        // vpu::DataNode has content and vpu::DataDesc with dimensions' vector
-        // content has dimensions's vector as well
-        // they can be different so we extract channels number from contents
-        const auto dstC = std::accumulate(contents.begin(), contents.end(), 0, [](int reduction, const DataContent::Ptr& content) {
-            return reduction + content->desc().dims()[Dim::C];});
-
-        for (std::size_t i = 0, dstChannelsOffset = 0; i < contents.size(); ++i) {
-            const auto& content = contents[i];
-            const auto& srcDesc = content->desc();
-
-            const auto& srcDims = srcDesc.dims();
-            const auto& elemSize = srcDesc.elemSize();
-
-            const auto N = srcDims.get(Dim::N, 1);
-            const auto H = srcDims.get(Dim::H, 1);
-            const auto W = srcDims.get(Dim::W, 1) * elemSize;
-
-            const auto& srcC = srcDims[Dim::C];
-
-            const auto src = content->get<uint8_t>();
-                  auto dst = static_cast<uint8_t*>(temp);
-
-            InferenceEngine::parallel_for4d(N, srcC, H, W, [dstChannelsOffset, N, H, W, src, dst, srcC, dstC](int n, int c, int h, int w) {
-                const auto& srcc = c;
-                const auto& dstc = dstChannelsOffset + c;
-
-                const auto& srcOffset = n * H * W * srcC + srcc * H * W + h * W + w;
-                const auto& dstOffset = n * H * W * dstC + dstc * H * W + h * W + w;
-                dst[dstOffset] = src[srcOffset];
-            });
-
-            dstChannelsOffset += srcC;
-        }
-    }
-};
-
 DataDesc mergeDescriptors(const DataVector& dataObjects) {
     const auto& targetDim = Dim::C;
     auto mergedDescriptor = dataObjects.front()->desc();
@@ -72,13 +30,17 @@ Data mergeConstDataObjects(const Model& model, const DataVector& dataObjects) {
         return model->addFakeData();
     }
 
-    std::vector<DataContent::Ptr> contents;
+    std::vector<DataContent::CPtr> contents;
+    std::vector<DataDesc> descs;
     for (const auto& data : dataObjects) {
         contents.push_back(data->content());
+        descs.push_back(data->desc());
     }
 
-    auto content = std::make_shared<MergeFullyConnectedContentsByChannels>(contents);
-    return model->duplicateData(dataObjects.front(), "@merge-parallel-fc", mergeDescriptors(dataObjects), content);
+    auto mergedDesc = mergeDescriptors(dataObjects);
+
+    auto content = std::make_shared<MergeFullyConnectedContentsByChannels>(contents, descs, mergedDesc);
+    return model->duplicateData(dataObjects.front(), "@merge-parallel-fc", mergedDesc, content);
 }
 
 Data mergeOutputs(const Model& model, const DataVector& dataObjects) {
index 9bf073f..47b2ecf 100644 (file)
@@ -4,6 +4,11 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
+#include <vpu/stages/stub_stage.hpp>
+#include <vpu/middleend/sw/utility.hpp>
+#include <vpu/compile_env.hpp>
+#include <vpu/model/data_contents/deconvolution_contents.hpp>
+
 #include <tuple>
 #include <vector>
 #include <algorithm>
 #include <unordered_map>
 #include <memory>
 
-#include <vpu/stages/stub_stage.hpp>
-#include <vpu/middleend/sw/utility.hpp>
-#include <vpu/compile_env.hpp>
-
 namespace vpu {
 
 namespace {
@@ -91,25 +92,6 @@ private:
     }
 };
 
-
-class DeconvolutionToConvolutionContent final : public CalculatedDataContent {
-public:
-    DeconvolutionToConvolutionContent(
-            const DataContent::Ptr& origContent) :
-            CalculatedDataContent({origContent}) {
-    }
-
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const {
-        VPU_PROFILE(DeconvolutionToConvolutionContent);
-
-        IE_ASSERT(baseContents.size() == 1);
-        IE_ASSERT(desc().type() == DataType::FP16);
-
-        deconv_to_conv(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), desc());
-    }
-};
-
-
 class PassImpl final : public Pass {
 public:
     explicit PassImpl(const StageBuilder::Ptr& stageBuilder) : _stageBuilder(stageBuilder) {}
@@ -192,7 +174,7 @@ void PassImpl::run(const Model& model) {
 
         auto newOutput = model->duplicateData(output, "@upsampleData", newDesc);
         auto newWeights = model->duplicateData(weights, "@upsampleData", weights->desc(),
-                     std::make_shared<DeconvolutionToConvolutionContent>(weights->content()));
+                     std::make_shared<DeconvolutionToConvolutionContent>(weights->content(), weights->desc()));
 
         auto upsampleStage = model->addNewStage<UpsamplingStage>(
                 stage->origLayerName() + "@Upsample",
index 95cd18d..1f9875f 100644 (file)
@@ -4,6 +4,12 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
+#include <vpu/stages/stub_stage.hpp>
+#include <vpu/model/data_contents/priorbox_contents.hpp>
+
+#include <ie_parallel.hpp>
+#include <precision_utils.h>
+
 #include <cmath>
 
 #include <algorithm>
 #include <vector>
 #include <queue>
 
-#include <ie_parallel.hpp>
-#include <precision_utils.h>
-
-#include <vpu/stages/stub_stage.hpp>
-
 namespace vpu {
 
 namespace {
 
-class PriorBoxContent final : public CalculatedDataContent {
-public:
-    PriorBoxContent(
-        const DataDesc& inDesc0,
-        const DataDesc& inDesc1,
-        const DataDesc& outDesc,
-        const ie::CNNLayerPtr &layer) :
-        _inDesc0(inDesc0), _inDesc1(inDesc1), _outDesc(outDesc),
-        _layer(layer) {
-        IE_ASSERT(layer != nullptr);
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2> &, void *tempBuf) const override {
-        VPU_PROFILE(PriorBoxContent);
-
-        auto tempPtr = static_cast<fp16_t*>(tempBuf);
-
-        auto _min_sizes = _layer->GetParamAsFloats("min_size", {});
-        auto _max_sizes = _layer->GetParamAsFloats("max_size", {});
-        auto aspect_ratios = _layer->GetParamAsFloats("aspect_ratio");
-        auto _flip = static_cast<bool>(_layer->GetParamAsInt("flip"));
-        auto _clip = static_cast<bool>(_layer->GetParamAsInt("clip"));
-        auto _variance = _layer->GetParamAsFloats("variance");
-        auto _img_h = _layer->GetParamAsInt("img_h", 0);
-        auto _img_w = _layer->GetParamAsInt("img_w", 0);
-        auto _step = _layer->GetParamAsFloat("step", 0);
-        auto _offset = _layer->GetParamAsFloat("offset", 0);
-        auto _scale_all_sizes = static_cast<bool>(_layer->GetParamAsInt("scale_all_sizes", 1));
-
-        auto _fixed_sizes = _layer->GetParamAsFloats("fixed_size", {});
-        auto _fixed_ratios = _layer->GetParamAsFloats("fixed_ratio", {});
-        auto _densitys = _layer->GetParamAsFloats("density", {});
-
-        SmallVector<float> _aspect_ratios;
-        _aspect_ratios.reserve(aspect_ratios.size() + 1);
-
-        _aspect_ratios.push_back(1.0f);
-        for (const auto& aspect_ratio : aspect_ratios) {
-            bool exist = false;
-
-            for (const auto& _aspect_ratio : _aspect_ratios) {
-                if (fabsf(aspect_ratio - _aspect_ratio) < 1e-6) {
-                    exist = true;
-                    break;
-                }
-            }
-            if (!exist) {
-                _aspect_ratios.push_back(aspect_ratio);
-                if (_flip) {
-                    if (isFloatEqual(aspect_ratio, 0.f)) {
-                        THROW_IE_EXCEPTION << "[VPU] PriorBox has 0.0 aspect ratio param in flip mode, "
-                                           << " possible division by zero";
-                    }
-                    _aspect_ratios.push_back(1.0f / aspect_ratio);
-                }
-            }
-        }
-
-        int _num_priors;
-        if (_scale_all_sizes) {
-            _num_priors = static_cast<int>(_aspect_ratios.size() * _min_sizes.size());
-        } else {
-            _num_priors = static_cast<int>(_aspect_ratios.size() + _min_sizes.size() - 1);
-        }
-
-        if (!_fixed_sizes.empty()) {
-            _num_priors = static_cast<int>(_aspect_ratios.size() * _fixed_sizes.size());
-        }
-
-        if (!_densitys.empty()) {
-            for (const auto& _density : _densitys) {
-                if (!_fixed_ratios.empty()) {
-                    _num_priors += _fixed_ratios.size() * (static_cast<int>(pow(_density, 2)) - 1);
-                } else {
-                    _num_priors += _aspect_ratios.size() * (static_cast<int>(pow(_density, 2)) - 1);
-                }
-            }
-        }
-
-        _num_priors += _max_sizes.size();
-
-        auto W  = _inDesc0.dim(Dim::W);
-        auto H  = _inDesc0.dim(Dim::H);
-        auto IW = _img_w == 0 ? _inDesc1.dim(Dim::W) : _img_w;
-        auto IH = _img_h == 0 ? _inDesc1.dim(Dim::H) : _img_h;
-        auto IWI = 1.0f / static_cast<float>(IW);
-        auto IHI = 1.0f / static_cast<float>(IH);
-
-        auto OW = (_outDesc.numDims() >= 4) ? _outDesc.dim(Dim::N) : 1;
-        auto OH = _outDesc.dim(Dim::W);
-
-        float step_x = 0.0f;
-        float step_y = 0.0f;
-
-        if (_step == 0) {
-            step_x = static_cast<float>(IW) / W;
-            step_y = static_cast<float>(IH) / H;
-        } else {
-            step_x = _step;
-            step_y = _step;
-        }
-
-        auto dst_data = tempPtr;
-
-        int dim = H * W * _num_priors * 4;
-        float center_x = 0.0f;
-        float center_y = 0.0f;
-
-        float box_width = 0.0f;
-        float box_height = 0.0f;
-
-        if (_outDesc.dim(Dim::W) != dim || _outDesc.dim(Dim::H) != 2) {
-            THROW_IE_EXCEPTION << "[VPU] PriorBox output have invalid dimension, exptected " << dim << "x2"
-                               << ", got " << _outDesc.dim(Dim::W) << "x" << _outDesc.dim(Dim::H)
-                               << ", layer name is: " << _layer->name;
-        }
-
-        auto max_fp16 = [](const float value, const float min) {
-            return ie::PrecisionUtils::f32tof16(value > min ? value : min);
-        };
-
-        auto min_fp16 = [](const float value, const float max) {
-            return ie::PrecisionUtils::f32tof16(value < max ? value : max);
-        };
-
-        size_t idx = 0;
-        for (int h = 0; h < H; ++h) {
-            for (int w = 0; w < W;  ++w) {
-                if (_step == 0) {
-                    center_x = (static_cast<float>(w) + 0.5f) * step_x;
-                    center_y = (static_cast<float>(h) + 0.5f) * step_y;
-                } else {
-                    center_x = (_offset + static_cast<float>(w)) * _step;
-                    center_y = (_offset + static_cast<float>(h)) * _step;
-                }
-
-                for (size_t s = 0; s < _fixed_sizes.size(); ++s) {
-                    auto fixed_size_ = static_cast<size_t>(_fixed_sizes[s]);
-                    box_width = box_height = fixed_size_ * 0.5f;
-
-                    int density_ = 0;
-                    int shift = 0;
-                    if (s < _densitys.size()) {
-                        density_ = static_cast<size_t>(_densitys[s]);
-                        shift = static_cast<int>(_fixed_sizes[s] / density_);
-                    }
-
-                    if (!_fixed_ratios.empty()) {
-                        for (const auto& fr : _fixed_ratios) {
-                            const auto box_width_ratio = _fixed_sizes[s] * 0.5f * std::sqrt(fr);
-                            const auto box_height_ratio = _fixed_sizes[s] * 0.5f / std::sqrt(fr);
-
-                            for (size_t r = 0; r < density_; ++r) {
-                                for (size_t c = 0; c < density_; ++c) {
-                                    const auto center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift;
-                                    const auto center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift;
-
-                                    dst_data[idx++] = max_fp16((center_x_temp - box_width_ratio) * IWI, 0.f);
-                                    dst_data[idx++] = max_fp16((center_y_temp - box_height_ratio) * IHI, 0.f);
-                                    dst_data[idx++] = min_fp16((center_x_temp + box_width_ratio) * IWI, 1.f);
-                                    dst_data[idx++] = min_fp16((center_y_temp + box_height_ratio) * IHI, 1.f);
-                                }
-                            }
-                        }
-                    } else {
-                        if (!_densitys.empty()) {
-                            for (int r = 0; r < density_; ++r) {
-                                for (int c = 0; c < density_; ++c) {
-                                    const auto center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift;
-                                    const auto center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift;
-
-                                    dst_data[idx++] = max_fp16((center_x_temp - box_width) * IWI, 0.f);
-                                    dst_data[idx++] = max_fp16((center_y_temp - box_height) * IHI, 0.f);
-                                    dst_data[idx++] = min_fp16((center_x_temp + box_width) * IWI, 1.f);
-                                    dst_data[idx++] = min_fp16((center_y_temp + box_height) * IHI, 1.f);
-                                }
-                            }
-                        }
-                        //  Rest of priors
-                        for (const auto& ar : _aspect_ratios) {
-                            if (fabs(ar - 1.) < 1e-6) {
-                                continue;
-                            }
-
-                            const auto box_width_ratio = _fixed_sizes[s] * 0.5f * std::sqrt(ar);
-                            const auto box_height_ratio = _fixed_sizes[s] * 0.5f / std::sqrt(ar);
-                            for (int r = 0; r < density_; ++r) {
-                                for (int c = 0; c < density_; ++c) {
-                                    const auto center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift;
-                                    const auto center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift;
-
-                                    dst_data[idx++] = max_fp16((center_x_temp - box_width_ratio) * IWI, 0.f);
-                                    dst_data[idx++] = max_fp16((center_y_temp - box_height_ratio) * IHI, 0.f);
-                                    dst_data[idx++] = min_fp16((center_x_temp + box_width_ratio) * IWI, 1.f);
-                                    dst_data[idx++] = min_fp16((center_y_temp + box_height_ratio) * IHI, 1.f);
-                                }
-                            }
-                        }
-                    }
-                }
-
-                for (size_t msIdx = 0; msIdx < _min_sizes.size(); msIdx++) {
-                    box_width = _min_sizes[msIdx];
-                    box_height = _min_sizes[msIdx];
-
-                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x - box_width / 2.0f) / IW);
-                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y - box_height / 2.0f) / IH);
-                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x + box_width / 2.0f) / IW);
-                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y + box_height / 2.0f) / IH);
-
-                    if (_max_sizes.size() > msIdx) {
-                        box_width = box_height = std::sqrt(_min_sizes[msIdx] * _max_sizes[msIdx]);
-
-                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x - box_width / 2.0f) / IW);
-                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y - box_height / 2.0f) / IH);
-                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x + box_width / 2.0f) / IW);
-                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y + box_height / 2.0f) / IH);
-                    }
-
-                    if (_scale_all_sizes || (!_scale_all_sizes && (msIdx == _min_sizes.size() - 1))) {
-                        size_t sIdx = _scale_all_sizes ? msIdx : 0;
-                        for (const auto& ar : _aspect_ratios) {
-                            if (std::fabs(ar - 1.0f) < 1e-6) {
-                                continue;
-                            }
-
-                            box_width = _min_sizes[sIdx] * std::sqrt(ar);
-                            box_height = _min_sizes[sIdx] / std::sqrt(ar);
-
-                            dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x - box_width / 2.0f) / IW);
-                            dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y - box_height / 2.0f) / IH);
-                            dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x + box_width / 2.0f) / IW);
-                            dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y + box_height / 2.0f) / IH);
-                        }
-                    }
-                }
-            }
-        }
-
-        if (_clip) {
-            for (int d = 0; d < dim; ++d) {
-                dst_data[d] = (std::min)((std::max)(dst_data[d], ie::PrecisionUtils::f32tof16(0.0f)), ie::PrecisionUtils::f32tof16(1.0f));
-            }
-        }
-
-        int channel_size = OH * OW;
-
-        dst_data += channel_size;
-
-        if (_variance.size() == 1) {
-            ie::parallel_for(channel_size, [&](int i) {
-                dst_data[i] = ie::PrecisionUtils::f32tof16(_variance[0]);
-            });
-        } else {
-            ie::parallel_for4d(H, W, _num_priors, 4, [&](int h, int w, int i, int j) {
-                dst_data[j + 4 * (i + _num_priors * (w + W * h))] = ie::PrecisionUtils::f32tof16(_variance[j]);
-            });
-        }
-    }
-
-private:
-    DataDesc _inDesc0;
-    DataDesc _inDesc1;
-    DataDesc _outDesc;
-    ie::CNNLayerPtr _layer;
-};
-
-class PriorBoxClusteredContent final : public CalculatedDataContent {
-public:
-    PriorBoxClusteredContent(
-        const DataDesc& inDesc0,
-        const DataDesc& inDesc1,
-        const DataDesc& outDesc,
-        const ie::CNNLayerPtr& layer) :
-        _inDesc0(inDesc0), _inDesc1(inDesc1), _outDesc(outDesc),
-        _layer(layer) {
-        IE_ASSERT(layer != nullptr);
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>&, void* tempBuf) const override {
-        VPU_PROFILE(PriorBoxClusteredContent);
-
-        auto tempPtr = static_cast<fp16_t*>(tempBuf);
-
-        auto widths_ = _layer->GetParamAsFloats("width");
-        auto heights_ = _layer->GetParamAsFloats("height");
-        auto clip_ = _layer->GetParamAsInt("clip");
-        auto variance_ = _layer->GetParamAsFloats("variance");
-        auto img_h_ = _layer->GetParamAsInt("img_h", 0);
-        auto img_w_ = _layer->GetParamAsInt("img_w", 0);
-        auto step_ = _layer->GetParamAsFloat("step", 0);
-        auto step_h_ = _layer->GetParamAsFloat("step_h", 0);
-        auto step_w_ = _layer->GetParamAsFloat("step_w", 0);
-        auto offset_ = _layer->GetParamAsFloat("offset", 0);
-
-        auto num_priors_ = widths_.size();
-
-        if (variance_.empty()) {
-            variance_.push_back(0.1);
-        }
-
-        auto layer_width  = _inDesc0.dim(Dim::W);
-        auto layer_height = _inDesc0.dim(Dim::H);
-
-        auto img_width  = img_w_ == 0 ? _inDesc1.dim(Dim::W) : img_w_;
-        auto img_height = img_h_ == 0 ? _inDesc1.dim(Dim::H) : img_h_;
-
-        auto step_w = step_w_ == 0 ? step_ : step_w_;
-        auto step_h = step_h_ == 0 ? step_ : step_h_;
-        if (step_w == 0 || step_h == 0) {
-            step_w = static_cast<float>(img_width) / layer_width;
-            step_h = static_cast<float>(img_height) / layer_height;
-        }
-
-        auto expetected_output_dimx = layer_height * layer_width * num_priors_ * 4;
-        if (_outDesc.dim(Dim::W) != expetected_output_dimx || _outDesc.dim(Dim::H) != 2) {
-            THROW_IE_EXCEPTION << "PriorBoxClustered output has invalid dimension, exptected " << expetected_output_dimx << "x2"
-                               << ", got " << _outDesc.dim(Dim::W) << "x" << _outDesc.dim(Dim::H) << ", layer name is: " << _layer->name;
-        }
-
-        auto offset = _outDesc.dim(Dim::W);
-        auto var_size = variance_.size();
-
-        auto top_data_0 = tempPtr;
-        auto top_data_1 = top_data_0 + offset;
-
-        ie::parallel_for2d(layer_height, layer_width, [=](int h, int w) {
-            auto center_x = (w + offset_) * step_w;
-            auto center_y = (h + offset_) * step_h;
-
-            for (int s = 0; s < num_priors_; ++s) {
-                auto box_width  = widths_[s];
-                auto box_height = heights_[s];
-
-                auto xmin = (center_x - box_width  / 2.0f) / img_width;
-                auto ymin = (center_y - box_height / 2.0f) / img_height;
-                auto xmax = (center_x + box_width  / 2.0f) / img_width;
-                auto ymax = (center_y + box_height / 2.0f) / img_height;
-
-                if (clip_) {
-                    xmin = std::min(std::max(xmin, 0.0f), 1.0f);
-                    ymin = std::min(std::max(ymin, 0.0f), 1.0f);
-                    xmax = std::min(std::max(xmax, 0.0f), 1.0f);
-                    ymax = std::min(std::max(ymax, 0.0f), 1.0f);
-                }
-
-                top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 0] = ie::PrecisionUtils::f32tof16(xmin);
-                top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 1] = ie::PrecisionUtils::f32tof16(ymin);
-                top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 2] = ie::PrecisionUtils::f32tof16(xmax);
-                top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 3] = ie::PrecisionUtils::f32tof16(ymax);
-
-                for (int j = 0; j < var_size; j++) {
-                    auto index = h * layer_width * num_priors_ * var_size + w * num_priors_ * var_size + s * var_size + j;
-                    top_data_1[index] = ie::PrecisionUtils::f32tof16(variance_[j]);
-                }
-            }
-        });
-    }
-
-private:
-    DataDesc _inDesc0;
-    DataDesc _inDesc1;
-    DataDesc _outDesc;
-    ie::CNNLayerPtr _layer;
-};
-
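For reference, the clustered-prior computation deleted above reduces to a plain normalized box around each grid cell. The values below are illustrative only, not taken from any model:

// Worked example: cell (h = 0, w = 1), step_w = 16, offset_ = 0.5,
// box width 32, image width 320:
//   center_x = (1 + 0.5) * 16         = 24
//   xmin     = (24 - 32 / 2.0f) / 320 = 0.025
//   xmax     = (24 + 32 / 2.0f) / 320 = 0.125
// The y coordinates follow the same pattern with step_h, the box height and
// the image height; clip_ finally clamps every coordinate to [0, 1].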
 //
 // UnusedDataRemover class deletes data that has no consumers,
 // and also recursively deletes all its unused predecessors, including
index c52d275..675fad4 100644 (file)
@@ -5,6 +5,7 @@
 #include <vpu/middleend/pass_manager.hpp>
 #include <vpu/middleend/sw/utility.hpp>
 #include <vpu/model/data.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
 
 #include <precision_utils.h>
 
index d5224a0..5ff9b9e 100644 (file)
@@ -4,6 +4,8 @@
 
 #include "vpu/middleend/pass_manager.hpp"
 #include "vpu/utils/numeric.hpp"
+#include "vpu/model/data_contents/ie_blob_content.hpp"
+
 #include "precision_utils.h"
 #include "ie_memcpy.h"
 
index f0c6ea5..57d7d92 100644 (file)
@@ -4,13 +4,14 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
-#include <vector>
-#include <set>
-#include <memory>
+#include <vpu/utils/numeric.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
 
 #include <precision_utils.h>
 
-#include <vpu/utils/numeric.hpp>
+#include <vector>
+#include <set>
+#include <memory>
 
 namespace vpu {
 
index f03f90b..dfa85e1 100644 (file)
@@ -4,14 +4,15 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
+#include <vpu/compile_env.hpp>
+#include <vpu/middleend/hw/utility.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
 #include <vector>
 #include <set>
 #include <memory>
 #include <array>
 
-#include <vpu/compile_env.hpp>
-#include <vpu/middleend/hw/utility.hpp>
-
 namespace vpu {
 
 namespace {
index 908be06..7d93920 100644 (file)
@@ -4,6 +4,12 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
+#include <vpu/middleend/hw/tiling.hpp>
+#include <vpu/middleend/hw/utility.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
+#include <precision_utils.h>
+
 #include <memory>
 #include <array>
 #include <string>
 #include <tuple>
 #include <limits>
 
-#include <precision_utils.h>
-
-#include <vpu/middleend/hw/tiling.hpp>
-#include <vpu/middleend/hw/utility.hpp>
-
 namespace vpu {
 
 namespace {
index fe9c67a..2a5a550 100644 (file)
@@ -6,6 +6,7 @@
 #include "vpu/stage_builder.hpp"
 #include "vpu/utils/numeric.hpp"
 #include "precision_utils.h"
+#include "vpu/model/data_contents/ie_blob_content.hpp"
 
 #include <memory>
 #include <set>
index 08a1de7..769949d 100644 (file)
@@ -3,6 +3,10 @@
 //
 
 #include <vpu/middleend/pass_manager.hpp>
+#include <vpu/middleend/sw/utility.hpp>
+#include <vpu/model/data_contents/conv_weights_contents.hpp>
+#include <vpu/model/data_contents/default_sw_weights_content.hpp>
+
 #include <limits>
 
 #include <vector>
 #include <unordered_set>
 #include <set>
 
-#include <vpu/middleend/sw/utility.hpp>
-
 #define REFERENCE_CONVOLUTION 0
 
 namespace vpu {
 
 namespace {
 
-class ConvIm2ColWeightsContent final : public CalculatedDataContent {
-public:
-    explicit ConvIm2ColWeightsContent(const DataContent::Ptr& origContent) :
-            CalculatedDataContent({origContent}) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(ConvIm2ColWeightsContent);
-        kchw_to_khwc(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), desc());
-    }
-};
-
-class Conv3x3WeightsContent final : public CalculatedDataContent {
-public:
-    explicit Conv3x3WeightsContent(const DataContent::Ptr& origContent) :
-            CalculatedDataContent({origContent}) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(Conv3x3WeightsContent);
-        kchw_to_hwkc(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), desc());
-    }
-};
-
-class ConvCHWWeightsContent final : public CalculatedDataContent {
-public:
-    explicit ConvCHWWeightsContent(const DataContent::Ptr& origContent) :
-            CalculatedDataContent({origContent}) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(ConvCHWWeightsContent);
-        kchw_to_hwkc(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), desc());
-    }
-};
-
 class ConvStage final : public StageNode {
 public:
     using StageNode::StageNode;
@@ -124,7 +87,7 @@ private:
                     weights,
                     "@SW",
                     newWeightsDesc,
-                    std::make_shared<DefaultSwWeightsContent>(weights->content()));
+                    std::make_shared<DefaultSwWeightsContent>(weights->content(), newWeightsDesc));
 
                 weights->attrs().set<Data>("swWeights", swWeights);
             }
@@ -149,7 +112,7 @@ private:
                         weights,
                         "@SW",
                         newWeightsDesc,
-                        std::make_shared<DefaultSwWeightsContent>(weights->content()));
+                        std::make_shared<DefaultSwWeightsContent>(weights->content(), newWeightsDesc));
                 } else if (isConv1x1) {
                     swWeights = model()->duplicateData(
                         weights,
@@ -161,13 +124,13 @@ private:
                         weights,
                         "@SW",
                         newWeightsDesc,
-                        std::make_shared<Conv3x3WeightsContent>(weights->content()));
+                        std::make_shared<Conv3x3WeightsContent>(weights->content(), newWeightsDesc));
                 } else {
                     swWeights = model()->duplicateData(
                         weights,
                         "@SW",
                         newWeightsDesc,
-                        std::make_shared<ConvIm2ColWeightsContent>(weights->content()));
+                        std::make_shared<ConvIm2ColWeightsContent>(weights->content(), newWeightsDesc));
 
                     double im2ColBufSizeF = static_cast<double>(kernelSizeX) * kernelSizeY *
                         output->desc().dim(Dim::W) * output->desc().dim(Dim::H) * input->desc().dim(Dim::C)
@@ -215,7 +178,7 @@ private:
                         weights,
                         "@SW",
                         newWeightsDesc,
-                        std::make_shared<ConvCHWWeightsContent>(weights->content()));
+                        std::make_shared<ConvCHWWeightsContent>(weights->content(), newWeightsDesc));
                 }
 
                 weights->attrs().set<Data>("swWeights", swWeights);
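The same change is applied to every software-convolution weights content in this pass: the duplicated data keeps its descriptor, and that descriptor is now also handed to the content object, which computes its own byteSize() from it (see the new conv_weights_contents.cpp and default_sw_weights_content.cpp later in this diff). A minimal sketch of the updated call shape, assuming model(), weights and newWeightsDesc as in the pass above:

// Sketch only - mirrors the calls updated in this hunk.
auto swWeights = model()->duplicateData(
    weights,
    "@SW",
    newWeightsDesc,
    std::make_shared<DefaultSwWeightsContent>(weights->content(), newWeightsDesc));
weights->attrs().set<Data>("swWeights", swWeights);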
index e402756..a8d9553 100644 (file)
 
 #include <vpu/middleend/pass_manager.hpp>
 
+#include <vpu/middleend/sw/utility.hpp>
+#include <vpu/utils/numeric.hpp>
+#include <vpu/model/data_contents/deconvolution_contents.hpp>
+
+#include <ie_parallel.hpp>
+
 #include <vector>
 #include <string>
 #include <memory>
 #include <unordered_set>
 #include <set>
 
-#include <ie_parallel.hpp>
-
-#include <vpu/middleend/sw/utility.hpp>
-#include <vpu/utils/numeric.hpp>
-
 namespace vpu {
 
 namespace {
 
-void depthDeconvolutionRelayoutCHW(
-        const fp16_t* src, int src_size,
-        fp16_t* dst, int dst_size,
-        int KX, int KY,
-        int channels) {
-    ie::parallel_for3d(channels, KY, KX, [=](int c, int ky, int kx) {
-        int iidx = c * KX * KY + ky * KX + kx;
-        IE_ASSERT(iidx >= 0 && iidx < src_size);
-
-        int inv_kx = KX - kx - 1;
-        int inv_ky = KY - ky - 1;
-        int oidx = c * KX * KY + inv_ky * KX + inv_kx;
-        IE_ASSERT(oidx >= 0 && oidx < dst_size);
-
-        dst[oidx] = src[iidx];
-    });
-}
-
-class DepthDeconvolutionCHWWeightsContent final : public CalculatedDataContent {
-public:
-    DepthDeconvolutionCHWWeightsContent(
-            const DataContent::Ptr& origContent,
-            int KX, int KY, int channels) :
-            CalculatedDataContent({origContent}),
-            _KX(KX), _KY(KY), _channels(channels) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(DepthDeconvolutionCHWWeightsContent);
-        depthDeconvolutionRelayoutCHW(
-            baseContents[0]->get<fp16_t>(), desc().totalDimSize(),
-            static_cast<fp16_t*>(tempBuf), desc().totalDimSize(),
-            _KX, _KY, _channels);
-    }
-
-private:
-    int _KX;
-    int _KY;
-    int _channels;
-};
-
-void depthDeconvolutionRelayoutHWC(
-        const fp16_t* src, int src_size,
-        fp16_t* dst, int dst_size,
-        int KX, int KY,
-        int channels) {
-    ie::parallel_for3d(channels, KY, KX, [=](int c, int ky, int kx) {
-        int iidx = c * KX * KY + ky * KX + kx;
-        IE_ASSERT(iidx < src_size);
-
-        int inv_kx = KX - kx - 1;
-        int inv_ky = KY - ky - 1;
-        int oidx = inv_ky * KX * channels + inv_kx * channels + c;
-        IE_ASSERT(oidx < dst_size);
-
-        dst[oidx] = src[iidx];
-    });
-}
-
-class DepthDeconvolutionHWCWeightsContent final : public CalculatedDataContent {
-public:
-    DepthDeconvolutionHWCWeightsContent(
-            const DataContent::Ptr& origContent,
-            int KX, int KY, int channels) :
-            CalculatedDataContent({origContent}),
-            _KX(KX), _KY(KY), _channels(channels) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(DepthDeconvolutionHWCWeightsContent);
-        depthDeconvolutionRelayoutHWC(
-            baseContents[0]->get<fp16_t>(), desc().totalDimSize(),
-            static_cast<fp16_t*>(tempBuf), desc().totalDimSize(),
-            _KX, _KY, _channels);
-    }
-
-private:
-    int _KX;
-    int _KY;
-    int _channels;
-};
-
-void deconvolutionRelayout(
-    const fp16_t* src, int src_size,
-    fp16_t* dst, int dst_size,
-    int KX, int KY,
-    int IC, int OC) {
-    ie::parallel_for4d(OC, IC, KY, KX, [=](int oc, int ic, int ky, int kx) {
-        int iidx = ic * OC * KY * KX
-                 + oc * KY * KX
-                 + ky * KX
-                 + kx;
-        IE_ASSERT(iidx >= 0 && iidx < src_size);
-
-        int inv_kx = KX - kx - 1;
-        int inv_ky = KY - ky - 1;
-        int oidx = oc * IC * KY * KX
-                 + ic * KY * KX
-                 + inv_ky * KX
-                 + inv_kx;
-        IE_ASSERT(oidx >=  0 && oidx < dst_size);
-
-        dst[oidx] = src[iidx];
-    });
-}
-
-class DeconvolutionWeightsContent final : public CalculatedDataContent {
-public:
-    DeconvolutionWeightsContent(
-            const DataContent::Ptr& origContent,
-            int KX, int KY,
-            int IC, int OC) :
-            CalculatedDataContent({origContent}),
-            _KX(KX), _KY(KY),
-            _IC(IC), _OC(OC) {
-    }
-
-protected:
-    size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const override {
-        return 2 * desc().totalDimSize() * sizeof(fp16_t);
-    }
-
-
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(DeconvolutionWeightsContent);
-
-        auto dstPtr = static_cast<fp16_t*>(tempBuf);
-        auto dstPtr2 = dstPtr + desc().totalDimSize();
-
-        deconvolutionRelayout(
-            baseContents[0]->get<fp16_t>(), desc().totalDimSize(),
-            dstPtr2, desc().totalDimSize(),
-            _KX, _KY,
-            _IC, _OC);
-
-        kchw_to_hwkc(dstPtr2, dstPtr, desc());
-    }
-
-private:
-    int _KX;
-    int _KY;
-    int _IC;
-    int _OC;
-};
-
 class DeconvStage final : public StageNode {
 public:
     using StageNode::StageNode;
@@ -287,6 +141,7 @@ private:
                     newWeightsDesc,
                     std::make_shared<DeconvolutionWeightsContent>(
                         weights->content(),
+                        newWeightsDesc,
                         kernelSizeX, kernelSizeY,
                         input->desc().dim(Dim::C),
                         output->desc().dim(Dim::C)));
index 61c11b8..fcbb10d 100644 (file)
@@ -4,13 +4,14 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
+#include <vpu/middleend/sw/utility.hpp>
+#include <vpu/model/data_contents/default_sw_weights_content.hpp>
+
 #include <vector>
 #include <memory>
 #include <string>
 #include <set>
 
-#include <vpu/middleend/sw/utility.hpp>
-
 namespace vpu {
 
 namespace {
@@ -46,7 +47,7 @@ private:
                 weights,
                 "@SW",
                 weights->desc(),
-                std::make_shared<DefaultSwWeightsContent>(weights->content()));
+                std::make_shared<DefaultSwWeightsContent>(weights->content(), weights->desc()));
 
             weights->attrs().set<Data>("swWeights", swWeights);
         }
index d27f135..cffe70a 100644 (file)
@@ -4,6 +4,14 @@
 
 #include <vpu/middleend/pass_manager.hpp>
 
+#include <vpu/utils/numeric.hpp>
+#include <vpu/compile_env.hpp>
+#include <vpu/model/data_contents/replicated_data_content.hpp>
+#include <vpu/model/data_contents/scaled_content.hpp>
+
+#include <details/caseless.hpp>
+#include <precision_utils.h>
+
 #include <cmath>
 
 #include <sstream>
 #include <list>
 #include <set>
 
-#include <precision_utils.h>
-
-#include <vpu/utils/numeric.hpp>
-#include <vpu/compile_env.hpp>
-
-#include <details/caseless.hpp>
-
 namespace vpu {
 
 namespace {
@@ -198,7 +199,9 @@ void addScaleInput(const Model& model, const Stage& stage, float scale) {
     IE_ASSERT(stage->output(0)->desc().dims().has(Dim::C));
     const auto outputChannels = stage->output(0)->desc().dims()[Dim::C];
 
-    auto scaleInput = model->addConstData(stage->name() + "@scales", DataDesc{{outputChannels}}, replicateContent(1.0f / scale, outputChannels));
+    auto scaleInput = model->addConstData(stage->name() + "@scales",
+                                          DataDesc{{outputChannels}},
+                                          replicateContent(1.0f / scale, outputChannels, DataDesc{outputChannels}));
     model->replaceStageInput(stage->inputEdge(SCALES_IDX), scaleInput);
 }
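replicateContent() gets the same treatment: the scalar-fill overload now also receives the descriptor of the constant it produces. An illustration of the assumed behaviour, based on the ReplicatedContent implementation removed from data.cpp later in this diff:

// Illustration only:
//   replicateContent(0.5f, 4, DataDesc{4})
// produces an FP16 constant of four elements, each equal to f32tof16(0.5f).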
 
index aeae81a..645c69d 100644 (file)
 namespace vpu {
 
 //
-// DefaultSwWeightsContent
-//
-
-DefaultSwWeightsContent::DefaultSwWeightsContent(const DataContent::Ptr& origContent) :
-        CalculatedDataContent({origContent}) {
-}
-
-void DefaultSwWeightsContent::fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const {
-    VPU_PROFILE(DefaultSwWeightsContent);
-
-    IE_ASSERT(desc().type() == DataType::FP16);
-    IE_ASSERT(baseContents.size() == 1);
-
-    kchw_to_hwck(baseContents[0]->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), desc());
-}
-
-//
 // getOneOfSingleNextStage
 //
 
index 3c41b86..cbb6247 100644 (file)
@@ -4,6 +4,16 @@
 
 #include <vpu/model/data.hpp>
 
+#include <vpu/model/edges.hpp>
+#include <vpu/model/stage.hpp>
+#include <vpu/backend/backend.hpp>
+#include <vpu/utils/ie_helpers.hpp>
+#include <vpu/utils/numeric.hpp>
+#include <vpu/compile_env.hpp>
+
+#include <precision_utils.h>
+#include <ie_parallel.hpp>
+
 #include <array>
 #include <algorithm>
 #include <queue>
 #include <set>
 #include <utility>
 
-#include <precision_utils.h>
-#include <ie_parallel.hpp>
-
-#include <vpu/model/edges.hpp>
-#include <vpu/model/stage.hpp>
-#include <vpu/backend/backend.hpp>
-#include <vpu/utils/ie_helpers.hpp>
-#include <vpu/utils/numeric.hpp>
-#include <vpu/compile_env.hpp>
-
 namespace vpu {
 
 //
-// DataContent
-//
-
-DataContent::~DataContent() = default;
-
-const void* CalculatedDataContent::getRaw() const {
-    if (_temp.empty()) {
-        _temp.resize(getTempBufSize(_baseContents));
-        fillTempBuf(_baseContents, _temp.data());
-        _baseContents.clear();
-    }
-    return _temp.data();
-}
-
-size_t CalculatedDataContent::getTempBufSize(const SmallVector<DataContent::Ptr, 2>&) const {
-    return checked_cast<size_t>(desc().totalDimSize()) *
-           checked_cast<size_t>(desc().elemSize());
-}
-
-namespace {
-
-class IeBlobContent final : public DataContent {
-public:
-    IeBlobContent(const ie::Blob::Ptr& blob, int repeat) : _blob(blob), _repeat(repeat) {}
-
-protected:
-    const void* getRaw() const override {
-        if (desc().type() == DataType::FP16) {
-            if (_blobFp16 == nullptr) {
-                _blobFp16 = getBlobFP16(_blob);
-                _blob.reset();
-            }
-
-            if (_repeat == 1) {
-                return _blobFp16->cbuffer();
-            } else {
-                if (_tempFp16.empty()) {
-                    VPU_PROFILE(IeBlobContent);
-
-                    IE_ASSERT(desc().totalDimSize() % _repeat == 0);
-
-                    auto origNumElems = desc().totalDimSize() / _repeat;
-                    IE_ASSERT(checked_cast<size_t>(origNumElems) <= _blobFp16->size());
-
-                    auto origPtr = _blobFp16->cbuffer().as<const fp16_t*>();
-                    IE_ASSERT(origPtr != nullptr);
-
-                    _tempFp16.resize(checked_cast<size_t>(desc().totalDimSize()));
-
-                    ie::parallel_for(_repeat, [this, origPtr, origNumElems](int i) {
-                        std::copy_n(origPtr, origNumElems, _tempFp16.data() + i * origNumElems);
-                    });
-                }
-
-                return _tempFp16.data();
-            }
-        } else if (desc().type() == DataType::S32) {
-            if (_repeat == 1) {
-                return _blob->cbuffer();
-            } else {
-                if (_tempS32.empty()) {
-                    VPU_PROFILE(IeBlobContent);
-
-                    IE_ASSERT(desc().totalDimSize() % _repeat == 0);
-
-                    auto origNumElems = desc().totalDimSize() / _repeat;
-                    IE_ASSERT(checked_cast<size_t>(origNumElems) <= _blob->size());
-
-                    auto origPtr = _blob->cbuffer().as<const int32_t*>();
-                    IE_ASSERT(origPtr != nullptr);
-
-                    _tempS32.resize(checked_cast<size_t>(desc().totalDimSize()));
-
-                    ie::parallel_for(_repeat, [this, origPtr, origNumElems](int i) {
-                        std::copy_n(origPtr, origNumElems, _tempS32.data() + i * origNumElems);
-                    });
-                }
-
-                return _tempS32.data();
-            }
-        } else {
-            VPU_THROW_EXCEPTION << "Unsupported data type " << desc().type();
-        }
-    }
-
-private:
-    mutable ie::Blob::Ptr _blob;
-    int _repeat = 0;
-
-    mutable ie::Blob::Ptr _blobFp16;
-    mutable std::vector<fp16_t> _tempFp16;
-    mutable std::vector<int32_t> _tempS32;
-};
-
-}  // namespace
-
-DataContent::Ptr ieBlobContent(const ie::Blob::Ptr& blob, int repeat) {
-    return std::make_shared<IeBlobContent>(blob, repeat);
-}
-
-namespace {
-
-class ReplicatedContent final : public CalculatedDataContent {
-public:
-    ReplicatedContent(float val, int count) : _factor{val}, _count(count) {}
-
-    ReplicatedContent(DataContent::Ptr origContent, int count) :
-        CalculatedDataContent({std::move(origContent)}), _count(count) {
-    }
-
-protected:
-    size_t getTempBufSize(const SmallVector<DataContent::Ptr, 2>& baseContents) const override {
-        if (baseContents.empty()) {
-            return checked_cast<size_t>(_count) * sizeof(fp16_t);
-        } else {
-            IE_ASSERT(baseContents.size() == 1);
-            IE_ASSERT(desc().totalDimSize() % _count == 0);
-
-            return checked_cast<size_t>(desc().totalDimSize()) * sizeof(fp16_t);
-        }
-    }
-
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(ReplicatedContent);
-
-        auto dstPtr = static_cast<fp16_t*>(tempBuf);
-
-        if (baseContents.empty()) {
-            std::fill_n(dstPtr, _count, ie::PrecisionUtils::f32tof16(_factor));
-        } else {
-            IE_ASSERT(baseContents.size() == 1);
-            IE_ASSERT(desc().totalDimSize() % _count == 0);
-
-            auto origCount = desc().totalDimSize() / _count;
-            auto origPtr = baseContents[0]->get<fp16_t>();
-            IE_ASSERT(origPtr != nullptr);
-
-            ie::parallel_for(_count, [origPtr, origCount, dstPtr](int i) {
-                std::copy_n(origPtr, origCount, dstPtr + i * origCount);
-            });
-        }
-    }
-
-private:
-    float _factor = 1.0f;
-    int _count = 0;
-};
-
-}  // namespace
-
-DataContent::Ptr replicateContent(float val, int count) {
-    return std::make_shared<ReplicatedContent>(val, count);
-}
-
-DataContent::Ptr replicateContent(const DataContent::Ptr& origContent, int count) {
-    return std::make_shared<ReplicatedContent>(origContent, count);
-}
-
-namespace {
-
-class ScaledContent final : public CalculatedDataContent {
-public:
-    ScaledContent(const DataContent::Ptr& origContent, float scale) :
-        CalculatedDataContent({origContent}), _factor(scale) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(ScaledContent);
-
-        IE_ASSERT(baseContents.size() == 1);
-
-        auto totalSize = desc().totalDimSize();
-
-        auto origDesc = baseContents[0]->desc();
-        IE_ASSERT(origDesc.type() == DataType::FP16);
-        IE_ASSERT(origDesc.totalDimSize() == totalSize);
-
-        auto srcPtr = baseContents[0]->get<fp16_t>();
-        IE_ASSERT(srcPtr != nullptr);
-
-        auto dstPtr = static_cast<fp16_t*>(tempBuf);
-
-        ie::parallel_for(totalSize, [this, srcPtr, dstPtr](int i) {
-            dstPtr[i] = ie::PrecisionUtils::f32tof16(ie::PrecisionUtils::f16tof32(srcPtr[i]) * _factor);
-        });
-    }
-
-private:
-    float _factor = 1.0f;
-};
-
-}  // namespace
-
-DataContent::Ptr scaleContent(const DataContent::Ptr& origContent, float scale) {
-    return std::make_shared<ScaledContent>(origContent, scale);
-}
-
-namespace {
-
-class ScaledChannelContent final : public CalculatedDataContent {
-public:
-    ScaledChannelContent(
-            const DataContent::Ptr& origContent,
-            const DataContent::Ptr& scaleContent) :
-            CalculatedDataContent({origContent, scaleContent}) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(ScaledChannelContent);
-
-        IE_ASSERT(baseContents.size() == 2);
-
-        auto totalSize = desc().totalDimSize();
-
-        IE_ASSERT(desc().numDims() == 4 && desc().dimsOrder() == DimsOrder::NCHW);
-        auto numN = desc().dim(Dim::N);
-        auto numC = desc().dim(Dim::C);
-        auto numH = desc().dim(Dim::H);
-        auto numW = desc().dim(Dim::W);
-
-        auto origDesc = baseContents[0]->desc();
-        IE_ASSERT(origDesc.type() == DataType::FP16);
-        IE_ASSERT(origDesc.totalDimSize() == totalSize);
-        IE_ASSERT(baseContents[1]->desc().totalDimSize() == numN);
-
-        auto srcPtr = baseContents[0]->get<fp16_t>();
-        IE_ASSERT(srcPtr != nullptr);
-
-        auto scale = baseContents[1]->get<fp16_t>();
-        IE_ASSERT(scale != nullptr);
-
-        auto dstPtr = static_cast<fp16_t*>(tempBuf);
-
-        for (int n = 0; n < numN; n++) {
-            for (int c = 0; c < numC; c++) {
-               for (int h = 0; h < numH; h++) {
-                   for (int w = 0; w < numW; w++) {
-                       dstPtr[n * numC * numH * numW + c * numH * numW + h * numW + w] =
-                               srcPtr[n * numC * numH * numW + c * numH * numW + h * numW + w] * scale[n];
-                   }
-               }
-            }
-        }
-    }
-};
-
-}  // namespace
-
-DataContent::Ptr scaledChannelContent(
-        const DataContent::Ptr& origContent,
-        const DataContent::Ptr& scaleContent) {
-    return std::make_shared<ScaledChannelContent>(origContent, scaleContent);
-}
-
-//
 // DataNode
 //
 
@@ -380,8 +123,7 @@ void DataNode::updateRequiredStrides(const StridesRequirement& newReqs) {
 }
 
 void DataNode::clearAllocation() {
-    _location = DataLocation::None;
-    _memoryOffset = 0;
+    _dataLocation = defaultDataLocation;
     attrs().erase("ioBufferOffset");
 }
 
@@ -393,69 +135,64 @@ void DataNode::setMemReqs(MemoryType mem) {
     _memReqs = mem;
 }
 
-void DataNode::setIOInfo(DataLocation location, int ioBufferOffset) {
-    IE_ASSERT(_usage == DataUsage::Input || _usage == DataUsage::Output);
+void DataNode::setIOInfo(Location location, int ioBufferOffset) {
+    VPU_INTERNAL_CHECK(_usage == DataUsage::Input || _usage == DataUsage::Output,
+        "Data {} failed: setIOInfo called for non IO data, actual usage is {}",
+        name(), usage());
 
     if (_usage == DataUsage::Input) {
-        IE_ASSERT(location == DataLocation::Input);
+        VPU_INTERNAL_CHECK(location == Location::Input,
+            "Input data {} failed: setIOInfo called with non input location, actual location is {}",
+            name(), location);
     } else if (_usage == DataUsage::Output) {
-        IE_ASSERT(location == DataLocation::Output);
+        VPU_INTERNAL_CHECK(location == Location::Output,
+            "Output data {} failed: setIOInfo called with non output location, actual location is {}",
+            name(), location);
     }
 
-    _location = location;
-    _memoryOffset = 0;
+    _dataLocation = {location, 0};
     attrs().set<int>("ioBufferOffset", ioBufferOffset);
 }
 
-void DataNode::setAllocationInfo(DataLocation location, int memoryOffset) {
-    IE_ASSERT(_usage == DataUsage::Const || _usage == DataUsage::Intermediate || _usage == DataUsage::Temp);
+void DataNode::setDataAllocationInfo(const DataLocation& dataLocation) {
+    VPU_INTERNAL_CHECK(_usage == DataUsage::Const || _usage == DataUsage::Intermediate || _usage == DataUsage::Temp,
+        "Data {} failed: setDataAllocationInfo called for data with incorrect usage, actual usage: {} "
+        "valid usages: {}, {}, {}", name(), usage(), DataUsage::Const, DataUsage::Intermediate, DataUsage::Temp);
 
     if (_usage == DataUsage::Const) {
-        IE_ASSERT(location == DataLocation::Blob);
+        VPU_INTERNAL_CHECK(dataLocation.location == Location::Blob,
+            "Const data {} failed: setDataAllocationInfo called with non blob location, actual location is {}",
+            name(), dataLocation.location);
     } else if (_usage == DataUsage::Temp) {
-        IE_ASSERT(location == DataLocation::BSS);
+        VPU_INTERNAL_CHECK(dataLocation.location == Location::BSS,
+            "Temp data {} failed: setDataAllocationInfo called with non bss location, actual location is {}",
+            name(), dataLocation.location);
     }
 
-    _location = location;
-    _memoryOffset = memoryOffset;
+    _dataLocation = dataLocation;
 }
 
-void DataNode::serializeBuffer(
-        BlobSerializer& serializer,
-        DimsOrder newOrder) {
-    if (newOrder.numDims() == 0) {
-        serializeBufferImpl(serializer, _desc, this->strides());
-    } else {
-        IE_ASSERT(newOrder.numDims() >= _desc.dimsOrder().numDims());
-
-        auto newDims = _desc.dims();
-        auto newStrides = this->strides();
-        auto newPerm = newOrder.toPermutation();
+void DataNode::setShapeAllocationInfo(const ShapeLocation& shapeLocation) {
+    _shapeLocation = shapeLocation;
+}
 
-        auto origOrder = _desc.dimsOrder();
-        auto origPerm = origOrder.toPermutation();
+void DataNode::serializeBuffer(
+        BlobSerializer& serializer) {
+    serializeDescImpl(serializer, _desc, this->strides());
 
-        size_t origPermInd = 0;
-        for (size_t i = 0; i < newPerm.size(); i++) {
-            auto d = newPerm[i];
+    serializer.append(checked_cast<uint32_t>(_dataLocation.location));
 
-            if (origPermInd < origPerm.size() && origPerm[origPermInd] == d) {
-                ++origPermInd;
-                continue;
-            }
+    if (_dataLocation.location == Location::Input || _dataLocation.location == Location::Output) {
+        auto topParent = getTopParentData();
 
-            newDims.set(d, 1);
-            if (i == 0) {
-                newStrides.set(d, _desc.elemSize());
-            } else {
-                newStrides.set(d, newStrides[newPerm[i - 1]] * newDims[newPerm[i - 1]]);
-            }
-        }
-        IE_ASSERT(origPermInd == origPerm.size());
+        auto ioIdx = topParent->attrs().get<int>("ioIdx");
+        serializer.append(checked_cast<uint32_t>(ioIdx));
 
-        DataDesc newDesc(_desc.type(), newOrder, newDims);
-        serializeBufferImpl(serializer, newDesc, newStrides);
+        auto parentByteSize = topParent->totalByteSize();
+        serializer.append(checked_cast<uint32_t>(parentByteSize));
     }
+
+    serializer.append(checked_cast<uint32_t>(_dataLocation.offset));
 }
 
 void DataNode::serializeIOInfo(BlobSerializer& serializer) const {
@@ -485,8 +222,6 @@ void DataNode::serializeDescImpl(
         const DimValues& storedStrides) const {
     IE_ASSERT(storedDesc.numDims() <= MAX_DIMS_32);
 
-    const auto& storedDims = storedDesc.dims();
-
     auto storedDimsOrder = storedDesc.dimsOrder();
 
     auto storedPerm = storedDimsOrder.toPermutation();
@@ -496,33 +231,13 @@ void DataNode::serializeDescImpl(
     serializer.append(checked_cast<uint32_t>(storedDimsOrder.code()));
 
     serializer.append(checked_cast<uint32_t>(storedPerm.size()));
-    for (auto d : storedPerm) {
-        serializer.append(checked_cast<uint32_t>(storedDims[d]));
-    }
-    for (auto d : storedPerm) {
-        serializer.append(checked_cast<uint32_t>(storedStrides[d]));
-    }
-}
-
-void DataNode::serializeBufferImpl(
-        BlobSerializer& serializer,
-        const DataDesc& storedDesc,
-        const DimValues& storedStrides) const {
-    serializeDescImpl(serializer, storedDesc, storedStrides);
 
-    serializer.append(checked_cast<uint32_t>(_location));
-
-    if (_location == DataLocation::Input || _location == DataLocation::Output) {
-        auto topParent = getTopParentData();
-
-        auto ioIdx = topParent->attrs().get<int>("ioIdx");
-        serializer.append(checked_cast<uint32_t>(ioIdx));
-
-        auto parentByteSize = topParent->totalByteSize();
-        serializer.append(checked_cast<uint32_t>(parentByteSize));
-    }
+    const auto& shape = shapeLocation();
 
-    serializer.append(checked_cast<uint32_t>(_memoryOffset));
+    serializer.append(checked_cast<uint32_t>(shape.dimsLocation));
+    serializer.append(checked_cast<uint32_t>(shape.dimsOffset));
+    serializer.append(checked_cast<uint32_t>(shape.stridesLocation));
+    serializer.append(checked_cast<uint32_t>(shape.stridesOffset));
 }
 
 void printTo(std::ostream& os, const Data& data) {
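After this hunk a data node carries its allocation state as a single DataLocation {location, offset} value plus a separate ShapeLocation, and serializeBuffer() no longer re-derives a permuted descriptor. A usage sketch; the field order of DataLocation is assumed from the `_dataLocation = {location, 0}` assignment above, and the variable names are illustrative:

// Sketch only.
constData->setDataAllocationInfo(DataLocation{Location::Blob, blobOffset});
inputData->setIOInfo(Location::Input, ioBufferOffset);

BlobSerializer serializer;
constData->serializeBuffer(serializer);  // descriptor + shape location, then data location and offset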
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/batch_norm_contents.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/batch_norm_contents.cpp
new file mode 100644 (file)
index 0000000..eb5eebb
--- /dev/null
@@ -0,0 +1,70 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/batch_norm_contents.hpp>
+
+#include <vpu/utils/profiling.hpp>
+
+#include <ie_parallel.hpp>
+#include <precision_utils.h>
+
+namespace vpu {
+
+namespace ie = InferenceEngine;
+
+//
+// BatchNormalizationWeightsContent
+//
+
+BatchNormalizationWeightsContent::BatchNormalizationWeightsContent(const DataContent::Ptr& origContent,
+                                                                   float epsilon) :
+        _origContent(origContent), _epsilon(epsilon) {}
+
+size_t BatchNormalizationWeightsContent::byteSize() const {
+    return _origContent->byteSize();
+}
+
+void BatchNormalizationWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(BatchNormalizationWeightsContent);
+
+    auto srcPtr = _origContent->get<fp16_t>();
+    auto dstPtr = static_cast<fp16_t*>(tempBuf);
+
+    ie::parallel_for(_origContent->byteSize() / sizeof(fp16_t), [this, srcPtr, dstPtr](int i) {
+        float val = ie::PrecisionUtils::f16tof32(srcPtr[i]) + _epsilon;
+        val = 1.0f / std::sqrt(val);
+        dstPtr[i] = ie::PrecisionUtils::f32tof16(val);
+    });
+}
+
+//
+// BatchNormalizationBiasesContent
+//
+
+BatchNormalizationBiasesContent::BatchNormalizationBiasesContent(const DataContent::Ptr& origContent,
+                                                                 const DataContent::Ptr& weightsContent) :
+        _origContent(origContent), _weightsContent(weightsContent) {}
+
+size_t BatchNormalizationBiasesContent::byteSize() const {
+    return _origContent->byteSize();
+}
+
+void BatchNormalizationBiasesContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(BatchNormalizationBiasesContent);
+
+    auto origPtr = _origContent->get<fp16_t>();
+    auto weightsPtr = _weightsContent->get<fp16_t>();
+
+    auto dstPtr = static_cast<fp16_t*>(tempBuf);
+
+    ie::parallel_for(_origContent->byteSize() / sizeof(fp16_t), [origPtr, weightsPtr, dstPtr](int i) {
+        // TODO: beta needs to be extracted from the IE layer.
+        float beta = 0.0f;
+
+        auto wVal = ie::PrecisionUtils::f16tof32(weightsPtr[i]);
+        dstPtr[i] = ie::PrecisionUtils::f32tof16(beta - wVal * ie::PrecisionUtils::f16tof32(origPtr[i]));
+    });
+}
+
+} // namespace vpu
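The two contents above implement the usual batch-norm folding into a per-channel affine transform. Assuming the original weights blob holds the per-channel variance and the original biases blob the per-channel mean (this file does not state it explicitly), they produce:

//   w[i] = 1 / sqrt(var[i] + epsilon)
//   b[i] = beta - w[i] * mean[i]          (beta is hard-coded to 0 for now)
// so the folded layer computes y[i] = w[i] * x[i] + b[i] = (x[i] - mean[i]) / sqrt(var[i] + epsilon).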
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/calculated_data_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/calculated_data_content.cpp
new file mode 100644 (file)
index 0000000..647b22f
--- /dev/null
@@ -0,0 +1,17 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/calculated_data_content.hpp>
+
+namespace vpu {
+
+const void* CalculatedDataContent::getRaw() const {
+    if (_temp.empty()) {
+        _temp.resize(byteSize());
+        fillTempBuf(_temp.data());
+    }
+    return _temp.data();
+}
+
+} // namespace vpu
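With the base-contents list gone, a derived content now stores its own inputs and descriptor and overrides just byteSize() and fillTempBuf(). A minimal hypothetical example in the same shape as the contents added by this commit; the class below is illustration only, does not exist in the repository, and the access specifiers of the base-class declarations are assumed:

#include <vpu/model/data_contents/calculated_data_content.hpp>

#include <vpu/utils/profiling.hpp>

#include <ie_parallel.hpp>
#include <precision_utils.h>

namespace vpu {

namespace ie = InferenceEngine;

// Illustration only: negates an FP16 constant element-wise.
class NegatedWeightsContent final : public CalculatedDataContent {
public:
    explicit NegatedWeightsContent(const DataContent::Ptr& origContent) :
            _origContent(origContent) {}

    size_t byteSize() const override {
        return _origContent->byteSize();
    }

protected:
    void fillTempBuf(void* tempBuf) const override {
        VPU_PROFILE(NegatedWeightsContent);

        auto srcPtr = _origContent->get<fp16_t>();
        auto dstPtr = static_cast<fp16_t*>(tempBuf);

        ie::parallel_for(byteSize() / sizeof(fp16_t), [srcPtr, dstPtr](int i) {
            dstPtr[i] = ie::PrecisionUtils::f32tof16(-ie::PrecisionUtils::f16tof32(srcPtr[i]));
        });
    }

private:
    DataContent::Ptr _origContent;
};

} // namespace vpu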
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/conv_weights_contents.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/conv_weights_contents.cpp
new file mode 100644 (file)
index 0000000..6c0eebc
--- /dev/null
@@ -0,0 +1,65 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/conv_weights_contents.hpp>
+
+#include <vpu/middleend/sw/utility.hpp>
+#include <vpu/utils/profiling.hpp>
+
+namespace vpu {
+
+//
+// ConvIm2ColWeightsContent
+//
+
+ConvIm2ColWeightsContent::ConvIm2ColWeightsContent(const DataContent::Ptr& origContent, DataDesc desc) :
+        _origContent(origContent), _desc(desc) {}
+
+size_t ConvIm2ColWeightsContent::byteSize() const {
+    return checked_cast<size_t>(_desc.totalDimSize()) *
+           checked_cast<size_t>(_desc.elemSize());
+}
+
+void ConvIm2ColWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(ConvIm2ColWeightsContent);
+    kchw_to_khwc(_origContent->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
+}
+
+//
+// Conv3x3WeightsContent
+//
+
+Conv3x3WeightsContent::Conv3x3WeightsContent(const DataContent::Ptr& origContent, DataDesc desc) :
+        _origContent(origContent), _desc(desc) {
+}
+
+size_t Conv3x3WeightsContent::byteSize() const {
+    return checked_cast<size_t>(_desc.totalDimSize()) *
+           checked_cast<size_t>(_desc.elemSize());
+}
+
+void Conv3x3WeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(Conv3x3WeightsContent);
+    kchw_to_hwkc(_origContent->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
+}
+
+//
+// ConvCHWWeightsContent
+//
+
+ConvCHWWeightsContent::ConvCHWWeightsContent(const DataContent::Ptr& origContent, DataDesc desc) :
+        _origContent(origContent), _desc(desc) {
+}
+
+size_t ConvCHWWeightsContent::byteSize() const {
+    return checked_cast<size_t>(_desc.totalDimSize()) *
+           checked_cast<size_t>(_desc.elemSize());
+}
+
+void ConvCHWWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(ConvCHWWeightsContent);
+    kchw_to_hwkc(_origContent->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/data_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/data_content.cpp
new file mode 100644 (file)
index 0000000..4fbe41a
--- /dev/null
@@ -0,0 +1,11 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/data_content.hpp>
+
+namespace vpu {
+
+DataContent::~DataContent() = default;
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/deconvolution_contents.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/deconvolution_contents.cpp
new file mode 100644 (file)
index 0000000..5e04f32
--- /dev/null
@@ -0,0 +1,174 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/deconvolution_contents.hpp>
+
+#include <vpu/utils/profiling.hpp>
+#include <vpu/middleend/sw/utility.hpp>
+
+#include <ie_parallel.hpp>
+
+namespace vpu {
+
+//
+// DeconvolutionToConvolutionContent
+//
+
+DeconvolutionToConvolutionContent::DeconvolutionToConvolutionContent(
+        const DataContent::Ptr& origContent, const DataDesc& desc) :
+        _origContent(origContent), _desc(desc) {
+}
+
+size_t DeconvolutionToConvolutionContent::byteSize() const {
+    return checked_cast<size_t>(_desc.totalDimSize()) *
+           checked_cast<size_t>(_desc.elemSize());
+}
+
+void DeconvolutionToConvolutionContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(DeconvolutionToConvolutionContent);
+
+    IE_ASSERT(_desc.type() == DataType::FP16);
+
+    deconv_to_conv(_origContent->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
+}
+
+//
+// DepthDeconvolutionCHWWeightsContent
+//
+
+void depthDeconvolutionRelayoutCHW(
+        const fp16_t* src, int src_size,
+        fp16_t* dst, int dst_size,
+        int KX, int KY,
+        int channels) {
+    ie::parallel_for3d(channels, KY, KX, [=](int c, int ky, int kx) {
+        int iidx = c * KX * KY + ky * KX + kx;
+        IE_ASSERT(iidx >= 0 && iidx < src_size);
+
+        int inv_kx = KX - kx - 1;
+        int inv_ky = KY - ky - 1;
+        int oidx = c * KX * KY + inv_ky * KX + inv_kx;
+        IE_ASSERT(oidx >= 0 && oidx < dst_size);
+
+        dst[oidx] = src[iidx];
+    });
+}
+
+DepthDeconvolutionCHWWeightsContent::DepthDeconvolutionCHWWeightsContent(
+        const DataContent::Ptr& origContent,
+        int KX, int KY, int channels) :
+        _origContent(origContent),
+        _KX(KX), _KY(KY), _channels(channels) {}
+
+void DepthDeconvolutionCHWWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(DepthDeconvolutionCHWWeightsContent);
+    depthDeconvolutionRelayoutCHW(
+            _origContent->get<fp16_t>(), _origContent->byteSize() / sizeof(fp16_t),
+            static_cast<fp16_t*>(tempBuf), _origContent->byteSize() / sizeof(fp16_t),
+            _KX, _KY, _channels);
+}
+
+size_t DepthDeconvolutionCHWWeightsContent::byteSize() const {
+    return _origContent->byteSize();
+}
+
+//
+// DepthDeconvolutionHWCWeightsContent
+//
+
+void depthDeconvolutionRelayoutHWC(
+        const fp16_t* src, int src_size,
+        fp16_t* dst, int dst_size,
+        int KX, int KY,
+        int channels) {
+    ie::parallel_for3d(channels, KY, KX, [=](int c, int ky, int kx) {
+        int iidx = c * KX * KY + ky * KX + kx;
+        IE_ASSERT(iidx < src_size);
+
+        int inv_kx = KX - kx - 1;
+        int inv_ky = KY - ky - 1;
+        int oidx = inv_ky * KX * channels + inv_kx * channels + c;
+        IE_ASSERT(oidx < dst_size);
+
+        dst[oidx] = src[iidx];
+    });
+}
+
+DepthDeconvolutionHWCWeightsContent::DepthDeconvolutionHWCWeightsContent(
+        const DataContent::Ptr& origContent,
+        int KX, int KY, int channels) :
+        _origContent(origContent),
+        _KX(KX), _KY(KY), _channels(channels) {
+}
+
+void DepthDeconvolutionHWCWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(DepthDeconvolutionHWCWeightsContent);
+    depthDeconvolutionRelayoutHWC(
+            _origContent->get<fp16_t>(), _origContent->byteSize() / sizeof(fp16_t),
+            static_cast<fp16_t*>(tempBuf), _origContent->byteSize() / sizeof(fp16_t),
+            _KX, _KY, _channels);
+}
+
+size_t DepthDeconvolutionHWCWeightsContent::byteSize() const {
+    return _origContent->byteSize();
+}
+
+//
+// DeconvolutionWeightsContent
+//
+
+void deconvolutionRelayout(
+        const fp16_t* src, int src_size,
+        fp16_t* dst, int dst_size,
+        int KX, int KY,
+        int IC, int OC) {
+    ie::parallel_for4d(OC, IC, KY, KX, [=](int oc, int ic, int ky, int kx) {
+        int iidx = ic * OC * KY * KX
+                   + oc * KY * KX
+                   + ky * KX
+                   + kx;
+        IE_ASSERT(iidx >= 0 && iidx < src_size);
+
+        int inv_kx = KX - kx - 1;
+        int inv_ky = KY - ky - 1;
+        int oidx = oc * IC * KY * KX
+                   + ic * KY * KX
+                   + inv_ky * KX
+                   + inv_kx;
+        IE_ASSERT(oidx >=  0 && oidx < dst_size);
+
+        dst[oidx] = src[iidx];
+    });
+}
+
+DeconvolutionWeightsContent::DeconvolutionWeightsContent(
+        const DataContent::Ptr& origContent,
+        DataDesc desc,
+        int KX, int KY,
+        int IC, int OC) :
+        _origContent(origContent), _desc(desc),
+        _intermBuf(_desc.totalDimSize()),
+        _KX(KX), _KY(KY),
+        _IC(IC), _OC(OC) {
+}
+
+size_t DeconvolutionWeightsContent::byteSize() const {
+    return _desc.totalDimSize() * sizeof(fp16_t);
+}
+
+void DeconvolutionWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(DeconvolutionWeightsContent);
+
+    auto dstPtr = static_cast<fp16_t*>(tempBuf);
+
+    deconvolutionRelayout(
+            _origContent->get<fp16_t>(), _desc.totalDimSize(),
+            _intermBuf.data(), _desc.totalDimSize(),
+            _KX, _KY,
+            _IC, _OC);
+
+    kchw_to_hwkc(_intermBuf.data(), dstPtr, _desc);
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/default_sw_weights_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/default_sw_weights_content.cpp
new file mode 100644 (file)
index 0000000..4e43c32
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/default_sw_weights_content.hpp>
+
+#include <vpu/utils/profiling.hpp>
+#include <vpu/middleend/sw/utility.hpp>
+
+namespace vpu {
+
+DefaultSwWeightsContent::DefaultSwWeightsContent(const DataContent::Ptr& origContent, const DataDesc& desc) :
+        _origContent(origContent), _desc(desc) {
+}
+
+size_t DefaultSwWeightsContent::byteSize() const {
+    return checked_cast<size_t>(_desc.totalDimSize()) *
+           checked_cast<size_t>(_desc.elemSize());
+}
+
+void DefaultSwWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(DefaultSwWeightsContent);
+
+    IE_ASSERT(_desc.type() == DataType::FP16);
+
+    kchw_to_hwck(_origContent->get<fp16_t>(), static_cast<fp16_t*>(tempBuf), _desc);
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/hw_const_data_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/hw_const_data_content.cpp
new file mode 100644 (file)
index 0000000..b3c393a
--- /dev/null
@@ -0,0 +1,101 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/hw_const_data_content.hpp>
+
+#include <vpu/utils/profiling.hpp>
+
+#include <ie_parallel.hpp>
+
+namespace vpu {
+
+HwConstData::HwConstData(
+        const DataContent::Ptr& origContent,
+        const DataDesc& origDesc,
+        const DataDesc& resDesc,
+        const std::map<Dim, Slice> dimSlices) :
+        _origContent(origContent),
+        _origDesc(origDesc),
+        _resDesc(resDesc),
+        _dimSlices(dimSlices) {}
+
+size_t HwConstData::byteSize() const {
+    return checked_cast<size_t>(_resDesc.totalDimSize()) *
+           checked_cast<size_t>(_resDesc.elemSize());
+}
+
+void HwConstData::fillTempBuf(void* outBuf) const {
+    VPU_PROFILE(HwConstData);
+
+    VPU_THROW_UNLESS(
+        _resDesc.type() == DataType::FP16,
+        "Constant data has {} data type while only {} is supported",
+        _resDesc.type(), DataType::FP16);
+
+    const auto srcData = _origContent->get<fp16_t>();
+    auto dstData = static_cast<fp16_t*>(outBuf);
+
+    VPU_THROW_UNLESS(srcData != nullptr,
+        "Source buffer for constant data has null address");
+
+    auto getDimSlice = [this](const Dim dim) {
+        auto it = _dimSlices.find(dim);
+        if (it != _dimSlices.end()) {
+            return it->second;
+        }
+
+        const int startInd = 0;
+        const size_t size = _origDesc.dim(dim);
+
+        return Slice(startInd, size);
+    };
+
+    if (_origDesc.numDims() == 4) {
+        Slice slice = getDimSlice(Dim::N);
+
+        int startOC = slice.start;
+        size_t numOC = slice.size;
+
+        const auto IC = _origDesc.dim(Dim::C);
+        const auto K = _origDesc.dim(Dim::H);
+        const auto V = _origDesc.dim(Dim::W);
+
+        const auto kernelStride     = V;
+        const auto inChannelStride  = K * kernelStride;
+        const auto outerStride      = IC * inChannelStride;
+
+        ie::parallel_for(numOC, [=](int oc) {
+            const auto ocSlice = oc;
+            oc += startOC;
+
+            const auto ocInner = oc % V;
+            const auto ocOuter = oc / V;
+            const auto ocSliceInner = ocSlice % V;
+            const auto ocSliceOuter = ocSlice / V;
+
+            const auto ocSrc = ocInner + ocOuter * outerStride;
+            const auto ocDst = ocSliceInner + ocSliceOuter * outerStride;
+
+            for (int ic = 0; ic < IC; ++ic)
+                for (int k = 0; k < K; ++k) {
+                    const auto srcInd = ocSrc +
+                                        k * kernelStride +
+                                        ic * inChannelStride;
+                    const auto dstInd = ocDst +
+                                        k * kernelStride +
+                                        ic * inChannelStride;
+
+                    dstData[dstInd] = srcData[srcInd];
+                }
+        });
+    } else if (_origDesc.numDims() == 1) {
+        Slice slice = getDimSlice(Dim::C);
+
+        std::copy(srcData + slice.start, srcData + slice.start + slice.size, dstData);
+    } else {
+        THROW_IE_EXCEPTION << "Invalid number of dimensions " << _origDesc.numDims();
+    }
+}
+
+} // namespace vpu
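A usage sketch for the slicing content above; the variable names and the tile split are illustrative (in the repository the descriptors come from the HW tiling passes):

// Sketch only: take output channels [8, 24) of a 4D FP16 constant for one tile.
// Dimensions not listed in the map keep their full range.
std::map<Dim, Slice> dimSlices{{Dim::N, Slice(8, 16)}};

auto tileContent = std::make_shared<HwConstData>(
    origContent,   // content of the full constant
    origDesc,      // descriptor of the full constant
    tileDesc,      // descriptor of the sliced tile
    dimSlices);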
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/hw_weights_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/hw_weights_content.cpp
new file mode 100644 (file)
index 0000000..1d5e33c
--- /dev/null
@@ -0,0 +1,104 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/hw_weights_content.hpp>
+
+#include <vpu/utils/profiling.hpp>
+
+#include <ie_parallel.hpp>
+
+namespace vpu {
+
+HwWeightsContent::HwWeightsContent(const DataContent::Ptr& origContent,
+                                   const DataDesc& origWeightsDesc,
+                                   const DataDesc& resDesc,
+                                   int numInputChannels,
+                                   int channelStartIndex) :
+        _origContent(origContent),
+        _origDesc(origWeightsDesc),
+        _resDesc(resDesc),
+        _numInputChannels(numInputChannels),
+        _channelStartIndex(channelStartIndex) {
+}
+
+size_t HwWeightsContent::byteSize() const {
+    return checked_cast<size_t>(_resDesc.totalDimSize()) *
+           checked_cast<size_t>(_resDesc.elemSize());
+}
+
+void HwWeightsContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(HwWeightsContent);
+
+    IE_ASSERT(_resDesc.type() == DataType::FP16);
+
+    const auto KX = _origDesc.dim(Dim::W);
+    const auto KY = _origDesc.dim(Dim::H);
+    const auto IC = _origDesc.dim(Dim::C);
+    const auto OC = _origDesc.dim(Dim::N);
+    const auto origTotalSize = _origDesc.totalDimSize();
+
+    const auto HW_OC_inner = _resDesc.dim(Dim::W);
+    const auto HW_OC_outer = _resDesc.dim(Dim::N);
+    IE_ASSERT(HW_OC_outer * HW_OC_inner >= OC);
+
+    const auto HW_K = _resDesc.dim(Dim::H);
+    IE_ASSERT(HW_K == KX * KY);
+
+    IE_ASSERT(_channelStartIndex < IC);
+    const auto HW_IC = _resDesc.dim(Dim::C);
+    const auto HW_IC_real = std::min(_numInputChannels, IC - _channelStartIndex);
+
+    const auto srcData = _origContent->get<fp16_t>();
+    IE_ASSERT(srcData != nullptr);
+
+    auto dstData = static_cast<fp16_t*>(tempBuf);
+
+    IE_ASSERT((_channelStartIndex + HW_IC_real) * HW_K + (OC - 1) * HW_K * IC - 1 < origTotalSize);
+    IE_ASSERT((OC - 1) % HW_OC_inner +
+              (HW_K - 1) * HW_OC_inner +
+              (HW_IC_real - 1) * HW_OC_inner * HW_K +
+              ((OC - 1) / 8) * HW_OC_inner * HW_K * HW_IC < _resDesc.totalDimSize());
+
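+    // The asserts above check that the highest source and destination indices stay inside the
+    // original and result buffers. A separate fast path handles 1x1 kernels, where the spatial
+    // loop collapses; otherwise the full (oc, ic, ky, kx) repacking loop is used.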
+    if (KX == 1 && KY == 1) {
+        ie::parallel_for(OC, [=](int oc) {
+            const auto oc_inner = oc % HW_OC_inner;
+            const auto oc_outer = oc / HW_OC_inner;
+            for (int ic = 0; ic < HW_IC_real; ++ic) {
+                const auto srcInd =
+                        (_channelStartIndex + ic) +
+                        oc * IC;
+                const auto dstInd =
+                        oc_inner +
+                        ic * HW_OC_inner * HW_K +
+                        oc_outer * HW_OC_inner * HW_K * HW_IC;
+
+                dstData[dstInd] = srcData[srcInd];
+            }
+        });
+    } else {
+        ie::parallel_for(OC, [=](int oc) {
+            const auto oc_inner = oc % HW_OC_inner;
+            const auto oc_outer = oc / HW_OC_inner;
+            for (int ic = 0; ic < HW_IC_real; ++ic) {
+                for (int ky = 0; ky < KY; ++ky) {
+                    for (int kx = 0; kx < KX; ++kx) {
+                        const auto srcInd =
+                                (kx + ky * KX) +
+                                (_channelStartIndex + ic) * HW_K +
+                                oc * HW_K * IC;
+                        const auto dstInd =
+                                oc_inner +
+                                (ky * KX + kx) * HW_OC_inner +
+                                ic * HW_OC_inner * HW_K +
+                                oc_outer * HW_OC_inner * HW_K * HW_IC;
+
+                        dstData[dstInd] = srcData[srcInd];
+                    }
+                }
+            }
+        });
+    }
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/ie_blob_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/ie_blob_content.cpp
new file mode 100644 (file)
index 0000000..4f61b98
--- /dev/null
@@ -0,0 +1,39 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
+#include <vpu/utils/ie_helpers.hpp>
+
+namespace vpu {
+
+IeBlobContent::IeBlobContent(const ie::Blob::CPtr& blob, DataType resultDataType) : _blob(blob), _resultDataType(resultDataType) {
+    VPU_THROW_UNLESS(_resultDataType == DataType::FP16 || _resultDataType == DataType::S32,
+                     "IeBlobContent creation error: {} result type is unsupported, only {} and {} are supported",
+                     _resultDataType, DataType::FP16, DataType::S32);
+}
+
+size_t IeBlobContent::byteSize() const {
+    // The result may be converted to a type with a different element size
+    const auto elementSize = _resultDataType == DataType::FP16 ? sizeof(fp16_t) : sizeof(int32_t);
+    return elementSize * _blob->size();
+}
+
+const void* IeBlobContent::getRaw() const {
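+    // Convert the original blob to FP16 lazily on first access and cache the result;
+    // S32 blobs are returned as-is.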
+    if (_resultDataType == DataType::FP16) {
+        if (_blobFp16 == nullptr) {
+            _blobFp16 = _blob->getTensorDesc().getPrecision() == ie::Precision::FP16 ?
+                        _blob : convertBlobFP32toFP16(_blob);
+        }
+        return _blobFp16->cbuffer();
+    } else { // S32
+        return _blob->cbuffer();
+    }
+}
+
+DataContent::Ptr ieBlobContent(const ie::Blob::CPtr& blob, DataType resultDataType) {
+    return std::make_shared<IeBlobContent>(blob, resultDataType);
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/kernel_binary_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/kernel_binary_content.cpp
new file mode 100644 (file)
index 0000000..be35e4f
--- /dev/null
@@ -0,0 +1,23 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/kernel_binary_content.hpp>
+
+#include <string>
+
+namespace vpu {
+
+KernelBinaryContent::KernelBinaryContent(const std::string& blob) : _blob(blob) {
+    IE_ASSERT(!_blob.empty());
+}
+
+size_t KernelBinaryContent::byteSize() const {
+    return _blob.size();
+}
+
+const void* KernelBinaryContent::getRaw() const {
+    return _blob.data();
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/mean_contents.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/mean_contents.cpp
new file mode 100644 (file)
index 0000000..0d09472
--- /dev/null
@@ -0,0 +1,80 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/mean_contents.hpp>
+
+#include <vpu/utils/profiling.hpp>
+#include <vpu/middleend/sw/utility.hpp>
+
+#include <ie_parallel.hpp>
+#include <precision_utils.h>
+
+namespace vpu {
+
+//
+// MeanImageContent
+//
+
+MeanImageContent::MeanImageContent(const ie::PreProcessInfo& info, const DataDesc& desc) : _info(info), _desc(desc) {}
+
+size_t MeanImageContent::byteSize() const {
+    size_t countElem = checked_cast<size_t>(_desc.dim(Dim::W) * _desc.dim(Dim::H) * _desc.dim(Dim::C));
+    if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
+        countElem *= 2;
+    }
+
+    return countElem * sizeof(fp16_t);
+}
+
+void MeanImageContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(MeanImageContent);
+
+    const size_t numOfChannel = _info.getNumberOfChannels();
+
+    const size_t imagePixels = checked_cast<size_t>(_desc.dim(Dim::W) * _desc.dim(Dim::H));
+    const size_t countElem = checked_cast<size_t>(_desc.dim(Dim::W) * _desc.dim(Dim::H) * _desc.dim(Dim::C));
+
+    const auto dstPtr = static_cast<fp16_t*>(tempBuf);
+
+    auto dstPtr2 = dstPtr;
+    if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
+        dstPtr2 += countElem;
+    }
+
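+    // For interleaved layouts (NHWC/HWC) the mean image is first written in planar form into
+    // the second half of the temporary buffer and then transposed into the first half below.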
+    ie::parallel_for(numOfChannel, [=](size_t i) {
+        const auto meanDataBlob = _info[i]->meanData;
+
+        ie::PrecisionUtils::f32tof16Arrays(
+                dstPtr2 + i * imagePixels,
+                meanDataBlob->buffer().as<const float*>(),
+                imagePixels,
+                -1.0f);
+    });
+
+    if (_desc.dimsOrder() == DimsOrder::NHWC || _desc.dimsOrder() == DimsOrder::HWC) {
+        kchw_to_hwck(dstPtr2, dstPtr, _desc);
+    }
+}
+
+//
+// MeanValueContent
+//
+
+MeanValueContent::MeanValueContent(const ie::PreProcessInfo& info) : _info(info) {}
+
+size_t MeanValueContent::byteSize() const {
+    return _info.getNumberOfChannels() * sizeof(fp16_t);
+}
+
+void MeanValueContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(MeanValueContent);
+
+    const auto dstPtr = static_cast<fp16_t*>(tempBuf);
+
+    ie::parallel_for(_info.getNumberOfChannels(), [dstPtr, this](size_t i) {
+        dstPtr[i] = ie::PrecisionUtils::f32tof16(-_info[i]->meanValue);
+    });
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/merge_fc_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/merge_fc_content.cpp
new file mode 100644 (file)
index 0000000..808aee6
--- /dev/null
@@ -0,0 +1,60 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/merge_fc_content.hpp>
+
+#include <ie_parallel.hpp>
+
+#include <numeric>
+
+namespace vpu {
+
+MergeFullyConnectedContentsByChannels::MergeFullyConnectedContentsByChannels(const std::vector<DataContent::CPtr> contents,
+                                                                             const std::vector<DataDesc> inDescs,
+                                                                             const DataDesc& resDesc) :
+        _contents(contents), _inDescs(inDescs), _resDesc(resDesc) {}
+
+size_t MergeFullyConnectedContentsByChannels::byteSize() const {
+    return checked_cast<size_t>(_resDesc.totalDimSize()) *
+           checked_cast<size_t>(_resDesc.elemSize());
+}
+
+void MergeFullyConnectedContentsByChannels::fillTempBuf(void* temp) const {
+    IE_ASSERT(!_contents.empty());
+    // A vpu::DataNode carries both a content object and its own vpu::DataDesc with a dimensions vector.
+    // The content has a dimensions vector as well, and the two may differ, so the total number of
+    // channels is computed from the descriptors stored alongside the contents.
+    const auto dstC = std::accumulate(_inDescs.begin(), _inDescs.end(), 0, [](int reduction, const DataDesc& desc) {
+        return reduction + desc.dims()[Dim::C];});
+
+    for (std::size_t i = 0, dstChannelsOffset = 0; i < _inDescs.size(); ++i) {
+        const auto& content = _contents[i];
+        const auto& srcDesc = _inDescs[i];
+
+        const auto& srcDims = srcDesc.dims();
+        const auto& elemSize = srcDesc.elemSize();
+
+        const auto N = srcDims.get(Dim::N, 1);
+        const auto H = srcDims.get(Dim::H, 1);
+        const auto W = srcDims.get(Dim::W, 1) * elemSize;
+
+        const auto& srcC = srcDims[Dim::C];
+
+        const auto src = content->get<uint8_t>();
+        auto dst = static_cast<uint8_t*>(temp);
+
+        InferenceEngine::parallel_for4d(N, srcC, H, W, [dstChannelsOffset, N, H, W, src, dst, srcC, dstC](int n, int c, int h, int w) {
+            const auto& srcc = c;
+            const auto& dstc = dstChannelsOffset + c;
+
+            const auto& srcOffset = n * H * W * srcC + srcc * H * W + h * W + w;
+            const auto& dstOffset = n * H * W * dstC + dstc * H * W + h * W + w;
+            dst[dstOffset] = src[srcOffset];
+        });
+
+        dstChannelsOffset += srcC;
+    }
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/mtcnn_blob_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/mtcnn_blob_content.cpp
new file mode 100644 (file)
index 0000000..66558b3
--- /dev/null
@@ -0,0 +1,21 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/mtcnn_blob_content.hpp>
+
+namespace vpu {
+
+MTCNNBlobContent::MTCNNBlobContent(std::vector<char> blob) : _blob(std::move(blob)) {
+    IE_ASSERT(!_blob.empty());
+}
+
+size_t MTCNNBlobContent::byteSize() const {
+    return _blob.size();
+}
+
+const void* MTCNNBlobContent::getRaw() const {
+    return _blob.data();
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/prelu_blob_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/prelu_blob_content.cpp
new file mode 100644 (file)
index 0000000..fda51c7
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/prelu_blob_content.hpp>
+
+#include <vpu/utils/ie_helpers.hpp>
+#include <vpu/utils/profiling.hpp>
+
+#include <ie_parallel.hpp>
+
+namespace vpu {
+
+PReLUBlobContent::PReLUBlobContent(const ie::Blob::CPtr& blob, const DataDesc& desc, int repeat) :
+        _blob(blob), _desc(desc), _repeat(repeat) {
+    VPU_INTERNAL_CHECK(repeat >= 1,
+        "PReLUBlobContent only supports repeat value more than 1, actual is {}", repeat);
+}
+
+size_t PReLUBlobContent::byteSize() const {
+    return checked_cast<size_t>(_desc.totalDimSize()) *
+           checked_cast<size_t>(_desc.elemSize());
+}
+
+const void* PReLUBlobContent::getRaw() const {
+    if (_blobFp16 == nullptr) {
+        _blobFp16 = _blob->getTensorDesc().getPrecision() == ie::Precision::FP16 ?
+                    _blob : convertBlobFP32toFP16(_blob);
+    }
+
+    if (_repeat == 1) {
+        return _blobFp16->cbuffer();
+    }
+
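+    // With _repeat > 1 the single set of PReLU weights is tiled _repeat times into a cached
+    // temporary FP16 buffer so that the content size matches the descriptor's total size.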
+    if (_tempFp16.empty()) {
+        VPU_PROFILE(PReLUBlobContent);
+
+        IE_ASSERT(_desc.totalDimSize() % _repeat == 0);
+
+        auto origNumElems = _desc.totalDimSize() / _repeat;
+        IE_ASSERT(checked_cast<size_t>(origNumElems) <= _blobFp16->size());
+
+        auto origPtr = _blobFp16->cbuffer().as<const fp16_t*>();
+        IE_ASSERT(origPtr != nullptr);
+
+        _tempFp16.resize(checked_cast<size_t>(_desc.totalDimSize()));
+
+        ie::parallel_for(_repeat, [this, origPtr, origNumElems](int i) {
+            std::copy_n(origPtr, origNumElems, _tempFp16.data() + i * origNumElems);
+        });
+    }
+
+    return _tempFp16.data();
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/priorbox_contents.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/priorbox_contents.cpp
new file mode 100644 (file)
index 0000000..94f8162
--- /dev/null
@@ -0,0 +1,381 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/priorbox_contents.hpp>
+
+#include <vpu/utils/profiling.hpp>
+
+#include <precision_utils.h>
+#include <ie_layers.h>
+#include <ie_parallel.hpp>
+
+namespace vpu {
+
+//
+// PriorBoxContent
+//
+
+PriorBoxContent::PriorBoxContent(
+        const DataDesc& inDesc0,
+        const DataDesc& inDesc1,
+        const DataDesc& outDesc,
+        const ie::CNNLayerPtr &layer) :
+        _inDesc0(inDesc0), _inDesc1(inDesc1), _outDesc(outDesc),
+        _layer(layer) {
+    IE_ASSERT(layer != nullptr);
+}
+
+size_t PriorBoxContent::byteSize() const {
+    return checked_cast<size_t>(_outDesc.totalDimSize()) *
+           checked_cast<size_t>(_outDesc.elemSize());
+}
+
+void PriorBoxContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(PriorBoxContent);
+
+    auto tempPtr = static_cast<fp16_t*>(tempBuf);
+
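+    // Prior boxes are computed on the host at compile time and stored as FP16 constant data:
+    // the first half of the buffer holds box coordinates, the second half holds the variances.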
+    auto _min_sizes = _layer->GetParamAsFloats("min_size", {});
+    auto _max_sizes = _layer->GetParamAsFloats("max_size", {});
+    auto aspect_ratios = _layer->GetParamAsFloats("aspect_ratio");
+    auto _flip = static_cast<bool>(_layer->GetParamAsInt("flip"));
+    auto _clip = static_cast<bool>(_layer->GetParamAsInt("clip"));
+    auto _variance = _layer->GetParamAsFloats("variance");
+    auto _img_h = _layer->GetParamAsInt("img_h", 0);
+    auto _img_w = _layer->GetParamAsInt("img_w", 0);
+    auto _step = _layer->GetParamAsFloat("step", 0);
+    auto _offset = _layer->GetParamAsFloat("offset", 0);
+    auto _scale_all_sizes = static_cast<bool>(_layer->GetParamAsInt("scale_all_sizes", 1));
+
+    auto _fixed_sizes = _layer->GetParamAsFloats("fixed_size", {});
+    auto _fixed_ratios = _layer->GetParamAsFloats("fixed_ratio", {});
+    auto _densitys = _layer->GetParamAsFloats("density", {});
+
+    SmallVector<float> _aspect_ratios;
+    _aspect_ratios.reserve(aspect_ratios.size() + 1);
+
+    _aspect_ratios.push_back(1.0f);
+    for (const auto& aspect_ratio : aspect_ratios) {
+        bool exist = false;
+
+        for (const auto& _aspect_ratio : _aspect_ratios) {
+            if (fabsf(aspect_ratio - _aspect_ratio) < 1e-6) {
+                exist = true;
+                break;
+            }
+        }
+        if (!exist) {
+            _aspect_ratios.push_back(aspect_ratio);
+            if (_flip) {
+                if (isFloatEqual(aspect_ratio, 0.f)) {
+                    THROW_IE_EXCEPTION << "[VPU] PriorBox has 0.0 aspect ratio param in flip mode, "
+                                       << " possible division by zero";
+                }
+                _aspect_ratios.push_back(1.0f / aspect_ratio);
+            }
+        }
+    }
+
+    int _num_priors;
+    if (_scale_all_sizes) {
+        _num_priors = static_cast<int>(_aspect_ratios.size() * _min_sizes.size());
+    } else {
+        _num_priors = static_cast<int>(_aspect_ratios.size() + _min_sizes.size() - 1);
+    }
+
+    if (!_fixed_sizes.empty()) {
+        _num_priors = static_cast<int>(_aspect_ratios.size() * _fixed_sizes.size());
+    }
+
+    if (!_densitys.empty()) {
+        for (const auto& _density : _densitys) {
+            if (!_fixed_ratios.empty()) {
+                _num_priors += _fixed_ratios.size() * (static_cast<int>(pow(_density, 2)) - 1);
+            } else {
+                _num_priors += _aspect_ratios.size() * (static_cast<int>(pow(_density, 2)) - 1);
+            }
+        }
+    }
+
+    _num_priors += _max_sizes.size();
+
+    auto W  = _inDesc0.dim(Dim::W);
+    auto H  = _inDesc0.dim(Dim::H);
+    auto IW = _img_w == 0 ? _inDesc1.dim(Dim::W) : _img_w;
+    auto IH = _img_h == 0 ? _inDesc1.dim(Dim::H) : _img_h;
+    auto IWI = 1.0f / static_cast<float>(IW);
+    auto IHI = 1.0f / static_cast<float>(IH);
+
+    auto OW = (_outDesc.numDims() >= 4) ? _outDesc.dim(Dim::N) : 1;
+    auto OH = _outDesc.dim(Dim::W);
+
+    float step_x = 0.0f;
+    float step_y = 0.0f;
+
+    if (_step == 0) {
+        step_x = static_cast<float>(IW) / W;
+        step_y = static_cast<float>(IH) / H;
+    } else {
+        step_x = _step;
+        step_y = _step;
+    }
+
+    auto dst_data = tempPtr;
+
+    int dim = H * W * _num_priors * 4;
+    float center_x = 0.0f;
+    float center_y = 0.0f;
+
+    float box_width = 0.0f;
+    float box_height = 0.0f;
+
+    if (_outDesc.dim(Dim::W) != dim || _outDesc.dim(Dim::H) != 2) {
+        THROW_IE_EXCEPTION << "[VPU] PriorBox output have invalid dimension, exptected " << dim << "x2"
+                           << ", got " << _outDesc.dim(Dim::W) << "x" << _outDesc.dim(Dim::H)
+                           << ", layer name is: " << _layer->name;
+    }
+
+    auto max_fp16 = [](const float value, const float min) {
+        return ie::PrecisionUtils::f32tof16(value > min ? value : min);
+    };
+
+    auto min_fp16 = [](const float value, const float max) {
+        return ie::PrecisionUtils::f32tof16(value < max ? value : max);
+    };
+
+    size_t idx = 0;
+    for (int h = 0; h < H; ++h) {
+        for (int w = 0; w < W;  ++w) {
+            if (_step == 0) {
+                center_x = (static_cast<float>(w) + 0.5f) * step_x;
+                center_y = (static_cast<float>(h) + 0.5f) * step_y;
+            } else {
+                center_x = (_offset + static_cast<float>(w)) * _step;
+                center_y = (_offset + static_cast<float>(h)) * _step;
+            }
+
+            for (size_t s = 0; s < _fixed_sizes.size(); ++s) {
+                auto fixed_size_ = static_cast<size_t>(_fixed_sizes[s]);
+                box_width = box_height = fixed_size_ * 0.5f;
+
+                int density_ = 0;
+                int shift = 0;
+                if (s < _densitys.size()) {
+                    density_ = static_cast<size_t>(_densitys[s]);
+                    shift = static_cast<int>(_fixed_sizes[s] / density_);
+                }
+
+                if (!_fixed_ratios.empty()) {
+                    for (const auto& fr : _fixed_ratios) {
+                        const auto box_width_ratio = _fixed_sizes[s] * 0.5f * std::sqrt(fr);
+                        const auto box_height_ratio = _fixed_sizes[s] * 0.5f / std::sqrt(fr);
+
+                        for (size_t r = 0; r < density_; ++r) {
+                            for (size_t c = 0; c < density_; ++c) {
+                                const auto center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift;
+                                const auto center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift;
+
+                                dst_data[idx++] = max_fp16((center_x_temp - box_width_ratio) * IWI, 0.f);
+                                dst_data[idx++] = max_fp16((center_y_temp - box_height_ratio) * IHI, 0.f);
+                                dst_data[idx++] = min_fp16((center_x_temp + box_width_ratio) * IWI, 1.f);
+                                dst_data[idx++] = min_fp16((center_y_temp + box_height_ratio) * IHI, 1.f);
+                            }
+                        }
+                    }
+                } else {
+                    if (!_densitys.empty()) {
+                        for (int r = 0; r < density_; ++r) {
+                            for (int c = 0; c < density_; ++c) {
+                                const auto center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift;
+                                const auto center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift;
+
+                                dst_data[idx++] = max_fp16((center_x_temp - box_width) * IWI, 0.f);
+                                dst_data[idx++] = max_fp16((center_y_temp - box_height) * IHI, 0.f);
+                                dst_data[idx++] = min_fp16((center_x_temp + box_width) * IWI, 1.f);
+                                dst_data[idx++] = min_fp16((center_y_temp + box_height) * IHI, 1.f);
+                            }
+                        }
+                    }
+                    //  Rest of priors
+                    for (const auto& ar : _aspect_ratios) {
+                        if (fabs(ar - 1.) < 1e-6) {
+                            continue;
+                        }
+
+                        const auto box_width_ratio = _fixed_sizes[s] * 0.5f * std::sqrt(ar);
+                        const auto box_height_ratio = _fixed_sizes[s] * 0.5f / std::sqrt(ar);
+                        for (int r = 0; r < density_; ++r) {
+                            for (int c = 0; c < density_; ++c) {
+                                const auto center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift;
+                                const auto center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift;
+
+                                dst_data[idx++] = max_fp16((center_x_temp - box_width_ratio) * IWI, 0.f);
+                                dst_data[idx++] = max_fp16((center_y_temp - box_height_ratio) * IHI, 0.f);
+                                dst_data[idx++] = min_fp16((center_x_temp + box_width_ratio) * IWI, 1.f);
+                                dst_data[idx++] = min_fp16((center_y_temp + box_height_ratio) * IHI, 1.f);
+                            }
+                        }
+                    }
+                }
+            }
+
+            for (size_t msIdx = 0; msIdx < _min_sizes.size(); msIdx++) {
+                box_width = _min_sizes[msIdx];
+                box_height = _min_sizes[msIdx];
+
+                dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x - box_width / 2.0f) / IW);
+                dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y - box_height / 2.0f) / IH);
+                dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x + box_width / 2.0f) / IW);
+                dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y + box_height / 2.0f) / IH);
+
+                if (_max_sizes.size() > msIdx) {
+                    box_width = box_height = std::sqrt(_min_sizes[msIdx] * _max_sizes[msIdx]);
+
+                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x - box_width / 2.0f) / IW);
+                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y - box_height / 2.0f) / IH);
+                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x + box_width / 2.0f) / IW);
+                    dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y + box_height / 2.0f) / IH);
+                }
+
+                if (_scale_all_sizes || (!_scale_all_sizes && (msIdx == _min_sizes.size() - 1))) {
+                    size_t sIdx = _scale_all_sizes ? msIdx : 0;
+                    for (const auto& ar : _aspect_ratios) {
+                        if (std::fabs(ar - 1.0f) < 1e-6) {
+                            continue;
+                        }
+
+                        box_width = _min_sizes[sIdx] * std::sqrt(ar);
+                        box_height = _min_sizes[sIdx] / std::sqrt(ar);
+
+                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x - box_width / 2.0f) / IW);
+                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y - box_height / 2.0f) / IH);
+                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_x + box_width / 2.0f) / IW);
+                        dst_data[idx++] = ie::PrecisionUtils::f32tof16((center_y + box_height / 2.0f) / IH);
+                    }
+                }
+            }
+        }
+    }
+
+    if (_clip) {
+        for (int d = 0; d < dim; ++d) {
+            dst_data[d] = (std::min)((std::max)(dst_data[d], ie::PrecisionUtils::f32tof16(0.0f)), ie::PrecisionUtils::f32tof16(1.0f));
+        }
+    }
+
+    int channel_size = OH * OW;
+
+    dst_data += channel_size;
+
+    if (_variance.size() == 1) {
+        ie::parallel_for(channel_size, [&](int i) {
+            dst_data[i] = ie::PrecisionUtils::f32tof16(_variance[0]);
+        });
+    } else {
+        ie::parallel_for4d(H, W, _num_priors, 4, [&](int h, int w, int i, int j) {
+            dst_data[j + 4 * (i + _num_priors * (w + W * h))] = ie::PrecisionUtils::f32tof16(_variance[j]);
+        });
+    }
+}
+
+//
+// PriorBoxClusteredContent
+//
+
+PriorBoxClusteredContent::PriorBoxClusteredContent(
+        const DataDesc& inDesc0,
+        const DataDesc& inDesc1,
+        const DataDesc& outDesc,
+        const ie::CNNLayerPtr& layer) :
+        _inDesc0(inDesc0), _inDesc1(inDesc1), _outDesc(outDesc),
+        _layer(layer) {
+    IE_ASSERT(layer != nullptr);
+}
+
+size_t PriorBoxClusteredContent::byteSize() const {
+    return checked_cast<size_t>(_outDesc.totalDimSize()) *
+           checked_cast<size_t>(_outDesc.elemSize());
+}
+
+void PriorBoxClusteredContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(PriorBoxClusteredContent);
+
+    auto tempPtr = static_cast<fp16_t*>(tempBuf);
+
+    auto widths_ = _layer->GetParamAsFloats("width");
+    auto heights_ = _layer->GetParamAsFloats("height");
+    auto clip_ = _layer->GetParamAsInt("clip");
+    auto variance_ = _layer->GetParamAsFloats("variance");
+    auto img_h_ = _layer->GetParamAsInt("img_h", 0);
+    auto img_w_ = _layer->GetParamAsInt("img_w", 0);
+    auto step_ = _layer->GetParamAsFloat("step", 0);
+    auto step_h_ = _layer->GetParamAsFloat("step_h", 0);
+    auto step_w_ = _layer->GetParamAsFloat("step_w", 0);
+    auto offset_ = _layer->GetParamAsFloat("offset", 0);
+
+    auto num_priors_ = widths_.size();
+
+    if (variance_.empty()) {
+        variance_.push_back(0.1);
+    }
+
+    auto layer_width  = _inDesc0.dim(Dim::W);
+    auto layer_height = _inDesc0.dim(Dim::H);
+
+    auto img_width  = img_w_ == 0 ? _inDesc1.dim(Dim::W) : img_w_;
+    auto img_height = img_h_ == 0 ? _inDesc1.dim(Dim::H) : img_h_;
+
+    auto step_w = step_w_ == 0 ? step_ : step_w_;
+    auto step_h = step_h_ == 0 ? step_ : step_h_;
+    if (step_w == 0 || step_h == 0) {
+        step_w = static_cast<float>(img_width) / layer_width;
+        step_h = static_cast<float>(img_height) / layer_height;
+    }
+
+    auto expected_output_dimx = layer_height * layer_width * num_priors_ * 4;
+    if (_outDesc.dim(Dim::W) != expected_output_dimx || _outDesc.dim(Dim::H) != 2) {
+        THROW_IE_EXCEPTION << "PriorBoxClustered output has invalid dimensions, expected " << expected_output_dimx << "x2"
+                           << ", got " << _outDesc.dim(Dim::W) << "x" << _outDesc.dim(Dim::H) << ", layer name is: " << _layer->name;
+    }
+
+    auto offset = _outDesc.dim(Dim::W);
+    auto var_size = variance_.size();
+
+    auto top_data_0 = tempPtr;
+    auto top_data_1 = top_data_0 + offset;
+
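+    // top_data_0 receives the clustered box coordinates, top_data_1 the per-box variances;
+    // both halves are filled in a single pass over the layer grid below.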
+    ie::parallel_for2d(layer_height, layer_width, [=](int h, int w) {
+        auto center_x = (w + offset_) * step_w;
+        auto center_y = (h + offset_) * step_h;
+
+        for (int s = 0; s < num_priors_; ++s) {
+            auto box_width  = widths_[s];
+            auto box_height = heights_[s];
+
+            auto xmin = (center_x - box_width  / 2.0f) / img_width;
+            auto ymin = (center_y - box_height / 2.0f) / img_height;
+            auto xmax = (center_x + box_width  / 2.0f) / img_width;
+            auto ymax = (center_y + box_height / 2.0f) / img_height;
+
+            if (clip_) {
+                xmin = std::min(std::max(xmin, 0.0f), 1.0f);
+                ymin = std::min(std::max(ymin, 0.0f), 1.0f);
+                xmax = std::min(std::max(xmax, 0.0f), 1.0f);
+                ymax = std::min(std::max(ymax, 0.0f), 1.0f);
+            }
+
+            top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 0] = ie::PrecisionUtils::f32tof16(xmin);
+            top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 1] = ie::PrecisionUtils::f32tof16(ymin);
+            top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 2] = ie::PrecisionUtils::f32tof16(xmax);
+            top_data_0[h * layer_width * num_priors_ * 4 + w * num_priors_ * 4 + s * 4 + 3] = ie::PrecisionUtils::f32tof16(ymax);
+
+            for (int j = 0; j < var_size; j++) {
+                auto index = h * layer_width * num_priors_ * var_size + w * num_priors_ * var_size + s * var_size + j;
+                top_data_1[index] = ie::PrecisionUtils::f32tof16(variance_[j]);
+            }
+        }
+    });
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/replicated_data_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/replicated_data_content.cpp
new file mode 100644 (file)
index 0000000..da01e72
--- /dev/null
@@ -0,0 +1,58 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/replicated_data_content.hpp>
+
+#include <vpu/utils/profiling.hpp>
+
+#include <ie_parallel.hpp>
+#include <precision_utils.h>
+
+namespace vpu {
+
+ReplicatedContent::ReplicatedContent(float val, int count, const DataDesc& desc) :
+        _factor{val}, _count(count), _desc(desc) {}
+
+ReplicatedContent::ReplicatedContent(DataContent::Ptr origContent, int count, const DataDesc& desc) :
+        _origContent(origContent), _count(count), _desc(desc) {}
+
+size_t ReplicatedContent::byteSize() const {
+    if (!_origContent) {
+        return checked_cast<size_t>(_count) * sizeof(fp16_t);
+    } else {
+        IE_ASSERT(_desc.totalDimSize() % _count == 0);
+
+        return checked_cast<size_t>(_desc.totalDimSize()) * sizeof(fp16_t);
+    }
+}
+
+void ReplicatedContent::fillTempBuf(void* tempBuf) const {
+    VPU_PROFILE(ReplicatedContent);
+
+    auto dstPtr = static_cast<fp16_t*>(tempBuf);
+
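+    // Either broadcast the scalar _factor _count times, or tile the original content
+    // _count times to fill the destination buffer.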
+    if (!_origContent) {
+        std::fill_n(dstPtr, _count, ie::PrecisionUtils::f32tof16(_factor));
+    } else {
+        IE_ASSERT(_desc.totalDimSize() % _count == 0);
+
+        auto origCount = _desc.totalDimSize() / _count;
+        auto origPtr = _origContent->get<fp16_t>();
+        IE_ASSERT(origPtr != nullptr);
+
+        ie::parallel_for(_count, [origPtr, origCount, dstPtr](int i) {
+            std::copy_n(origPtr, origCount, dstPtr + i * origCount);
+        });
+    }
+}
+
+DataContent::Ptr replicateContent(float val, int count, const DataDesc& desc) {
+    return std::make_shared<ReplicatedContent>(val, count, desc);
+}
+
+DataContent::Ptr replicateContent(const DataContent::Ptr& origContent, int count, const DataDesc& desc) {
+    return std::make_shared<ReplicatedContent>(origContent, count, desc);
+}
+
+} // namespace vpu
diff --git a/inference-engine/src/vpu/graph_transformer/src/model/data_contents/scaled_content.cpp b/inference-engine/src/vpu/graph_transformer/src/model/data_contents/scaled_content.cpp
new file mode 100644 (file)
index 0000000..c96635a
--- /dev/null
@@ -0,0 +1,41 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/model/data_contents/scaled_content.hpp>
+
+#include <vpu/utils/profiling.hpp>
+
+#include <ie_parallel.hpp>
+#include <precision_utils.h>
+
+namespace vpu {
+
+ScaledContent::ScaledContent(const DataContent::Ptr& origContent, float scale) :
+        _origContent(origContent), _factor(scale) {
+}
+
+size_t ScaledContent::byteSize() const {
+    return _origContent->byteSize();
+}
+
+void ScaledContent::fillTempBuf(void *tempBuf) const {
+    VPU_PROFILE(ScaledContent);
+
+    const auto totalSize = _origContent->byteSize() / sizeof(fp16_t);
+
+    auto srcPtr = _origContent->get<fp16_t>();
+    IE_ASSERT(srcPtr != nullptr);
+
+    auto dstPtr = static_cast<fp16_t*>(tempBuf);
+
+    ie::parallel_for(totalSize, [this, srcPtr, dstPtr](int i) {
+        dstPtr[i] = ie::PrecisionUtils::f32tof16(ie::PrecisionUtils::f16tof32(srcPtr[i]) * _factor);
+    });
+}
+
+DataContent::Ptr scaleContent(const DataContent::Ptr& origContent, float scale) {
+    return std::make_shared<ScaledContent>(origContent, scale);
+}
+
+} // namespace vpu
index 0371064..3645c59 100644 (file)
@@ -4,6 +4,14 @@
 
 #include <vpu/model/model.hpp>
 
+#include <vpu/compile_env.hpp>
+#include <vpu/utils/auto_scope.hpp>
+#include <vpu/utils/profiling.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
+#include <details/caseless.hpp>
+#include "blob_factory.hpp"
+
 #include <cctype>
 #include <memory>
 #include <string>
 #include <exception>
 #include <algorithm>
 
-#include <details/caseless.hpp>
-
-#include <vpu/compile_env.hpp>
-#include <vpu/utils/auto_scope.hpp>
-#include <vpu/utils/profiling.hpp>
-
-#include "blob_factory.hpp"
-
 namespace vpu {
 
 //
@@ -96,6 +96,11 @@ Data ModelObj::addConstData(
         const DataContent::Ptr& content) {
     IE_ASSERT(content != nullptr);
 
+    VPU_THROW_UNLESS(desc.totalDimSize() * desc.elemSize() == content->byteSize(),
+        "duplicateData error: while duplicating {} Const data got different "
+        "newDesc and content byte sizes ({} and {} respectively)",
+        name, desc.totalDimSize() * desc.elemSize(), content->byteSize());
+
     std::shared_ptr<DataNode> data(new DataNode);
 
     data->_name = name;
@@ -104,7 +109,6 @@ Data ModelObj::addConstData(
     data->_model = this;
 
     data->_content = content;
-    content->_desc = desc;
 
     data->_ptrPosInModel = _dataPtrList.emplace(_dataPtrList.end(), data);
     _dataList.push_back(data);
@@ -120,7 +124,7 @@ Data ModelObj::addConstData(const std::string& name, const DataDesc& descriptor,
     if (generator) {
         generator(ieBlob);
     }
-    return addConstData(name, descriptor, ieBlobContent(ieBlob));
+    return addConstData(name, descriptor, ieBlobContent(ieBlob, descriptor.type()));
 }
 
 Data ModelObj::addNewData(
@@ -183,10 +187,15 @@ Data ModelObj::duplicateData(
     newData->_model = this;
 
     if (newDataUsage == DataUsage::Const) {
-        newData->_content = newContent != nullptr ? newContent : origData->content();
-        if (newContent != nullptr) {
-            newContent->_desc = newData->_desc;
-        }
+        const auto& content = newContent != nullptr ? newContent : origData->content();
+        const auto& desc = newDesc != DataDesc() ? newDesc : origData->desc();
+
+        VPU_THROW_UNLESS(desc.totalDimSize() * desc.elemSize() == content->byteSize(),
+            "duplicateData error: while duplicating {} Const data got different "
+            "desc and content byte sizes ({} and {} respectively)",
+            origData->name(), desc.totalDimSize() * desc.elemSize(), content->byteSize());
+
+        newData->_content = content;
     }
 
     newData->attrs().copyFrom(origData->attrs());
index 4ed95a7..1efb458 100644 (file)
@@ -4,76 +4,19 @@
 
 #include <vpu/frontend/frontend.hpp>
 
-#include <cmath>
-
-#include <vector>
-#include <memory>
-
-#include <precision_utils.h>
-#include <ie_parallel.hpp>
-
 #include <vpu/utils/ie_helpers.hpp>
 #include <vpu/utils/numeric.hpp>
 #include <vpu/utils/profiling.hpp>
+#include <vpu/model/data_contents/batch_norm_contents.hpp>
 
-namespace vpu {
-
-namespace {
-
-class BatchNormalizationWeightsContent final : public CalculatedDataContent {
-public:
-    BatchNormalizationWeightsContent(
-            const DataContent::Ptr& origContent,
-            float epsilon) :
-            CalculatedDataContent({origContent}), _epsilon(epsilon) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(BatchNormalizationWeightsContent);
-
-        auto srcPtr = baseContents[0]->get<fp16_t>();
-        auto dstPtr = static_cast<fp16_t*>(tempBuf);
-
-        ie::parallel_for(desc().totalDimSize(), [this, srcPtr, dstPtr](int i) {
-            float val = ie::PrecisionUtils::f16tof32(srcPtr[i]) + _epsilon;
-            val = 1.0f / std::sqrt(val);
-            dstPtr[i] = ie::PrecisionUtils::f32tof16(val);
-        });
-    }
-
-private:
-    float _epsilon;
-};
-
-class BatchNormalizationBiasesContent final : public CalculatedDataContent {
-public:
-    BatchNormalizationBiasesContent(
-            const DataContent::Ptr& origContent,
-            const DataContent::Ptr& weightsContent) :
-            CalculatedDataContent({origContent, weightsContent}) {
-    }
-
-protected:
-    void fillTempBuf(const SmallVector<DataContent::Ptr, 2>& baseContents, void* tempBuf) const override {
-        VPU_PROFILE(BatchNormalizationBiasesContent);
-
-        auto origPtr = baseContents[0]->get<fp16_t>();
-        auto weightsPtr = baseContents[1]->get<fp16_t>();
-
-        auto dstPtr = static_cast<fp16_t*>(tempBuf);
-
-        ie::parallel_for(desc().totalDimSize(), [origPtr, weightsPtr, dstPtr](int i) {
-            // TODO : need to be extracted from IE layer.
-            float beta = 0.0f;
+#include <precision_utils.h>
+#include <ie_parallel.hpp>
 
-            auto wVal = ie::PrecisionUtils::f16tof32(weightsPtr[i]);
-            dstPtr[i] = ie::PrecisionUtils::f32tof16(beta - wVal * ie::PrecisionUtils::f16tof32(origPtr[i]));
-        });
-    }
-};
+#include <cmath>
+#include <vector>
+#include <memory>
 
-}  // namespace
+namespace vpu {
 
 void FrontEnd::parseBatchNorm(const Model& model, const ie::CNNLayerPtr& _layer, const DataVector& inputs, const DataVector& outputs) const {
     IE_ASSERT(inputs.size() == 1);
index 3b5db2c..55844e0 100644 (file)
@@ -4,6 +4,11 @@
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <vpu/frontend/custom_layer.hpp>
+#include <vpu/utils/simple_math.hpp>
+#include <vpu/model/data_contents/kernel_binary_content.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
 #include <vector>
 #include <memory>
 #include <string>
 #include <algorithm>
 #include <tuple>
 
-#include <vpu/frontend/custom_layer.hpp>
-#include <vpu/utils/simple_math.hpp>
-
-
 namespace vpu {
 
 static void calcSizesFromParams(const DataDesc &desc, const SmallVector<std::string> &bufferSizeRules, SmallVector<int, 3> &sizes);
 
 namespace {
 
-class KernelBinaryContent final : public DataContent {
-public:
-    explicit KernelBinaryContent(const std::string& blob) : _blob(blob) {
-        IE_ASSERT(!_blob.empty());
-    }
-
-    const void* getRaw() const override {
-        IE_ASSERT(desc().totalDimSize() * desc().elemSize() == _blob.length());
-        return _blob.data();
-    }
-
-private:
-    std::string _blob;
-};
-
 class CustomStage final : public StageNode {
 public:
     using StageNode::StageNode;
index 80d35c0..8d8f6d3 100644 (file)
@@ -195,10 +195,10 @@ private:
         auto input2 = inputEdge(2)->input();
         auto output = outputEdge(0)->output();
 
-        input0->serializeBuffer(serializer, output->desc().dimsOrder());
+        input0->serializeBuffer(serializer);
         output->serializeBuffer(serializer);
-        input1->serializeBuffer(serializer, output->desc().dimsOrder());
-        input2->serializeBuffer(serializer, output->desc().dimsOrder());
+        input1->serializeBuffer(serializer);
+        input2->serializeBuffer(serializer);
     }
 };
 
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/exp_topkrois.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/exp_topkrois.cpp
new file mode 100644 (file)
index 0000000..e1ff644
--- /dev/null
@@ -0,0 +1,103 @@
+// Copyright (C) 2019-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <memory>
+
+namespace vpu {
+
+namespace {
+
+class ExpTopKROIsStage final : public StageNode {
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<ExpTopKROIsStage>(*this);
+    }
+
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+    }
+
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        for (const auto& inEdge : inputEdges()) {
+            stridesInfo.setInput(inEdge, StridesRequirement::compact());
+        }
+        for (const auto& outEdge : outputEdges()) {
+            stridesInfo.setOutput(outEdge, StridesRequirement::compact());
+        }
+    }
+
+    void finalizeDataLayoutImpl() override {
+    }
+
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+    }
+
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+             {{DataType::FP16}, {DataType::FP16}},
+             {{DataType::FP16}});
+    }
+
+    void serializeParamsImpl(BlobSerializer& serializer) const override {
+        const auto& params = attrs().get<int32_t>("max_rois");
+
+        serializer.append(params);
+    }
+
+    void serializeDataImpl(BlobSerializer& serializer) const override {
+        input(0)->serializeBuffer(serializer);
+        input(1)->serializeBuffer(serializer);
+        output(0)->serializeBuffer(serializer);
+    }
+};
+
+}  // namespace
+
+void FrontEnd::parseExpTopKROIs(
+        const Model& model,
+        const ie::CNNLayerPtr& layer,
+        const DataVector& inputs,
+        const DataVector& outputs) const {
+    VPU_THROW_UNLESS(inputs.size() == 2, "Layer %s must have 2 input tensors.", layer->name);
+    VPU_THROW_UNLESS(outputs.size() == 1, "Layer %s must have 1 output tensor.", layer->name);
+
+    int32_t max_rois = layer->GetParamAsInt("max_rois", 0);
+
+    auto inputRois  = inputs[0];
+    auto inputProbs = inputs[1];
+    auto outputRois = outputs[0];
+
+    VPU_THROW_UNLESS((inputRois->desc().dims().size() == 2) &&
+                     (inputRois->desc().dim(Dim::C) == 4),
+                     "Wrong shape for input 0 of layer %s, expected (N, 4), got: dims size = %lu, dim C = %d",
+                     layer->name, inputRois->desc().dims().size(), inputRois->desc().dim(Dim::C));
+
+    VPU_THROW_UNLESS(inputProbs->desc().dims().size() == 1,
+                     "Wrong shape for input 1 of layer %s, expected dim size = 1, got: %lu",
+                     layer->name, inputProbs->desc().dims().size());
+
+    VPU_THROW_UNLESS(inputProbs->desc().dim(Dim::C) == inputRois->desc().dim(Dim::N),
+                     "Layer %s: input0 dim N and input1 dim C must be equal, got: input0 (N = %d), input1 (C = %d)",
+                     layer->name, inputProbs->desc().dim(Dim::N), inputProbs->desc().dim(Dim::C));
+
+    VPU_THROW_UNLESS((outputRois->desc().dims().size() == 2) &&
+                     (outputRois->desc().dim(Dim::C) == 4),
+                     "Wrong shape for output 0 of layer %s, expected (N, 4), got: dims size = %lu, dim C = %d",
+                     layer->name, outputRois->desc().dims().size(), outputRois->desc().dim(Dim::C));
+
+    VPU_THROW_UNLESS(outputRois->desc().dim(Dim::N) == max_rois,
+                     "Wrong shape for output 0 of layer %s, expected dim N = %d, got: dim N = %d",
+                     layer->name, static_cast<int>(max_rois), outputRois->desc().dim(Dim::N));
+
+    auto stage = model->addNewStage<ExpTopKROIsStage>(
+        layer->name,
+        StageType::ExpTopKROIs,
+        layer,
+        inputs,
+        outputs);
+
+    stage->attrs().set("max_rois", max_rois);
+}
+
+}  // namespace vpu
index ff89891..901f9f8 100644 (file)
@@ -63,7 +63,7 @@ protected:
     }
 
     StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
-        return StageSHAVEsRequirements::OnlyOne;
+        return StageSHAVEsRequirements::NotNeeded;
     }
 
     void initialCheckImpl() const override {
index 1e18a98..8562df3 100644 (file)
@@ -4,6 +4,13 @@
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <vpu/graph_transformer.hpp>
+#include <vpu/compile_env.hpp>
+#include <vpu/utils/file_system.hpp>
+#include <vpu/model/data_contents/mtcnn_blob_content.hpp>
+
+#include <cpp/ie_cnn_net_reader.h>
+
 #include <vector>
 #include <fstream>
 #include <string>
 #include <memory>
 #include <set>
 
-#include <cpp/ie_cnn_net_reader.h>
-
-#include <vpu/graph_transformer.hpp>
-#include <vpu/compile_env.hpp>
-#include <vpu/utils/file_system.hpp>
-
 namespace vpu {
 
 // Must be synchronized with MvTensor
@@ -94,21 +95,6 @@ private:
     }
 };
 
-class MTCNNBlobContent final : public DataContent {
-public:
-    explicit MTCNNBlobContent(std::vector<char>&& blob) : _blob(std::forward<std::vector<char>>(blob)) {
-        IE_ASSERT(!_blob.empty());
-    }
-
-    const void* getRaw() const override {
-        IE_ASSERT(desc().totalDimSize() * desc().elemSize() == _blob.size());
-        return _blob.data();
-    }
-
-private:
-    std::vector<char> _blob;
-};
-
 std::pair<int, int> getResolution(const std::string& str) {
     std::istringstream stream(str);
     std::string output;
@@ -264,7 +250,7 @@ void FrontEnd::parseMTCNN(const Model& model, const ie::CNNLayerPtr& layer, cons
     auto innerGraphsDesc = DataDesc({mergedBlob.size()});
     innerGraphsDesc.setType(DataType::U8);
 
-    auto innerGraphs = model->addConstData(layer->name + "@innerGraphs", innerGraphsDesc, std::make_shared<MTCNNBlobContent>(std::move(mergedBlob)));
+    auto innerGraphs = model->addConstData(layer->name + "@innerGraphs", innerGraphsDesc, std::make_shared<MTCNNBlobContent>(mergedBlob));
 
     auto stage = model->addNewStage<MTCNNStage>(layer->name, StageType::MTCNN, layer, {input, innerGraphs}, {output});
     stage->attrs().set("pyramid", pyramid);
index bd83d35..536093f 100644 (file)
@@ -101,17 +101,17 @@ void MyriadXHwStage::finalCheckImpl() const {
     const auto input = inputEdge(0)->input();
     const auto output = outputEdge(0)->output();
 
-    IE_ASSERT(input->memoryOffset() % 16 == 0);
-    IE_ASSERT(output->memoryOffset() % 16 == 0);
+    IE_ASSERT(input->dataLocation().offset % 16 == 0);
+    IE_ASSERT(output->dataLocation().offset % 16 == 0);
 
     if (attrs().get<HwOpType>("hwOpType") != HwOpType::POOL) {
         const auto weights = inputEdge(1)->input();
         const auto biases = inputEdge(2)->input();
         const auto scales = inputEdge(3)->input();
 
-        IE_ASSERT(weights->memoryOffset() % 16 == 0);
-        IE_ASSERT(biases->memoryOffset() % 16 == 0);
-        IE_ASSERT(scales->memoryOffset() % 16 == 0);
+        IE_ASSERT(weights->dataLocation().offset % 16 == 0);
+        IE_ASSERT(biases->dataLocation().offset % 16 == 0);
+        IE_ASSERT(scales->dataLocation().offset % 16 == 0);
     }
 }
 
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/nonzero.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/nonzero.cpp
new file mode 100644 (file)
index 0000000..010337b
--- /dev/null
@@ -0,0 +1,120 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+#include <precision_utils.h>
+#include <memory>
+#include <set>
+
+namespace vpu {
+
+namespace {
+
+class NonZero : public StageNode {
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<NonZero>(*this);
+    }
+
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+    }
+
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        auto inputStrides = input(0)->requiredStrides();
+        auto outIndicesStrides = output(0)->requiredStrides();
+        auto outDimsStrides = output(1)->requiredStrides();
+
+        stridesInfo.setInput(inputEdge(0), inputStrides.add(0, DimStride::Compact));
+        stridesInfo.setOutput(outputEdge(0), outIndicesStrides.add(0, DimStride::Compact));
+        stridesInfo.setOutput(outputEdge(1), outDimsStrides.add(0, DimStride::Compact));
+    }
+
+    void finalizeDataLayoutImpl() override {
+    }
+
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+    }
+
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+                                 {{DataType::FP16, DataType::U8, DataType::S32}},
+                                 {{DataType::S32}, {DataType::S32}});
+    }
+
+    void finalCheckImpl() const override {
+    }
+
+    void serializeParamsImpl(BlobSerializer& serializer) const override {
+    }
+
+    void serializeDataImpl(BlobSerializer& serializer) const override {
+        VPU_INTERNAL_CHECK(numInputs() == 1,
+                           "Nonzero stage with name %s must have only 1 input, "
+                           "actually provided %d", name(), numInputs());
+        VPU_INTERNAL_CHECK(numOutputs() == 2,
+                           "Nonzero stage with name %s must have only 2 outputs, "
+                           "actually provided %d", name(), numOutputs());
+
+        input(0)->serializeBuffer(serializer);
+        output(0)->serializeBuffer(serializer);
+        output(1)->serializeBuffer(serializer);
+    }
+};
+
+}  // namespace
+
+void FrontEnd::parseNonZero(
+        const Model& model,
+        const ie::CNNLayerPtr& layer,
+        const DataVector& inputs,
+        const DataVector& outputs) const {
+    VPU_THROW_UNLESS(inputs.size() == 1,
+                     "Nonzero layer with name %s must have only 1 input, actually provided %d",
+                     layer->name, inputs.size());
+    VPU_THROW_UNLESS(outputs.size() == 2,
+                     "Nonzero layer with name %s must have only 2 outputs, actually provided %d",
+                     layer->name, outputs.size());
+
+    const auto input = inputs[0];
+    const auto inputNumDims = input->desc().numDims();
+    const auto totalIndicesDimSize = input->desc().totalDimSize();
+
+    const auto outIndicesDesc = outputs[0]->desc();
+    const auto outIndicesPerm = outIndicesDesc.dimsOrder().toPermutation();
+    const auto minorIndicesDim = outIndicesDesc.dim(outIndicesPerm.at(0));
+    const auto majorIndicesDim = outIndicesDesc.dim(outIndicesPerm.at(1));
+    VPU_THROW_UNLESS(outIndicesDesc.numDims() == 2,
+                     "NonZero layer with name %s must have 2D output Indices tensor, "
+                     "actually provided %dD tensor",
+                     layer->name, outIndicesDesc.numDims());
+    VPU_THROW_UNLESS(minorIndicesDim >= totalIndicesDimSize,
+                     "NonZero layer with name %s must have output Indices tensor with minor dim "
+                     "size >= total amount of elements of input tensor, actually provided %d >= %d",
+                     layer->name, minorIndicesDim, totalIndicesDimSize);
+    VPU_THROW_UNLESS(majorIndicesDim == inputNumDims,
+                     "NonZero layer with name %s must have output Indices tensor with major dim "
+                     "size == number of dimensions of input tensor, actually provided %d == %d",
+                     layer->name, majorIndicesDim, inputNumDims);
+
+    const auto outDimsDesc = outputs[1]->desc();
+    const auto outDimsPerm = outDimsDesc.dimsOrder().toPermutation();
+    const auto minorDimsDim = outDimsDesc.dim(outDimsPerm.at(0));
+    VPU_THROW_UNLESS(outDimsDesc.numDims() == 1,
+                     "NonZero layer with name %s must have 1D output Dims tensor, "
+                     "actually provided %dD tensor",
+                     layer->name, outDimsDesc.numDims());
+    VPU_THROW_UNLESS(minorDimsDim >= 2,
+                     "NonZero layer with name %s must have output Dims tensor with minor dim "
+                     "size >= 2, actually provided %d",
+                     layer->name, minorDimsDim);
+
+    model->addNewStage<NonZero>(
+            layer->name,
+            StageType::NonZero,
+            layer,
+            inputs,
+            outputs);
+}
+
+}  // namespace vpu
index 2779a99..c4f1653 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
 #include <vector>
 #include <map>
 #include <unordered_set>
index c7e74e1..8a5d448 100644 (file)
@@ -4,11 +4,16 @@
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <vpu/stages/post_op_stage.hpp>
+#include <vpu/utils/ie_helpers.hpp>
+#include <vpu/utils/profiling.hpp>
+#include <vpu/model/data_contents/prelu_blob_content.hpp>
+
+#include <ie_parallel.hpp>
+
 #include <vector>
 #include <memory>
 
-#include <vpu/stages/post_op_stage.hpp>
-
 namespace vpu {
 
 namespace {
@@ -47,7 +52,8 @@ void FrontEnd::parsePReLU(const Model& model, const ie::CNNLayerPtr& layer, cons
     auto weights = model->addConstData(
         layer->name + "@weights",
         DataDesc({output->desc().dim(Dim::C)}),
-        ieBlobContent(weightsBlob, channelShared ? output->desc().dim(Dim::C) : 1));
+        std::make_shared<PReLUBlobContent>(weightsBlob, DataDesc({output->desc().dim(Dim::C)}),
+                                           channelShared ? output->desc().dim(Dim::C) : 1));
 
     model->addNewStage<PReluStage>(layer->name, StageType::PRelu, layer, {inputs[0], weights}, outputs);
 }
index cc88e73..6d211d3 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
 #include <algorithm>
 #include <memory>
 #include <set>
@@ -91,7 +93,7 @@ private:
             input1,
             "",
             DataDesc(),
-            ieBlobContent(newIndicesBlob));
+            ieBlobContent(newIndicesBlob, DataType::S32));
 
         model()->replaceStageInput(inputEdge(1), newList);
     }
index dba670d..893a4ec 100644 (file)
@@ -4,13 +4,14 @@
 
 #include <vpu/frontend/frontend.hpp>
 
+#include <vpu/utils/numeric.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
+
 #include <vector>
 #include <string>
 #include <memory>
 #include <set>
 
-#include <vpu/utils/numeric.hpp>
-
 namespace vpu {
 
 namespace {
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/roi_align.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/roi_align.cpp
new file mode 100644 (file)
index 0000000..2241dfe
--- /dev/null
@@ -0,0 +1,109 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <vector>
+#include <string>
+#include <unordered_set>
+#include <memory>
+#include <set>
+
+namespace vpu {
+
+VPU_DECLARE_ENUM(ROIAlignMode,
+    Average = 0,
+    Max = 1
+)
+
+static const std::string s_mode = "mode";
+static const std::string s_pooled_w = "pooled_w";
+static const std::string s_pooled_h = "pooled_h";
+static const std::string s_sampling_ratio = "sampling_ratio";
+static const std::string s_spatial_scale = "spatial_scale";
+
+namespace {
+
+class ROIAlignStage final : public StageNode {
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<ROIAlignStage>(*this);
+    }
+
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        orderInfo.setInput(inputEdge(0), inputEdge(0)->input()->desc().dimsOrder().createMovedDim(Dim::C, 2));
+        orderInfo.setOutput(outputEdge(0), outputEdge(0)->output()->desc().dimsOrder().createMovedDim(Dim::C, 2));
+    }
+
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        for (const auto& inEdge : inputEdges()) {
+            stridesInfo.setInput(inEdge, StridesRequirement::compact());
+        }
+        for (const auto& outEdge : outputEdges()) {
+            stridesInfo.setOutput(outEdge, StridesRequirement::compact());
+        }
+    }
+
+    void finalizeDataLayoutImpl() override {
+    }
+
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& batchInfo) override {
+    }
+
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}, {DataType::S32}}, {{DataType::FP16}});
+    }
+
+    void serializeParamsImpl(BlobSerializer& serializer) const override {
+        const auto pooled_w = attrs().get<int>(s_pooled_w);
+        const auto pooled_h = attrs().get<int>(s_pooled_h);
+        const auto sampling_ratio = attrs().get<int>(s_sampling_ratio);
+        const auto spatial_scale = attrs().get<float>(s_spatial_scale);
+        const auto mode = attrs().get<ROIAlignMode>(s_mode);
+
+        serializer.append(static_cast<uint32_t>(pooled_w));
+        serializer.append(static_cast<uint32_t>(pooled_h));
+        serializer.append(static_cast<uint32_t>(sampling_ratio));
+        serializer.append(static_cast<float>(spatial_scale));
+        serializer.append(static_cast<ROIAlignMode>(mode));
+    }
+
+    void serializeDataImpl(BlobSerializer& serializer) const override {
+        for (int i = 0; i < numInputs(); i++) {
+            inputEdge(i)->input()->serializeBuffer(serializer);
+        }
+
+        outputEdge(0)->output()->serializeBuffer(serializer);
+    }
+};
+
+}  // namespace
+
+void FrontEnd::parseROIAlign(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const {
+    VPU_THROW_UNLESS(inputs.size() == 3,
+                    "ROIAlign stage with name {} has invalid number of inputs: expected 3, "
+                    "actually provided {}", layer->name, inputs.size());
+
+    VPU_THROW_UNLESS(outputs.size() == 1,
+                    "ROIAlign stage with name {} has invalid number of outputs: expected 1, "
+                    "actually provided {}", layer->name, outputs.size());
+
+    const auto stage = model->addNewStage<ROIAlignStage>(layer->name, StageType::ROIAlign, layer, inputs, outputs);
+    const auto mode = layer->GetParamAsString("mode", "");
+
+    if (mode == "avg") {
+        stage->attrs().set<ROIAlignMode>(s_mode, ROIAlignMode::Average);
+    } else if (mode == "max") {
+        stage->attrs().set<ROIAlignMode>(s_mode, ROIAlignMode::Max);
+    } else {
+        VPU_THROW_FORMAT("Layer with name {} supports only (avg, max) mode", layer->name);
+    }
+
+    stage->attrs().set<int>(s_pooled_w, layer->GetParamAsInt("pooled_w"));
+    stage->attrs().set<int>(s_pooled_h, layer->GetParamAsInt("pooled_h"));
+    stage->attrs().set<int>(s_sampling_ratio, layer->GetParamAsInt("sampling_ratio"));
+    stage->attrs().set<float>(s_spatial_scale, layer->GetParamAsFloat("spatial_scale"));
+}
+
+}  // namespace vpu
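The mode handling in parseROIAlign amounts to a small string-to-enum lookup; a minimal sketch of the equivalent mapping, assuming a hypothetical free function (not part of the plugin):

    // Hypothetical mapping from the IR "mode" attribute to the stage enum used above.
    ROIAlignMode parseROIAlignMode(const std::string& mode) {
        if (mode == "avg") return ROIAlignMode::Average;
        if (mode == "max") return ROIAlignMode::Max;
        VPU_THROW_FORMAT("Unsupported ROIAlign mode: {}", mode);
    }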
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/scatter_update.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/scatter_update.cpp
new file mode 100644 (file)
index 0000000..6e5ec13
--- /dev/null
@@ -0,0 +1,261 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vpu/frontend/frontend.hpp>
+
+#include <memory>
+#include <string>
+
+namespace vpu {
+
+using InferenceEngine::CNNLayerPtr;
+
+//----------------------------------------------------------------------
+
+namespace {
+
+class ScatterUpdateStage final : public StageNode {
+public:
+    using StageNode::StageNode;
+
+private:
+    StagePtr cloneImpl() const override {
+        return std::make_shared<ScatterUpdateStage>(*this);
+    }
+
+    void propagateDataOrderImpl(StageDataInfo<DimsOrder>& orderInfo) override {
+        const auto data = inputEdge(0)->input();
+        const auto indices = inputEdge(1)->input();
+        const auto updates = inputEdge(2)->input();
+        const auto axis = inputEdge(3)->input();
+        const auto output = outputEdge(0)->output();
+        orderInfo.setInput(inputEdge(0), DimsOrder::fromNumDims(data->desc().numDims()));
+        orderInfo.setInput(inputEdge(1), DimsOrder::fromNumDims(indices->desc().numDims()));
+        orderInfo.setInput(inputEdge(2), DimsOrder::fromNumDims(updates->desc().numDims()));
+        orderInfo.setInput(inputEdge(3), DimsOrder::fromNumDims(axis->desc().numDims()));
+        orderInfo.setOutput(outputEdge(0), DimsOrder::fromNumDims(output->desc().numDims()));
+    }
+
+    void getDataStridesRequirementsImpl(StageDataInfo<StridesRequirement>& stridesInfo) override {
+        stridesInfo.setInput(inputEdge(0), StridesRequirement::compact());    // `data`    tensor
+        stridesInfo.setInput(inputEdge(2), StridesRequirement::compact());    // `updates` tensor
+        stridesInfo.setOutput(outputEdge(0), StridesRequirement::compact());  // `output`  tensor
+    }
+
+    void finalizeDataLayoutImpl() override {
+    }
+
+    void getBatchSupportInfoImpl(StageDataInfo<BatchSupport>& /*batchInfo*/) override {
+    }
+
+    StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
+        return StageSHAVEsRequirements::NotNeeded;
+    }
+
+    void initialCheckImpl() const override {
+        assertInputsOutputsTypes(this,
+            // `data`        ,  `indices`     , `updates`       , `axis` tensor
+            {{DataType::FP16}, {DataType::S32}, {DataType::FP16}, {DataType::S32}},
+            {{DataType::FP16}});
+    }
+
+    void serializeDataImpl(BlobSerializer& serializer) const override {
+        auto data    = input(0);
+        auto indices = input(1);
+        auto updates = input(2);
+        auto axis    = input(3);
+        auto out = output(0);
+
+        data->serializeBuffer(serializer);
+        out->serializeBuffer(serializer);
+        indices->serializeBuffer(serializer);
+        updates->serializeBuffer(serializer);
+        axis->serializeBuffer(serializer);
+    }
+
+    void serializeParamsImpl(BlobSerializer& serializer) const override {
+    }
+};
+
+}  // namespace
+
+//----------------------------------------------------------------------
+
+static
+void checkTensorShapes(const vpu::Data& input,
+                       const vpu::Data& output,
+                       const vpu::Data& indices,
+                       const vpu::Data& updates,
+                       const vpu::Data& axis) {
+    const DataDesc& inputDesc = input->desc();
+    const DataDesc& outputDesc = output->desc();
+    const DataDesc& indicesDesc = indices->desc();
+    const DataDesc& updatesDesc = updates->desc();
+    const DataDesc& axisDesc = axis->desc();
+
+    const auto inputType = inputDesc.type();
+    const auto outputType = outputDesc.type();
+    const auto indicesType = indicesDesc.type();
+    const auto updatesType = updatesDesc.type();
+    const auto axisType = axisDesc.type();
+
+    VPU_THROW_UNLESS(inputType == DataType::FP16, "input type is invalid");
+    VPU_THROW_UNLESS(outputType == DataType::FP16, "output type is invalid");
+    VPU_THROW_UNLESS(indicesType == DataType::S32, "indices type is invalid");
+    VPU_THROW_UNLESS(updatesType == DataType::FP16, "updates type is invalid");
+    VPU_THROW_UNLESS(axisType == DataType::S32, "axis type is invalid");
+
+    const int inputNDims = inputDesc.numDims();
+    const int outputNDims = outputDesc.numDims();
+    const int indicesNDims = indicesDesc.numDims();
+    const int updatesNDims = updatesDesc.numDims();
+    const int axisNDims = axisDesc.numDims();
+
+    VPU_THROW_UNLESS(inputNDims > 0, "input tensor must not be 0-dimensional");
+    VPU_THROW_UNLESS(outputNDims > 0, "output tensor must not be 0-dimensional");
+    VPU_THROW_UNLESS(indicesNDims > 0, "indices tensor must not be 0-dimensional");
+    VPU_THROW_UNLESS(updatesNDims > 0, "updates tensor must not be 0-dimensional");
+    VPU_THROW_UNLESS(axisNDims > 0, "axis tensor must not be 0-dimensional");
+
+    VPU_THROW_UNLESS(inputNDims == outputNDims,
+                     "input and output have different shapes: inputNDims={}, outputNDims={}",
+                     inputNDims, outputNDims);
+
+    VPU_THROW_UNLESS(updatesNDims == indicesNDims + outputNDims - 1,
+                     "incompatible shapes: indicesNDims=%d, updatesNDims={}, outputNDims={}",
+                     indicesNDims, updatesNDims, outputNDims);
+
+    VPU_THROW_UNLESS(axisNDims == 1,
+                     "axis tensor must be 1-dimensional, but axisNDims={}",
+                     axisNDims);
+
+    const DimsOrder inputDimsOrder = inputDesc.dimsOrder();
+    const DimsOrder outputDimsOrder = outputDesc.dimsOrder();
+    const DimsOrder indicesDimsOrder = indicesDesc.dimsOrder();
+    const DimsOrder updatesDimsOrder = updatesDesc.dimsOrder();
+    const DimsOrder axisDimsOrder = axisDesc.dimsOrder();
+
+    VPU_THROW_UNLESS(inputDimsOrder == outputDimsOrder, "input/output must have same layout"
+                     ", but inputDimsOrder = \"{}\", and outputDimsOrder = \"{}\"",
+                     inputDimsOrder, outputDimsOrder);
+
+    // Check if tensor shapes fit each other, e.g.:
+    //    {N, C, H, W} could be shape of `input` and `output`
+    // {I, J, C, H, W} could be shape of `update` tensor
+    // {I, J}          could be shape of `indices`
+
+    const DimValues& inputDims = inputDesc.dims();
+    const DimValues& outputDims = outputDesc.dims();
+    const DimValues& indicesDims = indicesDesc.dims();
+    const DimValues& updatesDims = updatesDesc.dims();
+    const DimValues& axisDims = axisDesc.dims();
+
+    VPU_THROW_UNLESS(inputDims == outputDims, "input/output tensors must have same lengths"
+                     ", but inputDims = \"{}\", and outputDims = \"{}\"", inputDims, outputDims);
+
+    // Permutation is array of dims, from minor to major
+    const DimVector inputPerm = inputDimsOrder.toPermutation();
+    const DimVector indicesPerm = indicesDimsOrder.toPermutation();
+    const DimVector updatesPerm = updatesDimsOrder.toPermutation();
+
+    // Check if the updates fits the input, e.g.:
+    //    {N, C, H, W} could be shape of `input` and `output`
+    // {I, J, C, H, W} could be shape of `update` tensor
+    for (int i = 0; i < inputNDims - 1; i++) {
+        const Dim inputDim = inputPerm[i];
+        const Dim updatesDim = updatesPerm[i];
+        const int inputSize = inputDims[inputDim];
+        const int updatesSize = updatesDims[updatesDim];
+        VPU_THROW_UNLESS(inputSize == updatesSize,
+                         "updates size must fit input along corresponding axes, "
+                         "but for axis={}: input size={}, updates size={}",
+                         i, inputSize, updatesSize);
+    }
+
+    // Check if the updates fits the indices, e.g.:
+    // {I, J, C, H, W} could be shape of `update` tensor
+    // {I, J}          could be shape of `indices`
+    for (int i = inputNDims - 1; i < updatesNDims; i++) {
+        const int i0 = i - (inputNDims - 1);
+        const Dim indicesDim = indicesPerm[i0];
+        const Dim updatesDim = updatesPerm[i];
+        const int indicesSize = indicesDims[indicesDim];
+        const int updatesSize = updatesDims[updatesDim];
+        VPU_THROW_UNLESS(indicesSize == updatesSize,
+                         "updates size must fit indices along corresponding axes, "
+                         "but for axis={}: indices size={}, updates size={}",
+                         i, indicesSize, updatesSize);
+    }
+
+    // Note, that for a 1D tensor the layout is "C"
+    VPU_THROW_UNLESS(axisDimsOrder == DimsOrder::C,
+                     "axis must be 1D tensor, but its dims order is {}",
+                     axisDimsOrder);
+    VPU_THROW_UNLESS(axisDims[Dim::C] == 1,
+                     "axis tensor must be 1D array of 1 element, but axis length = %d",
+                     axisDims[Dim::C]);
+}
+
+void FrontEnd::parseScatterUpdate(const Model      & model,
+                                  const CNNLayerPtr& layer,
+                                  const DataVector & inputs,
+                                  const DataVector & outputs) const {
+    VPU_THROW_UNLESS(inputs.size() == 4, "invalid number of inputs: {}", inputs.size());
+    VPU_THROW_UNLESS(outputs.size() == 1, "invalid number of outputs: {}", outputs.size());
+
+    const auto& input   = inputs[0];  // `data` tensor
+    const auto& indices = inputs[1];
+    const auto& updates = inputs[2];
+    const auto& axis    = inputs[3];
+    const auto& output = outputs[0];
+
+    checkTensorShapes(input, output, indices, updates, axis);
+
+    auto scatterUpdateLayer = std::dynamic_pointer_cast<ie::ScatterUpdateLayer>(layer);
+
+    VPU_THROW_UNLESS(scatterUpdateLayer != nullptr,
+                     "this layer is not an instance of ScatterUpdateLayer: "
+                     "layer name = \"%s\", layer type = \"%s\"",
+                     layer->name.c_str(), layer->type.c_str());
+
+    auto stage = model->addNewStage<ScatterUpdateStage>(layer->name,
+                                                        StageType::ScatterUpdate,
+                                                        layer,
+                                                        {input, indices, updates, axis},
+                                                        {output});
+
+    VPU_THROW_UNLESS(stage != nullptr,
+                     "failed to create ScatterUpdateStage: "
+                     "layer name = \"%s\", layer type = \"%s\"",
+                     layer->name.c_str(), layer->type.c_str());
+}
+
+//----------------------------------------------------------------------
+
+Stage StageBuilder::addScatterUpdateStage(
+        const Model& model,
+        const std::string& name,
+        const ie::CNNLayerPtr& layer,
+        const Data& input,
+        const Data& output,
+        const Data& indices,
+        const Data& updates,
+        const Data& axis) {
+    checkTensorShapes(input, output, indices, updates, axis);
+
+    auto stage = model->addNewStage<ScatterUpdateStage>(name,
+                                                        StageType::ScatterUpdate,
+                                                        layer,
+                                                        {input, indices, updates, axis},
+                                                        {output});
+
+    VPU_THROW_UNLESS(stage != nullptr,
+                     "failed to create ScatterUpdateStage: "
+                     "layer name = \"%s\", layer type = \"%s\"",
+                     layer->name.c_str(), layer->type.c_str());
+
+    return stage;
+}
+
+}  // namespace vpu
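A worked instance of the rank rule enforced by checkTensorShapes, using the example shapes from its own comments:

    // data/output: {N, C, H, W}    -> 4 dims
    // indices:     {I, J}          -> 2 dims
    // updates:     {I, J, C, H, W} -> 5 dims
    // rule: updatesNDims == indicesNDims + outputNDims - 1, i.e. 5 == 2 + 4 - 1
    static_assert(5 == 2 + 4 - 1, "updates rank equals indices rank plus data rank minus one");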
index ca5119d..2c3ad81 100644 (file)
@@ -79,10 +79,10 @@ private:
         auto inputBiases = inputEdge(2)->input();
         auto output = outputEdge(0)->output();
 
-        input->serializeBuffer(serializer, output->desc().dimsOrder());
+        input->serializeBuffer(serializer);
         output->serializeBuffer(serializer);
-        inputScales->serializeBuffer(serializer, output->desc().dimsOrder());
-        inputBiases->serializeBuffer(serializer, output->desc().dimsOrder());
+        inputScales->serializeBuffer(serializer);
+        inputBiases->serializeBuffer(serializer);
     }
 };
 
index 0dedeb1..fa4a3b0 100644 (file)
@@ -7,6 +7,7 @@
 #include "vpu/utils/auto_scope.hpp"
 #include "vpu/compile_env.hpp"
 #include "graph_transformer.h"
+#include "vpu/model/data_contents/ie_blob_content.hpp"
 
 #include "ie_layers_internal.hpp"
 #include "net_pass.h"
@@ -96,8 +97,8 @@ void FrontEnd::parseTensorIterator(const Model& model, const ie::CNNLayerPtr& la
         VPU_THROW_UNLESS(isConst(original), "VPU const data object can be created only from const IE data object");
 
         const auto& creator = original->getCreatorLayer().lock();
-        const auto& blob = ieBlobContent(creator->blobs.begin()->second);
         const auto& descriptor = createDescriptor(original->getTensorDesc());
+        const auto& blob = ieBlobContent(creator->blobs.begin()->second, descriptor.type());
 
         return model->addConstData(original->getName(), descriptor, blob);
     };
index b35910f..bf30d12 100644 (file)
@@ -43,7 +43,7 @@ endif()
 # "mvnc" must be the first library in the link list
 target_link_libraries(${TARGET_NAME}
     PRIVATE
-        mvnc ${INTEL_ITT_LIBS} ${NGRAPH_LIBRARIES} inference_engine vpu_graph_transformer)
+        mvnc ${INTEL_ITT_LIBS} inference_engine vpu_graph_transformer)
 
 # install
 
index 6d2c5a7..76978c8 100644 (file)
@@ -95,10 +95,12 @@ ExecutableNetwork::ExecutableNetwork(
 void ExecutableNetwork::Import(std::istream& strm,
                                std::vector<DevicePtr> &devicePool,
                                const MyriadConfig& config) {
-    std::ostringstream blobContentStream;
-    blobContentStream << strm.rdbuf();
-    const std::string& blobContentString = blobContentStream.str();
-    std::copy(blobContentString.begin(), blobContentString.end(), std::back_inserter(_graphBlob));
+    auto currentPos = strm.tellg();
+    strm.seekg(0, strm.end);
+    auto blobSize = strm.tellg() - currentPos;
+    _graphBlob.resize(static_cast<size_t>(blobSize));
+    strm.seekg(currentPos, strm.beg);
+    strm.read(&_graphBlob[0], blobSize);
 
     if (!_device->isBooted()) {
         return;
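The rewritten Import above sizes the buffer up front instead of copying through an ostringstream. A minimal standalone sketch of the same stream-slurping pattern, assuming only an open std::istream:

    #include <istream>
    #include <vector>

    // Read everything from the current position to the end of the stream into a byte buffer.
    std::vector<char> readRemainder(std::istream& strm) {
        const auto currentPos = strm.tellg();
        strm.seekg(0, std::ios::end);
        const auto size = strm.tellg() - currentPos;           // bytes left from the current position
        std::vector<char> buffer(static_cast<size_t>(size));
        strm.seekg(currentPos, std::ios::beg);
        strm.read(buffer.data(), static_cast<std::streamsize>(size));
        return buffer;
    }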
index a4b575d..65044cf 100644 (file)
@@ -8,7 +8,7 @@
 #include <utility>
 
 #include <ie_metric_helpers.hpp>
-#include <cnn_network_ngraph_impl.hpp>
+#include <cpp/ie_cnn_network.h>
 #include <cpp_interfaces/base/ie_plugin_base.hpp>
 #include <cpp_interfaces/impl/ie_executable_network_internal.hpp>
 
@@ -16,6 +16,8 @@
 #include <vpu/parsed_config.hpp>
 #include <vpu/utils/profiling.hpp>
 #include <vpu/utils/error.hpp>
+#include <vpu/ngraph/transformations/dynamic_to_static_shape.hpp>
+#include <generic_ie.hpp>
 
 #include "myriad_plugin.h"
 
@@ -33,12 +35,10 @@ ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(
     auto parsedConfigCopy = _parsedConfig;
     parsedConfigCopy.update(config);
 
-    std::shared_ptr<ICNNNetwork> clonedNetwork(nullptr);
-
-    if (auto networkNGraph = dynamic_cast<const CNNNetworkNGraphImpl*>(&network)) {
-        clonedNetwork = networkNGraph->cloneNGraphImpl();
-    } else {
-        clonedNetwork = cloneNet(network);
+    std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
+    if (auto func = clonedNetwork->getFunction()) {
+        ngraph::op::GenericIE::DisableReshape noReshape(func);
+        ngraph::pass::DynamicToStaticShape().run_on_function(func);
     }
 
     return std::make_shared<ExecutableNetwork>(*clonedNetwork, _devicePool, parsedConfigCopy);
@@ -76,6 +76,12 @@ void Engine::QueryNetwork(
     auto parsedConfigCopy = _parsedConfig;
     parsedConfigCopy.update(config);
 
+    const auto deviceName = parsedConfigCopy.deviceName();
+    if (!deviceName.empty()) {
+        const auto deviceIDs = GetMetric(METRIC_KEY(AVAILABLE_DEVICES), {}).as<std::vector<std::string>>();
+        VPU_THROW_UNLESS(std::find(deviceIDs.begin(), deviceIDs.end(), deviceName) != deviceIDs.end(), "Myriad device: {} not found.", deviceName);
+    }
+
     const auto log = std::make_shared<Logger>(
         "GraphCompiler",
         parsedConfigCopy.logLevel(),
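The new check validates the requested device ID against the MYRIAD plugin's AVAILABLE_DEVICES metric. A hedged, application-side sketch of querying the same metric through the Inference Engine Core API:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        InferenceEngine::Core ie;
        // List device IDs reported by the MYRIAD plugin; an empty list means no device was found.
        const auto ids = ie.GetMetric("MYRIAD", METRIC_KEY(AVAILABLE_DEVICES)).as<std::vector<std::string>>();
        for (const auto& id : ids) {
            std::cout << id << std::endl;
        }
        return 0;
    }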
index c0370c9..3994a67 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #include <extension.hpp>
 #include <ngraph/opsets/opset.hpp>
 #include <ngraph/factory.hpp>
index 2314394..ad69466 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index e368038..ea1a9c8 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 168080d..11cc59d 100644 (file)
@@ -5,7 +5,6 @@
 #include <gtest/gtest.h>
 
 #include <cpp/ie_cnn_network.h>
-#include <cnn_network_ngraph_impl.hpp>
 #include <string>
 #include <sstream>
 #include <fstream>
@@ -635,11 +634,11 @@ TEST_F(NGraphReshapeTests, TestInterpParameters) {
     auto ngraph_function = std::make_shared<ngraph::Function>(ngraph::ResultVector{output},
                            ngraph::ParameterVector{inp});
 
-    InferenceEngine::details::CNNNetworkNGraphImpl cnn(ngraph_function);
-    auto icnn = cnn.getCNNNetwork();
+    CNNNetwork cnn(ngraph_function);
+    cnn.begin();
     std::map<std::string, InferenceEngine::SizeVector> inShape;
     inShape["test"] = {1, 3, 4, 5};
-    icnn->reshape(inShape, nullptr);
+    cnn.reshape(inShape);
 }
 
 TEST_F(NGraphReshapeTests, genericNodeWithDynShape) {
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/bf16_network_restoring.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/bf16_network_restoring.cpp
new file mode 100644 (file)
index 0000000..3f02f6c
--- /dev/null
@@ -0,0 +1,211 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class BF16NetworkRestore1 : public BasicBF16Test  {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+        //   +   Power1(FP32)
+        //        |
+        //   +  AvgPooling1(FP32)
+        //        |
+        //   + Convolution1(BF16)
+        //        |
+        //   +    ReLU1(Fused)
+        //        |------------------------
+        //        |                        \
+        //   +   Convolution2(BF16)      Convolution 3 (BF16)
+        //           |                     /              \
+        //   +        |                  ReLU2(FP32)     Normalize (FP32)
+        //            \              /                      |
+        //              Eltwise (Fused to Conv2)     ------/
+        //                |                         /
+        //              ReLU3  (Fused to Conv2)   /
+        //                |                     /
+        //             MaxPooling1 (FP32)      /
+        //                   \            /
+        //                      Eltwise
+        //                         |
+
+
+        // STAGE1: construction of the GRAPH
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 224, 224});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("Power1");
+
+        // AvgPooling
+        auto avgpoolNode = std::make_shared<opset1::AvgPool>(addNode,
+                                                             Strides{1, 1},
+                                                             Shape{1, 1},
+                                                             Shape{1, 1},
+                                                             Shape{2, 2},
+                                                             true,
+                                                             op::RoundingType::FLOOR);
+        avgpoolNode->set_friendly_name("AvgPooling1");
+
+        // convolution1
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            avgpoolNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution1");
+
+        // ReLU1
+        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
+        reluNode->set_friendly_name("ReLU1");
+
+        // convolution2
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            reluNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution2");
+
+        // convolution3
+        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
+            reluNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode3->set_friendly_name("Convolution3");
+
+        // ReLU1
+        auto reluNode2 = std::make_shared<opset1::Relu>(convNode3);
+        reluNode2->set_friendly_name("ReLU2");
+
+        // Norm1
+        // normalize
+        const auto axes = make_shared<op::Constant>(element::i64, Shape{2}, vector<int64_t>{2});
+        float eps{1e-6f};
+        auto eps_mode = op::EpsMode::ADD;
+
+        auto normNode =  std::make_shared<opset1::NormalizeL2>(convNode3, axes, eps, eps_mode);
+        normNode->set_friendly_name("Norm1");
+
+
+
+        // Eltwise1
+        auto eltNode1 = std::make_shared<opset1::Add>(convNode2, reluNode2);
+        eltNode1->set_friendly_name("Eltwise1");
+
+        // ReLU3
+        auto reluNode3 = std::make_shared<opset1::Relu>(eltNode1);
+        reluNode3->set_friendly_name("ReLU3");
+
+        // maxPooling1
+        auto maxPoolNode = std::make_shared<opset1::MaxPool>(reluNode3,
+                                                             Strides{1, 1},
+                                                             Shape{1, 1},
+                                                             Shape{0, 0},
+                                                             Shape{2, 2},
+                                                             op::RoundingType::FLOOR);
+        maxPoolNode->set_friendly_name("maxPooling1");
+
+        // Eltwise2
+        auto eltNode2 = std::make_shared<opset1::Add>(maxPoolNode, normNode);
+        eltNode2->set_friendly_name("Eltwise2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode2}, ngraph::ParameterVector{input1});
+    }
+    void SetUp()override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        threshold = 0.4f;  // max value in the latest tensor for FP32 network is 10.83
+
+        // STAGE2:
+        // filling of the expected precision of layer execution, defined by the precision of the input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Power1"] = "FP32";
+        expectedPrecisions["AvgPooling1"] = "FP32";
+        expectedPrecisions["Convolution1"] = "BF16";
+        expectedPrecisions["ReLU1"] = "ndef";
+        expectedPrecisions["Convolution2"] = "BF16";
+        expectedPrecisions["Convolution3"] = "BF16";
+        expectedPrecisions["ReLU2"] = "FP32";
+        expectedPrecisions["Norm1"] = "FP32";
+        expectedPrecisions["Eltwise1"] = "ndef";
+        expectedPrecisions["ReLU3"] = "ndef";
+        expectedPrecisions["maxPooling1"] = "FP32";
+        expectedPrecisions["Eltwise2"] = "FP32";
+    }
+};
+
+TEST_P(BF16NetworkRestore1, CompareWithRefImpl) {
+    test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, BF16NetworkRestore1,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 224, 224 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        BF16NetworkRestore1::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/bfloat16_helpers.hpp
new file mode 100644 (file)
index 0000000..01e2519
--- /dev/null
@@ -0,0 +1,276 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <inference_engine.hpp>
+#include "ie_common.h"
+#include <ie_blob.h>
+#include <math.h>
+#include <map>
+#include <string>
+#include <utility>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "ngraph/opsets/opset1.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include <ie_system_conf.h>
+
+namespace LayerTestsDefinitions {
+
+/**
+ * Class providing static helpers for bfloat16 functional tests.
+ * The helper functions fill tensor content following some periodic law and compare output buffers.
+ *
+ */
+class BFloat16Helpers {
+public:
+    static void fillInputsBySinValues(float* data, size_t size) {
+        for (size_t i = 0; i < size; i++) {
+            data[i] = sin(static_cast<float>(i));
+        }
+    }
+
+    static void fillInputsBySinValues(short *data, size_t size) {
+        for (size_t i = 0; i < size; i++) {
+            data[i] = reducePrecisionBitwiseS(sin(static_cast<float>(i)));
+        }
+    }
+
+    static void fillInputsByCosValues(float* data, size_t size) {
+        for (size_t i = 0; i < size; i++) {
+            data[i] = cos(static_cast<float>(i));
+        }
+    }
+
+    static int fillInputsBySinValues(InferenceEngine::Blob::Ptr blob) {
+        InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
+        if (!mblob) {
+            return -1;
+        }
+        if (mblob->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP32) {
+            return -2;
+        }
+        auto lm = mblob->rwmap();
+        fillInputsBySinValues(lm.as<float*>(), mblob->size());
+        return 0;
+    }
+
+    static std::pair<std::string, std::string> matchPerfCountPrecisionVsExpected(
+        const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfCounts,
+        const std::map<std::string, std::string>& expected) {
+        for (auto e : expected) {
+            auto it = perfCounts.find(e.first);
+            if (it == perfCounts.end()) {
+                return std::pair<std::string, std::string>(e.first, "NOT_FOUND_IN_PERF_COUNTS");
+            }
+            // get the last n characters of exec_type, where n is the length of e.second
+            std::string execType = it->second.exec_type;
+            std::string pfPrecision = execType.substr(execType.length() - e.second.length(), e.second.length());
+            if (pfPrecision != e.second) {
+                return std::pair<std::string, std::string>(e.first, pfPrecision);
+            }
+        }
+        return std::pair<std::string, std::string>("", "");
+    }
+
+    static float getMaxAbsValue(const float* data, size_t size) {
+        float maxVal = 0.f;
+        for (size_t i = 0; i < size; i++) {
+            if (fabs(data[i]) > maxVal) {
+                maxVal = fabs(data[i]);
+            }
+        }
+        return maxVal;
+    }
+
+    static float reducePrecisionBitwise(const float in) {
+        float f = in;
+        int* i = reinterpret_cast<int*>(&f);
+        int t2 = *i & 0xFFFF0000;
+        float ft1 = *(reinterpret_cast<float*>(&t2));
+        if ((*i & 0x8000) && (*i & 0x007F0000) != 0x007F0000) {
+            t2 += 0x10000;
+            ft1 = *(reinterpret_cast<float*>(&t2));
+        }
+        return ft1;
+    }
+
+    static short reducePrecisionBitwiseS(const float in) {
+        float f = reducePrecisionBitwise(in);
+        int intf = *reinterpret_cast<int*>(&f);
+        intf = intf >> 16;
+        short s = intf;
+        return s;
+    }
+};
+
+
+typedef std::tuple<
+                   InferenceEngine::Precision,
+                   InferenceEngine::Precision,
+                   InferenceEngine::SizeVector,
+                   InferenceEngine::SizeVector,
+                   std::string> basicParams;
+
+
+/**
+ * Base class for bf16 tests
+ * the flow in this test is to load the network in FP32 and in BF16 modes and verify
+ * 1. the difference between output tensors against some threshold;
+ * 2. which precision was selected for the layers described in the runtime info of the performance counters.
+ *
+ * To develop a new test you need to
+ * 1. define a class inherited from BasicBF16Test and implement SetUp(). For example:
+ *
+ * class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
+ * protected:
+ * void SetUp()override {
+ *  fnPtr = std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
+
+        // STAGE1:
+        threshold = 9e-1;
+
+        // STAGE2:
+        // filling of the expected precision of layer execution, defined by the precision of the input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["Add_4"] = "FP32";
+        expectedPrecisions["Convolution_6"] = "BF16";
+        expectedPrecisions["Convolution_7"] = "BF16";
+        expectedPrecisions["Add_8"] = "ndef";
+ *      expectedPrecisions["Convolution_10"] = "BF16";
+ *      }
+ *      };
+ *
+ *  2. define test
+ *  TEST_P(ScaleshiftConv_x3_Eltwise, CompareWithRefImpl) {
+    test();
+};
+ *  3. INSTANTIATE_TEST_CASE_P(bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x3_Eltwise::getTestCaseName);
+
+ *
+ * In the 3rd stage do not forget the bfloat16 prefix!
+ */
+class BasicBF16Test : public LayerTestsUtils::LayerTestsCommonClass<basicParams> {
+protected:
+    virtual std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) = 0;
+
+public:
+    std::shared_ptr<ngraph::Function> fnPtr;
+    std::vector<float *> refOut;
+    InferenceEngine::SizeVector inputShapes, newInputShapes;
+    InferenceEngine::SizeVector refOutShape;
+    std::map<std::string, std::string> expectedPrecisions;
+    float threshold = 2e-2;  // enough for tensors whose absolute maximum values are less than 1
+
+    static std::string getTestCaseName(testing::TestParamInfo<basicParams> obj) {
+        InferenceEngine::Precision inputPrecision, netPrecision;
+        InferenceEngine::SizeVector inputShapes, newInputShapes;
+        std::string targetDevice;
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = obj.param;
+
+        std::ostringstream result;
+        if (!newInputShapes.empty()) {
+            result << "Reshape_From=" << CommonTestUtils::vec2str(inputShapes);;
+            result << "_To=" << CommonTestUtils::vec2str(newInputShapes) << "_";
+        } else {
+            result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
+        }
+        result << "inPRC=" << inputPrecision.name() << "_";
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice;
+        return result.str();
+    }
+
+    void test() {
+        if (!InferenceEngine::with_cpu_x86_bfloat16()) {
+            // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
+            // tests are useless on such platforms
+            return;
+        }
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        InferenceEngine::CNNNetwork cnnNet(fnPtr);
+
+        setNetInOutPrecision(cnnNet, inputPrecision);
+        std::string inputName = cnnNet.getInputsInfo().begin()->first;
+        std::string outputName = cnnNet.getOutputsInfo().begin()->first;
+        auto ie = InferenceEngine::Core();
+        // BF16 inference
+        std::map<std::string, std::string> options;
+        if (netPrecision == InferenceEngine::Precision::FP32) {
+            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::YES;
+        } else {
+            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::NO;
+        }
+        options[InferenceEngine::PluginConfigParams::KEY_PERF_COUNT] = InferenceEngine::PluginConfigParams::YES;
+        options[InferenceEngine::PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT] = "egraph_test";
+
+        auto exec_net1 = ie.LoadNetwork(cnnNet, targetDevice, options);
+        auto req1 = exec_net1.CreateInferRequest();
+
+        InferenceEngine::Blob::Ptr inBlob1 = req1.GetBlob(inputName);
+        BFloat16Helpers::fillInputsBySinValues(inBlob1);
+
+        req1.Infer();
+        auto outBlobBF16 = req1.GetBlob(outputName);
+        InferenceEngine::MemoryBlob::CPtr mout1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobBF16);
+        ASSERT_NE(mout1, nullptr);
+        auto lm1 = mout1->rmap();
+
+        // FP32 inference
+        // if netPrecision is not equal to FP32, change the network precision and recreate the network
+        InferenceEngine::CNNNetwork cnnNetFP32(createGraph(InferenceEngine::Precision::FP32));
+        std::string inputNameFP32 = cnnNetFP32.getInputsInfo().begin()->first;
+        std::string outputNameFP32 = cnnNetFP32.getOutputsInfo().begin()->first;
+        setNetInOutPrecision(cnnNetFP32, inputPrecision);
+        auto exec_net2 = ie.LoadNetwork(cnnNetFP32, targetDevice,
+                                        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } });
+        auto req2 = exec_net2.CreateInferRequest();
+
+
+        req2.SetBlob(inputNameFP32, inBlob1);
+
+        req2.Infer();
+        auto outBlobFP32 = req2.GetBlob(outputNameFP32);
+        InferenceEngine::MemoryBlob::CPtr mout2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobFP32);
+        ASSERT_NE(mout2, nullptr);
+        auto lm2 = mout2->rmap();
+
+        // debug to figure out the maximum value in output tensors:
+        // std::cout << "Max in bfloat16 network by output " << outputName << ": " <<
+        //      BFloat16Helpers::getMaxAbsValue(lm1.as<const float *>(), mout1->size()) << std::endl;
+        // std::cout << "Max in fp32 network by output " << outputNameFP32 << ": " <<
+        //     BFloat16Helpers::getMaxAbsValue(lm2.as<const float *>(), mout2->size()) << std::endl;
+
+        FuncTestUtils::compareRawBuffers(lm1.as<const float *>(),
+                                         lm2.as<const float *>(),
+                                         mout1->size(), mout2->size(),
+                                         threshold);
+
+        // Stage2: verification of performance counters
+        std::pair<std::string, std::string> wrongLayer =
+            BFloat16Helpers::matchPerfCountPrecisionVsExpected(req1.GetPerformanceCounts(), expectedPrecisions);
+        if (wrongLayer.first != std::string("")) {
+            std::string layerInPerfCounts = wrongLayer.first + " " + wrongLayer.second;
+            std::string layerExpected = wrongLayer.first + " " + expectedPrecisions[wrongLayer.first];
+            ASSERT_EQ(layerInPerfCounts, layerExpected);
+        }
+        fnPtr.reset();
+    }
+};
+
+}  // namespace LayerTestsDefinitions
+
+
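For reference, reducePrecisionBitwiseS keeps the upper 16 bits of the FP32 bit pattern (with a round-to-nearest adjustment). A minimal sketch of the truncation-only step, using memcpy instead of pointer casts to avoid aliasing (hypothetical helper, not part of the test utilities):

    #include <cstdint>
    #include <cstring>

    // Truncate an FP32 value to its bfloat16 bit pattern (no rounding adjustment applied here).
    short toBF16BitsTruncated(float f) {
        uint32_t bits = 0;
        std::memcpy(&bits, &f, sizeof(bits));    // reinterpret the float's bit pattern safely
        return static_cast<short>(bits >> 16);   // e.g. 2.0f (0x40000000) -> 0x4000
    }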
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_conv.cpp
new file mode 100644 (file)
index 0000000..a55c7e1
--- /dev/null
@@ -0,0 +1,119 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ConvConv : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+        //     ScaleShift (FP32)
+        //          |
+        //        Conv (BF16)
+        //          |
+        //        Conv (BF16)
+
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 40, 40});
+        auto const1 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 2.0f });
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        auto const2 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 1.0f });
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        std::vector<float> weightValues;
+        weightValues.resize(3 * 3 * 3 * 3);
+        BFloat16Helpers::fillInputsBySinValues(weightValues.data(), weightValues.size());
+        auto weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // Convolution
+        ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        std::vector<float> weightValues2;
+        weightValues2.resize(3 * 3 * 3 * 3);
+        BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
+        auto weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape2, weightValues2);
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            convNode1, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
+    }
+    void SetUp()override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        // the maximum value in the latest tensor for this test is 24.4; it would be safe to set the threshold to 0.1
+        threshold = 0.3f;
+        // STAGE2:
+        // filling of the expected precision of layer execution, defined by the precision of the input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "BF16";
+    }
+};
+
+TEST_P(ConvConv, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ConvConv,
+                        ::testing::Combine(
+                        ::testing::Values(Precision::FP32),
+                        ::testing::Values(Precision::FP32),
+                        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                        ::testing::Values(SizeVector()),
+                        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvConv::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ConvConv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvConv::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_dwconv_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_dwconv_relu.cpp
new file mode 100644 (file)
index 0000000..1ab6f50
--- /dev/null
@@ -0,0 +1,148 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ConvDWConvReLU : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+        //             scaleshift (FP32)
+        //                |
+        //               Conv (BF16)
+        //                |
+        //            Depthwise Conv (BF16, assuming explicit separate execution of the kernel, not fused into the previous convolution)
+        //                |
+        //               ReLU (Fused Info DW convolution)
+
+
+        // multiply
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // DW convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape2 = { 3, 1, 1, 3, 3 };  // groups, out channels per group, in channels per group, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValues2FP32;
+            weightValues2FP32.resize(3 * 1 * 1 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
+        } else {
+            std::vector<short> weightValues2BF16;
+            weightValues2BF16.resize(3 * 1 * 1 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::GroupConvolution>(
+            convNode1, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // ReLU
+        auto reluNode2 =  std::make_shared<opset1::Relu>(convNode2);
+        reluNode2->set_friendly_name("RELU");
+
+        return std::make_shared<ngraph::Function>(reluNode2, ngraph::ParameterVector{input1});
+    }
+    void SetUp()override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 0.4f;  // maximum value in tensor is 54.89
+        // STAGE2:
+        // filling of the expected precision of layer execution, defined by the precision of the input tensor
+        // to the primitive and reflected in performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["RELU"] = "ndef";
+    }
+};
+
+TEST_P(ConvDWConvReLU, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ConvDWConvReLU,
+                            ::testing::Combine(
+                                    ::testing::Values(Precision::FP32),
+                                    ::testing::Values(Precision::FP32),
+                                    ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                    ::testing::Values(SizeVector()),
+                                    ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvDWConvReLU::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ConvDWConvReLU,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvDWConvReLU::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_relu_pool_conv_relu_pool.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/conv_relu_pool_conv_relu_pool.cpp
new file mode 100644 (file)
index 0000000..8ec2cfd
--- /dev/null
@@ -0,0 +1,199 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ConvReLUPoolConvReLUPool : public BasicBF16Test  {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision)override {
+        //    Convolution1  (FP32)
+        //        |
+        //       ReLU1      (Fused)
+        //        |
+        //     Pooling1     (FP32)
+        //        |
+        //    Convolution2  (BF16)
+        //        |
+        //       ReLU2      (Fused)
+        //        |
+        //     Pooling2     (BF16)
+        //        |
+        //    Convolution3  (BF16)
+
+
+        // STAGE1: construction of the GRAPH
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+
+        // convolution1
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
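+            // BF16 path: weights are kept as raw bfloat16 bit patterns in 16-bit storage (per the BFloat16Helpers convention)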
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
+            input1, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode->set_friendly_name("Convolution_1");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(convNode);
+        reluNode->set_friendly_name("ReLU_1");
+
+        // Pooling
+        auto avgpoolNode = std::make_shared<opset1::AvgPool>(reluNode,
+                                                             Strides{1, 1},   // strides
+                                                             Shape{1, 1},     // pads begin
+                                                             Shape{1, 1},     // pads end
+                                                             Shape{2, 2},     // kernel
+                                                             true,            // exclude pad
+                                                             op::RoundingType::FLOOR);
+        avgpoolNode->set_friendly_name("AvgPool_1");
+
+        // convolution2
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            avgpoolNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+
+        // ReLU
+        auto reluNode2 = std::make_shared<opset1::Relu>(convNode2);
+        reluNode2->set_friendly_name("ReLU_2");
+
+        // Pooling
+        auto maxpoolNode2 = std::make_shared<opset1::MaxPool>(reluNode2,
+                                                             Strides{1, 1},   // strides
+                                                             Shape{1, 1},     // pads begin
+                                                             Shape{0, 0},     // pads end
+                                                             Shape{2, 2},     // kernel
+                                                             op::RoundingType::FLOOR);
+        maxpoolNode2->set_friendly_name("MaxPool_2");
+
+        // convolution3
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
+        ngraph::Shape convFilterShape3 = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
+            maxpoolNode2, weightsNode3,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode3->set_friendly_name("Convolution_3");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        threshold = 0.2f;  // max value in the output tensor of the FP32 network is 9.8
+
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["Convolution_1"] = "FP32";
+        expectedPrecisions["ReLU_1"] = "ndef";
+        expectedPrecisions["AvgPool_1"] = "FP32";
+        expectedPrecisions["Convolution_2"] = "BF16";
+        expectedPrecisions["ReLU_2"] = "ndef";
+        expectedPrecisions["MaxPool_2"] = "BF16";
+        expectedPrecisions["Convolution_3"] = "BF16";
+    }
+};
+
+TEST_P(ConvReLUPoolConvReLUPool, CompareWithRefImpl) {
+    test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ConvReLUPoolConvReLUPool,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvReLUPoolConvReLUPool::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ConvReLUPoolConvReLUPool,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ConvReLUPoolConvReLUPool::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/faster_100_5_1_1_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/faster_100_5_1_1_conv.cpp
new file mode 100644 (file)
index 0000000..efc9c6d
--- /dev/null
@@ -0,0 +1,135 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class Faster100_5_1_1_Conv : public BasicBF16Test  {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //                     Power (FP32)
+        //                       |
+        //                     Convolution (BF16)
+
+        // STAGE1: construction of the GRAPH
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{10, 5, 1, 1});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("Add_4");
+
+        // problematic convolution: 100x5x1x1
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 5, 5, 1, 1 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValues;
+            weightValues.resize(5 * 5 * 1 * 1, 0.f);
+            weightValues[0] = 1.0f;
+            weightValues[7] = 1.0f;
+            weightValues[11] = 1.0f;
+            weightValues[19] = 1.0f;
+            weightValues[23] = 1.0f;
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(5 * 5 * 1 * 1, BFloat16Helpers::reducePrecisionBitwiseS(0.0f));
+            weightValuesBF16[0] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
+            weightValuesBF16[7] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
+            weightValuesBF16[11] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
+            weightValuesBF16[19] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
+            weightValuesBF16[23] = BFloat16Helpers::reducePrecisionBitwiseS(1.0f);
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode->set_friendly_name("Convolution_6");
+
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(convNode);
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
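+        // no explicit accuracy threshold is set here; the default from BasicBF16Test is assumed to be sufficient for this small graph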
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["Add_4"] = "FP32";
+        expectedPrecisions["Convolution_6"] = "BF16";
+    }
+};
+
+TEST_P(Faster100_5_1_1_Conv, CompareWithRefImpl) {
+    test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Faster100_5_1_1_Conv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 10, 5, 1, 1 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Faster100_5_1_1_Conv::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Faster100_5_1_1_Conv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 10, 5, 1, 1 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Faster100_5_1_1_Conv::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/mobilenet_ssd_with_branching.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/mobilenet_ssd_with_branching.cpp
new file mode 100644 (file)
index 0000000..e8eafcb
--- /dev/null
@@ -0,0 +1,183 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class MobileNet_ssd_with_branching : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //                scaleshift
+        //                    |
+        //                   Conv1 (FP32)
+        //                  |           \
+        //               Conv2 (FP32 for now, until a greedy mode is available. This must be fixed: such a pattern should have Conv2 in BF16)
+        //                |              |
+        //               relu(fused)     |
+        //                |          Normalize (not LRN)
+        //           Conv (DW)(BF16)     |
+        //                |              |
+        //               ReLU (Fused)    |
+        //                  \           /
+        //                    Concat
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // Conv1
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // Conv2
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            convNode1, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // ReLU
+        auto reluNode =  std::make_shared<opset1::Relu>(convNode2);
+        reluNode->set_friendly_name("RELU_2");
+
+        // DW convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape2 = { 3, 1, 1, 3, 3 };  // groups, out channels per group, input channels per group, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValues2FP32;
+            weightValues2FP32.resize(3 * 1 * 1 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
+        } else {
+            std::vector<short> weightValues2BF16;
+            weightValues2BF16.resize(3 * 1 * 1 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> dwConvNode = std::make_shared<ngraph::opset1::GroupConvolution>(
+            reluNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        dwConvNode->set_friendly_name("DW_CONV");
+
+        // ReLU
+        auto reluNode2 =  std::make_shared<opset1::Relu>(dwConvNode);
+        reluNode2->set_friendly_name("RELU_DW");
+
+        // normalize
+        const auto axes = make_shared<op::Constant>(element::i64, Shape{2}, vector<int64_t>{2});
+        float eps{1e-6f};
+        auto eps_mode = op::EpsMode::ADD;
+
+        auto normNode =  std::make_shared<opset1::NormalizeL2>(convNode1, axes, eps, eps_mode);
+        normNode->set_friendly_name("NORM_1");
+
+        // Concat
+        ngraph::NodeVector concInputNodes = { reluNode2, normNode };
+        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
+        concNode->set_friendly_name("CONC_1");
+
+        return std::make_shared<ngraph::Function>(concNode, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 0.8f;  // max value in the output tensor is 87.67
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "FP32";
+        expectedPrecisions["RELU_2"] = "ndef";
+        expectedPrecisions["DW_CONV"] = "BF16";
+        expectedPrecisions["RELU_DW"] = "ndef";
+        expectedPrecisions["NORM_1"] = "FP32";
+        expectedPrecisions["CONC_1"] = "FP32";
+    }
+};
+
+TEST_P(MobileNet_ssd_with_branching, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, MobileNet_ssd_with_branching,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        MobileNet_ssd_with_branching::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, MobileNet_ssd_with_branching,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        MobileNet_ssd_with_branching::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_conv.cpp
new file mode 100644 (file)
index 0000000..e165f86
--- /dev/null
@@ -0,0 +1,153 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConvEltwiseConv : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //        scaleshift (FP32)     Conv (FP32)
+        //                   \          /
+        //              Eltwise (Fused into Conv)
+        //                |
+        //               Conv (BF16)
+
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            input1, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(addNode, convNode1);
+        eltNode->set_friendly_name("ELT_1");
+
+        // Convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValues2;
+            weightValues2.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
+        } else {
+            std::vector<short> weightValues2BF16;
+            weightValues2BF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            eltNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 0.2f;  // max value in the output tensor of the FP32 network is 37.77
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "FP32";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["ELT_1"] = "ndef";
+    }
+};
+
+TEST_P(ScaleshiftConvEltwiseConv, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseConv,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEltwiseConv::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseConv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEltwiseConv::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_relu_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_relu_conv.cpp
new file mode 100644 (file)
index 0000000..ba7c17b
--- /dev/null
@@ -0,0 +1,159 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConvEltwiseReluConv : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //        scaleshift (FP32)     Conv (FP32)
+        //             \          /
+        //               Eltwise (Fused into conv)
+        //                |
+        //               ReLU (Fused into conv)
+        //                |
+        //               Conv (BF16)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            input1, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(addNode, convNode1);
+        eltNode->set_friendly_name("ELT_1");
+
+        // ReLU
+        auto reluNode =  std::make_shared<opset1::Relu>(eltNode);
+        reluNode->set_friendly_name("RELU_1");
+
+        // Convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape2 = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValues2;
+            weightValues2.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
+        } else {
+            std::vector<short> weightValues2BF16;
+            weightValues2BF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            reluNode, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 9e-2;
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "FP32";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["RELU_1"] = "ndef";
+        expectedPrecisions["ELT_1"] = "ndef";
+    }
+};
+
+TEST_P(ScaleshiftConvEltwiseReluConv, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseReluConv,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEltwiseReluConv::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseReluConv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEltwiseReluConv::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_scaleshift.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_eltwise_scaleshift.cpp
new file mode 100644 (file)
index 0000000..41f81cb
--- /dev/null
@@ -0,0 +1,152 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConvEltwiseScaleshift : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //                    scaleshift (FP32)
+        //                        |
+        //                       Conv (BF16)
+        //             \          /
+        //              Eltwise (Fused into Conv)
+        //                |
+        //            scaleshift (FP32)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // Eltwise, i.e. Add of the original network input and the convolution output
+        auto eltNode = std::make_shared<opset1::Add>(input1, convNode1);
+        eltNode->set_friendly_name("ELT_1");
+
+        auto reluNode =  std::make_shared<opset1::Relu>(eltNode);
+        reluNode->set_friendly_name("RELU_1");
+
+        // multiply
+        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
+        } else {
+            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(3.0f)) });
+        }
+        auto mulNode2 = std::make_shared<opset1::Multiply>(reluNode, const3);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
+        addNode2->set_friendly_name("ADD_2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode2}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 0.4;
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["ADD_2"] = "FP32";
+        expectedPrecisions["ELT_1"] = "ndef";
+    }
+};
+
+TEST_P(ScaleshiftConvEltwiseScaleshift, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseScaleshift,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEltwiseScaleshift::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseScaleshift,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEltwiseScaleshift::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_elu_conv.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_elu_conv.cpp
new file mode 100644 (file)
index 0000000..fc63492
--- /dev/null
@@ -0,0 +1,140 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConvEluConv : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //              scaleshift (FP32)
+        //                 |
+        //                Conv (BF16)
+        //                |
+        //                Elu (FP32 for now, this must be fixed and it must be fused into Conv)
+        //                 |
+        //                Conv (BF16)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // Elu with alpha = 2.0
+        auto eluNode =  std::make_shared<opset1::Elu>(convNode1, 2);
+        eluNode->set_friendly_name("ELU_1");
+
+        // Conv
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            eluNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 1;
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["ELU_1"] = "FP32";
+        expectedPrecisions["CONV_2"] = "BF16";
+    }
+};
+
+TEST_P(ScaleshiftConvEluConv, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConvEluConv,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEluConv::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConvEluConv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvEluConv::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_relu.cpp
new file mode 100644 (file)
index 0000000..6e5df70
--- /dev/null
@@ -0,0 +1,126 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConvRelu : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //              scaleshift (FP32)
+        //                  |
+        //                Conv (BF16)
+        //                  |
+        //                relu (Fused into convolution)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // ReLU
+        auto reluNode =  std::make_shared<opset1::Relu>(convNode1);
+        reluNode->set_friendly_name("RELU_1");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 5e-2;
+        // STAGE2:
+        // fill the expected execution precision of each layer, defined by the precision of the input tensor to the primitive and reflected in
+        // performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["RELU_1"] = "ndef";
+    }
+};
+
+TEST_P(ScaleshiftConvRelu, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConvRelu,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvRelu::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConvRelu,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConvRelu::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_concat_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_concat_relu.cpp
new file mode 100644 (file)
index 0000000..11dcb1e
--- /dev/null
@@ -0,0 +1,148 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConv_x2_ConcatRelu : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //              scaleshift
+        //             /         \
+        //           Conv      Conv
+        //             \       /
+        //              concat
+        //                |
+        //               relu
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // Concat
+        ngraph::NodeVector concInputNodes = { convNode1, convNode2 };
+
+        // TODO: the test fails when the concatenation axis is 1, so axis 2 is used here for now
+        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 2);
+        concNode->set_friendly_name("CONC_1");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(concNode);
+        reluNode->set_friendly_name("RELU_1");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 10e-1;
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["CONC_1"] = "FP32";
+        expectedPrecisions["RELU_1"] = "FP32";
+    }
+};
+
+TEST_P(ScaleshiftConv_x2_ConcatRelu, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConv_x2_ConcatRelu,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_ConcatRelu::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConv_x2_ConcatRelu,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_ConcatRelu::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_eltwise.cpp
new file mode 100644 (file)
index 0000000..82340d2
--- /dev/null
@@ -0,0 +1,137 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConv_x2_Eltwise : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //              scaleshift (FP32)
+        //             /             \
+        //           Conv1 (BF16)     Conv2 (BF16)
+        //             \               /
+        //                eltwise (Fused into Conv1) produces FP32 output
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
+        eltNode->set_friendly_name("ELT_1");
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 2e-1;
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["ELT_1"] = "ndef";
+    }
+};
+
+TEST_P(ScaleshiftConv_x2_Eltwise, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConv_x2_Eltwise,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_Eltwise::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConv_x2_Eltwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_Eltwise::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed1_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed1_eltwise.cpp
new file mode 100644 (file)
index 0000000..86bcc52
--- /dev/null
@@ -0,0 +1,137 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConv_x2_mixed1_Eltwise : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //      scaleshift (FP32)      input
+        //             |                 |
+        //      Conv1 (BF16)       Conv2 (FP32)
+        //             \               /
+        //            eltwise (Fused into Conv1)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            input1, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
+        eltNode->set_friendly_name("ELT_1");
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 2e-1;
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["ADD_1"] = "FP32";
+        expectedPrecisions["CONV_1"] = "BF16";
+        expectedPrecisions["CONV_2"] = "FP32";
+        expectedPrecisions["ELT_1"] = "ndef";
+    }
+};
+
+TEST_P(ScaleshiftConv_x2_mixed1_Eltwise, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConv_x2_mixed1_Eltwise,
+                            ::testing::Combine(
+                                    ::testing::Values(Precision::FP32),
+                                    ::testing::Values(Precision::FP32),
+                                    ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                    ::testing::Values(SizeVector()),
+                                    ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_mixed1_Eltwise::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConv_x2_mixed1_Eltwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_mixed1_Eltwise::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed2_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x2_mixed2_eltwise.cpp
new file mode 100644 (file)
index 0000000..1e483bf
--- /dev/null
@@ -0,0 +1,138 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConv_x2_mixed2_Eltwise : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //          input         scaleshift (FP32)
+        //             |                |
+        //     Conv1 (FP32)        Conv2 (BF16)
+        //             \               /
+        //             eltwise (Fused into Conv1)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            input1, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("CONV_1");
+
+        // multiply
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("ADD_2");
+
+        // convolution
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("CONV_2");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
+        eltNode->set_friendly_name("ELT_1");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 2e-1;
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["CONV_1"] = "FP32";
+        expectedPrecisions["ADD_2"] = "FP32";
+        expectedPrecisions["CONV_2"] = "BF16";
+        expectedPrecisions["ELT_1"] = "ndef";
+    }
+};
+
+TEST_P(ScaleshiftConv_x2_mixed2_Eltwise, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConv_x2_mixed2_Eltwise,
+                            ::testing::Combine(
+                                    ::testing::Values(Precision::FP32),
+                                    ::testing::Values(Precision::FP32),
+                                    ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                    ::testing::Values(SizeVector()),
+                                    ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_mixed2_Eltwise::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConv_x2_mixed2_Eltwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x2_mixed2_Eltwise::getTestCaseName);
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x3_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_conv_x3_eltwise.cpp
new file mode 100644 (file)
index 0000000..361dc01
--- /dev/null
@@ -0,0 +1,170 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //        scaleshift (FP32)
+        //
+        //        /        \
+        //
+        //       Conv1 (BF16)    Conv2 (BF16)
+        //
+        //        \        /
+        //
+        //        Eltwise (Fused to Conv1)
+        //
+        //          |
+        //
+        //         Conv3 (BF16)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("Add_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 16, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(16 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(16 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution_1");
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
+        eltNode->set_friendly_name("ELT_1");
+
+
+        // Convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
+        ngraph::Shape convFilterShape3 = { 16, 16, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(16 * 16 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(16 * 16 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
+            eltNode, weightsNode3,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode3->set_friendly_name("Convolution_3");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 1.0f;  // max value in the latest tensor for FP32 network is 93.3
+
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["Add_1"] = "FP32";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Convolution_2"] = "BF16";
+        expectedPrecisions["ELT_1"] = "ndef";
+        expectedPrecisions["Convolution_3"] = "BF16";
+    }
+};
+
+TEST_P(ScaleshiftConv_x3_Eltwise, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x3_Eltwise::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        ScaleshiftConv_x3_Eltwise::getTestCaseName);
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x2_conv_x2_eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x2_conv_x2_eltwise.cpp
new file mode 100644 (file)
index 0000000..3daece1
--- /dev/null
@@ -0,0 +1,160 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class Scaleshift_x2_Conv_x2_Eltwise : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //
+        //  scaleshift (FP32)    scaleshift (FP32)
+        //        \             /      \
+        //        Eltwise  (FP32)   Conv (BF16)
+        //          |                 |
+        //                          Conv (BF16)
+        //                            |
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("Add_1");
+
+        // multiply
+        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
+        } else {
+            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(3.0f)) });
+        }
+        auto mulNode2 = std::make_shared<opset1::Multiply>(input1, const3);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
+        addNode2->set_friendly_name("Add_2");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(addNode, addNode2);
+        eltNode->set_friendly_name("ELT_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode2, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            convNode1, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode, convNode2}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 1;
+
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["Add_1"] = "FP32";
+        expectedPrecisions["Add_2"] = "FP32";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Convolution_2"] = "BF16";
+        expectedPrecisions["ELT_1"] = "FP32";
+    }
+};
+
+TEST_P(Scaleshift_x2_Conv_x2_Eltwise, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Scaleshift_x2_Conv_x2_Eltwise,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Scaleshift_x2_Conv_x2_Eltwise::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Scaleshift_x2_Conv_x2_Eltwise,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Scaleshift_x2_Conv_x2_Eltwise::getTestCaseName);
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x3_conv_eltwise_relu.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/scaleshift_x3_conv_eltwise_relu.cpp
new file mode 100644 (file)
index 0000000..3c22d18
--- /dev/null
@@ -0,0 +1,182 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <functional>
+#include <map>
+#include <utility>
+
+#include <ie_core.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class Scaleshift_x3_ConvEltwiseRelu : public BasicBF16Test {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //
+        //   scaleshift (FP32)
+        //        |
+        //       Conv (BF16)   scaleshift (FP32)
+        //
+        //         \         /
+        //
+        //        Eltwise (Fused to Conv)
+        //          |
+        //         ReLU   (Fused to Conv)
+        //           |
+        //       scaleshift  (FP32)
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("Add_1");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(3 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
+            ngraph::CoordinateDiff({ 1, 1 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode1->set_friendly_name("Convolution_1");
+
+        // multiply
+        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
+        } else {
+            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(3.0f)) });
+        }
+        auto mulNode2 = std::make_shared<opset1::Multiply>(input1, const3);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
+        addNode2->set_friendly_name("Add_2");
+
+        // Eltwise, i.e. Add
+        auto eltNode = std::make_shared<opset1::Add>(convNode1, addNode2);
+        eltNode->set_friendly_name("ELT_1");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(eltNode);
+        reluNode->set_friendly_name("RELU_1");
+
+        // multiply
+        std::shared_ptr<ngraph::opset1::Constant> const5 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const5 = opset1::Constant::create(ntype, Shape{1}, { 4.0f });
+        } else {
+            const5 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(4.0f)) });
+        }
+        auto mulNode3 = std::make_shared<opset1::Multiply>(reluNode, const5);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const6 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const6 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
+        } else {
+            const6 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(3.0f)) });
+        }
+        auto addNode3 = std::make_shared<opset1::Add>(mulNode3, const6);
+        addNode3->set_friendly_name("Add_3");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode3}, ngraph::ParameterVector{input1});
+    }
+
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        // STAGE1:
+        threshold = 2e-1;
+
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["Add_1"] = "FP32";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Add_2"] = "FP32";
+        expectedPrecisions["ELT_1"] = "ndef";
+        expectedPrecisions["RELU_1"] = "ndef";
+        expectedPrecisions["Add_3"] = "FP32";
+    }
+};
+
+TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) {
+    test();
+};
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
+                        ::testing::Combine(
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(Precision::FP32),
+                                ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                                ::testing::Values(SizeVector()),
+                                ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Scaleshift_x3_ConvEltwiseRelu::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        Scaleshift_x3_ConvEltwiseRelu::getTestCaseName);
+
+
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/tail_fp32_optimization.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/tail_fp32_optimization.cpp
new file mode 100644 (file)
index 0000000..3a99203
--- /dev/null
@@ -0,0 +1,141 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class PoolingAfterConv : public BasicBF16Test  {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //    Scaleshift   (FP32)
+        //        |
+        //    Convolution  (BF16)
+        //        |
+        //       ReLU      (Fused)
+        //        |
+        //     Pooling     (FP32) <- this layer can be executed in BF16 if it passes data to the next BF16 layer;
+        //                           otherwise the tail optimization should return Pooling to FP32
+
+        // STAGE1: construction of the GRAPH
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("Add_4");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 16, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(16 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(16 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode->set_friendly_name("Convolution_6");
+
+        // ReLU
+        auto reluNode = std::make_shared<opset1::Relu>(convNode);
+
+        // Pooling
+        auto avgpoolNode = std::make_shared<opset1::AvgPool>(reluNode,
+                                                             Strides{1, 1},
+                                                             Shape{1, 1},
+                                                             Shape{1, 1},
+                                                             Shape{2, 2},
+                                                             true,
+                                                             op::RoundingType::FLOOR);
+        avgpoolNode->set_friendly_name("AvgPool_8");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{avgpoolNode}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        threshold = 0.14f;  // max value in the latest tensor for FP32 network is 14.6448
+
+        // STAGE2:
+        // fill in the expected execution precision of each layer; it is defined by the precision of the
+        // input tensor to the primitive and is reflected in the performance counters
+        expectedPrecisions["Add_4"] = "FP32";
+        expectedPrecisions["Convolution_6"] = "BF16";
+        expectedPrecisions["AvgPool_8"] = "FP32";
+    }
+};
+
+TEST_P(PoolingAfterConv, CompareWithRefImpl) {
+    test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, PoolingAfterConv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                            PoolingAfterConv::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, PoolingAfterConv,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        PoolingAfterConv::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/bfloat16/topk_inputs_i32.cpp b/inference-engine/tests/functional/plugin/cpu/bfloat16/topk_inputs_i32.cpp
new file mode 100644 (file)
index 0000000..ef704c0
--- /dev/null
@@ -0,0 +1,165 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "bfloat16_helpers.hpp"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+#include <functional>
+#include <utility>
+
+#include <ie_core.hpp>
+#include <ie_plugin_config.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+
+#include "ngraph/opsets/opset1.hpp"
+
+using namespace std;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+namespace LayerTestsDefinitions {
+
+class TopKInputsI32 : public BasicBF16Test  {
+protected:
+    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
+        //      Power   (FP32)
+        //        |
+        //      Convolution1 (BF16)       Const (I32)
+        //               |                |
+        //               \                /
+        //                  TopK (FP32)
+        //              (BF16)/        \ (I32)
+        //                   |
+        //         Convolution 2
+
+        // STAGE1: construction of the GRAPH
+
+        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
+        // multiply
+        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 40, 40});
+        input1->set_friendly_name("Input_1");
+        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
+        } else {
+            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(2.0f)) });
+        }
+        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
+
+        // add
+        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
+        if (netPrecision == Precision::FP32) {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
+        } else {
+            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(BFloat16Helpers::reducePrecisionBitwiseS(1.0f)) });
+        }
+        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
+        addNode->set_friendly_name("Add_4");
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
+        ngraph::Shape convFilterShape = { 16, 3, 3, 3 };  // output channels, input channels, kernel height, kernel width
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(16 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
+        } else {
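+            // bf16 weights are supplied as raw 16-bit patterns (hence std::vector<short>); BFloat16Helpers
+            // presumably produces them by truncating each fp32 value to the upper 16 bits of its bit pattern.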
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(16 * 3 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
+            addNode, weightsNode,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode->set_friendly_name("Convolution_1");
+
+        // TopK
+        const auto k = make_shared<op::Constant>(element::i32, Shape{}, vector<int>{1});
+        size_t axis = 1;
+        ngraph::op::v1::TopK::Mode mode = ngraph::op::v1::TopK::Mode::MAX;
+        ngraph::op::v1::TopK::SortType sort = ngraph::op::v1::TopK::SortType::NONE;
+        auto argmaxNode = std::make_shared<opset1::TopK>(convNode, k, axis, mode, sort);
+        argmaxNode->set_friendly_name("TopK_1");
+
+        auto goe0 = make_shared<op::GetOutputElement>(argmaxNode, 0);
+        auto goe1 = make_shared<op::GetOutputElement>(argmaxNode, 1);
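+        // GetOutputElement splits the two TopK outputs: output 0 (values) feeds the second convolution
+        // below, while output 1 (the I32 indices) is exposed directly as a network result.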
+
+        // convolution
+        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
+        ngraph::Shape convFilterShape2 = { 1, 1, 3, 3 };  // output channels, input channels, kernel height, kernel width
+        if (netPrecision == Precision::FP32) {
+            std::vector<float> weightValuesFP32;
+            weightValuesFP32.resize(1 * 1 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
+        } else {
+            std::vector<short> weightValuesBF16;
+            weightValuesBF16.resize(1 * 1 * 3 * 3);
+            BFloat16Helpers::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
+            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
+        }
+
+        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
+            goe0, weightsNode2,
+            ngraph::Strides({ 1, 1 }),   // strides
+            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
+            ngraph::CoordinateDiff({ 0, 0 }),   // pad end
+            ngraph::Strides({ 1, 1 }),        // dilation
+            ngraph::op::PadType::EXPLICIT);   // pad type
+        convNode2->set_friendly_name("Convolution_2");
+
+        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2, goe1}, ngraph::ParameterVector{input1});
+    }
+    void SetUp() override {
+        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
+        fnPtr = createGraph(netPrecision);
+
+        threshold = 0.14f;  // max value in the last tensor for the FP32 network is 22.6
+
+        // STAGE2:
+        // fill the expected execution precision of each layer; it is defined by the precision of the input tensor to the primitive and is reflected in
+        // performance counters
+        expectedPrecisions["Add_4"] = "FP32";
+        expectedPrecisions["Convolution_1"] = "BF16";
+        expectedPrecisions["Convolution_2"] = "BF16";
+        expectedPrecisions["TopK_1"] = "FP32";
+    }
+};
+
+TEST_P(TopKInputsI32, CompareWithRefImpl) {
+    test();
+};
+
+
+INSTANTIATE_TEST_CASE_P(FP32_bfloat16_NoReshape, TopKInputsI32,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        TopKInputsI32::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(BF16_bfloat16_NoReshape, TopKInputsI32,
+                        ::testing::Combine(
+                            ::testing::Values(Precision::FP32),
+                            ::testing::Values(Precision::BF16),
+                            ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
+                            ::testing::Values(SizeVector()),
+                            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                        TopKInputsI32::getTestCaseName);
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/reshape.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/reshape.cpp
new file mode 100644 (file)
index 0000000..d0976a7
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/reshape.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+// Common params
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::U8
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+INSTANTIATE_TEST_CASE_P(ReshapeCheckDynBatch, ReshapeLayerTest,
+        ::testing::Combine(
+                ::testing::Values(true),
+                ::testing::ValuesIn(inputPrecisions),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
+                ::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
+                ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                ::testing::Values(std::map<std::string, std::string>({{CONFIG_KEY(DYN_BATCH_ENABLED), CONFIG_VALUE(YES)}}))),
+                ReshapeLayerTest::getTestCaseName);
+
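+// With SpecialZero enabled, a 0 in the target shape keeps the corresponding input dimension, so the
+// {10, 0, 100} case below reshapes the 10x10x10x10 input to 10x10x100 (the element count is preserved).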
+INSTANTIATE_TEST_CASE_P(ReshapeCheck, ReshapeLayerTest,
+        ::testing::Combine(
+                ::testing::Values(true),
+                ::testing::ValuesIn(inputPrecisions),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
+                ::testing::Values(std::vector<size_t>({10, 0, 100})),
+                ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                ::testing::Values(std::map<std::string, std::string>({}))),
+                ReshapeLayerTest::getTestCaseName);
+}  // namespace
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/softmax.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/softmax.cpp
new file mode 100644 (file)
index 0000000..09ddf01
--- /dev/null
@@ -0,0 +1,51 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/softmax.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+};
+
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+    InferenceEngine::Precision::FP32,
+};
+
+const std::vector<InferenceEngine::Layout> inputLayouts2D = {
+    InferenceEngine::Layout::NC,
+};
+
+const std::vector<InferenceEngine::SizeVector> inputShapes2D = {
+    InferenceEngine::SizeVector {1, 100},
+};
+
+const std::vector<size_t> axis2D = {
+    1
+};
+
+const auto params2D = testing::Combine(
+    testing::ValuesIn(netPrecisions),
+    testing::ValuesIn(inputPrecisions),
+    testing::ValuesIn(inputLayouts2D),
+    testing::ValuesIn(inputShapes2D),
+    testing::ValuesIn(axis2D),
+    testing::Values(CommonTestUtils::DEVICE_CPU),
+    testing::Values(std::map<std::string, std::string>())
+);
+
+INSTANTIATE_TEST_CASE_P(
+    SoftMax2D,
+    SoftMaxLayerTest,
+    params2D,
+    SoftMaxLayerTest::getTestCaseName
+);
+
+}  // namespace
index fdd7696..efc4ef9 100644 (file)
@@ -6,13 +6,17 @@
 set(TARGET_NAME gpuFuncTests)
 
 addIeTargetTest(
-        NAME ${TARGET_NAME}
-        ROOT ${CMAKE_CURRENT_SOURCE_DIR}
+        NAME
+            ${TARGET_NAME}
+        ROOT
+            ${CMAKE_CURRENT_SOURCE_DIR}
         DEPENDENCIES
             clDNNPlugin
         LINK_LIBRARIES
             funcSharedTests
+            ${CLDNN__IOCL_ICD_LIBPATH}
         ADD_CPPLINT
         LABELS
             GPU
-)
\ No newline at end of file
+)
+target_include_directories(${TARGET_NAME} PRIVATE ${CLDNN__IOCL_ICD_INCDIRS})
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp b/inference-engine/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp
new file mode 100644 (file)
index 0000000..aa139fc
--- /dev/null
@@ -0,0 +1,258 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <string>
+#include <utility>
+#include <vector>
+#include <memory>
+
+#include <cpp/ie_cnn_net_reader.h>
+#include <inference_engine.hpp>
+#include <ie_compound_blob.h>
+
+#include <cldnn/cldnn_config.hpp>
+
+#ifdef _WIN32
+# include <gpu/gpu_context_api_dx.hpp>
+#elif defined ENABLE_LIBVA
+# include <gpu/gpu_context_api_va.hpp>
+#endif
+#include <gpu/gpu_context_api_ocl.hpp>
+#include <common_test_utils/test_common.hpp>
+#include <functional_test_utils/plugin_cache.hpp>
+
+#include "ngraph_functions/subgraph_builders.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+
+using namespace ::testing;
+using namespace InferenceEngine;
+using namespace InferenceEngine::gpu;
+
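+// Minimal helper that owns an OpenCL context/device/queue: the default constructor picks the Intel
+// (vendor id 0x8086) GPU device, while the second constructor wraps a user-supplied cl_context handle.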
+struct OpenCL {
+    cl::Context _context;
+    cl::Device _device;
+    cl::CommandQueue _queue;
+
+    explicit OpenCL(std::shared_ptr<std::vector<cl_context_properties>> media_api_context_properties = nullptr) {
+        // get Intel iGPU OCL device, create context and queue
+        {
+            const unsigned int refVendorID = 0x8086;
+            cl_uint n = 0;
+            cl_int err = clGetPlatformIDs(0, NULL, &n);
+
+            // Get platform list
+            std::vector<cl_platform_id> platform_ids(n);
+            err = clGetPlatformIDs(n, platform_ids.data(), NULL);
+
+            for (auto& id : platform_ids) {
+                cl::Platform platform = cl::Platform(id);
+                std::vector<cl::Device> devices;
+                platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
+                for (auto& d : devices) {
+                    if (refVendorID == d.getInfo<CL_DEVICE_VENDOR_ID>()) {
+                        _device = d;
+                        _context = cl::Context(_device);
+                        break;
+                    }
+                }
+            }
+            cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+            _queue = cl::CommandQueue(_context, _device, props);
+        }
+    }
+
+    explicit OpenCL(cl_context context) {
+        // user-supplied context handle
+        _context = cl::Context(context, true);
+        _device = cl::Device(_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);
+
+        cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+        _queue = cl::CommandQueue(_context, _device, props);
+    }
+};
+
+class RemoteBlob_Test : public CommonTestUtils::TestsCommon {
+protected:
+    std::shared_ptr<ngraph::Function> fn_ptr;
+    virtual void SetUp() {
+        fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
+    }
+};
+
+TEST_F(RemoteBlob_Test, canInputUserBlob) {
+#if defined(_WIN32) || defined(ANDROID)
+    GTEST_SKIP();
+#endif
+    CNNNetwork net(fn_ptr);
+
+    net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
+    net.getInputsInfo().begin()->second->setPrecision(Precision::U8);
+
+    auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
+    // TODO: investigate issue with IECore
+    auto ie = InferenceEngine::Core();
+    auto exec_net = ie.LoadNetwork(net, CommonTestUtils::DEVICE_GPU);
+
+    // regular inference
+    auto inf_req_regular = exec_net.CreateInferRequest();
+    InferenceEngine::Blob::Ptr fakeImageData = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
+    inf_req_regular.SetBlob(net.getInputsInfo().begin()->first, fakeImageData);
+
+    inf_req_regular.Infer();
+    auto outputBlob_regular = inf_req_regular.GetBlob(net.getOutputsInfo().begin()->first);
+
+    // inference using remote blob
+    auto inf_req_shared = exec_net.CreateInferRequest();
+    auto cldnn_context = exec_net.GetContext();
+    cl_context ctx = std::dynamic_pointer_cast<ClContext>(cldnn_context)->get();
+    auto ocl_instance = std::make_shared<OpenCL>(ctx);
+    cl_int err;
+
+    auto dims = net.getInputsInfo().begin()->second->getTensorDesc().getDims();
+    size_t imSize = dims[1] * dims[2] * dims[3];
+
+    cl::Buffer shared_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, imSize, NULL, &err);
+    {
+        void* buffer = fakeImageData->buffer();
+        ocl_instance->_queue.enqueueWriteBuffer(shared_buffer, true, 0, imSize, buffer);
+    }
+
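+    // Wrap the user-allocated cl::Buffer as a remote blob tied to the network's cldnn context, so the
+    // plugin can presumably consume the device-side data directly instead of going through a host copy.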
+    Blob::Ptr shared_blob = make_shared_blob(net.getInputsInfo().begin()->second->getTensorDesc(), cldnn_context, shared_buffer);
+    inf_req_shared.SetBlob(net.getInputsInfo().begin()->first, shared_blob);
+
+    inf_req_shared.Infer();
+    auto outputBlob_shared = inf_req_shared.GetBlob(net.getOutputsInfo().begin()->first);
+
+    // compare results
+    {
+        ASSERT_EQ(net.getOutputsInfo().begin()->second->getPrecision(), InferenceEngine::Precision::FP32);
+        ASSERT_EQ(outputBlob_regular->size(), outputBlob_shared->size());
+        auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
+        FuncTestUtils::compareBlobs(outputBlob_regular, outputBlob_shared, thr);
+    }
+}
+
+TEST_F(RemoteBlob_Test, canInferOnUserContext) {
+#if defined _WIN32
+    GTEST_SKIP();
+#endif
+    auto fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
+    CNNNetwork net(fn_ptr);
+
+    net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
+    net.getInputsInfo().begin()->second->setPrecision(Precision::U8);
+
+    auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
+
+    auto ie = InferenceEngine::Core();
+    auto exec_net_regular = ie.LoadNetwork(net, CommonTestUtils::DEVICE_GPU);
+
+    // regular inference
+    auto inf_req_regular = exec_net_regular.CreateInferRequest();
+    auto fakeImageData = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
+    inf_req_regular.SetBlob(net.getInputsInfo().begin()->first, fakeImageData);
+
+    inf_req_regular.Infer();
+    auto outputBlob_regular = inf_req_regular.GetBlob(net.getOutputsInfo().begin()->first);
+
+    // inference using remote blob
+    auto ocl_instance = std::make_shared<OpenCL>();
+    auto remote_context = make_shared_context(ie, CommonTestUtils::DEVICE_GPU, ocl_instance->_context.get());
+    auto exec_net_shared = ie.LoadNetwork(net, remote_context);
+    auto inf_req_shared = exec_net_shared.CreateInferRequest();
+    inf_req_shared.SetBlob(net.getInputsInfo().begin()->first, fakeImageData);
+
+    inf_req_shared.Infer();
+    auto outputBlob_shared = inf_req_shared.GetBlob(net.getOutputsInfo().begin()->first);
+
+    // compare results
+    {
+        ASSERT_EQ(net.getOutputsInfo().begin()->second->getPrecision(), InferenceEngine::Precision::FP32);
+        ASSERT_EQ(outputBlob_regular->size(), outputBlob_shared->size());
+        auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
+        FuncTestUtils::compareBlobs(outputBlob_regular, outputBlob_shared, thr);
+    }
+}
+
+class TwoNets_Test : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<size_t> {
+    void SetUp() override {
+        num_streams = this->GetParam();
+        fn_ptrs = {ngraph::builder::subgraph::makeSplitMultiConvConcat(),
+                   ngraph::builder::subgraph::makeMultiSingleConv()};
+    };
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<std::size_t> & obj) {
+        return "num_streams_" + std::to_string(obj.param);
+    }
+protected:
+    size_t num_streams;
+    std::vector<std::shared_ptr<ngraph::Function>> fn_ptrs;
+};
+
+TEST_P(TwoNets_Test, canInferTwoExecNets) {
+    std::vector<InferenceEngine::CNNNetwork> nets;
+    for (auto &fn_ptr : fn_ptrs) {
+        nets.push_back(CNNNetwork(fn_ptr));
+    }
+
+    auto ie = InferenceEngine::Core();
+
+    std::vector<std::string> outputs;
+    std::vector<InferRequest> irs;
+    std::vector<std::shared_ptr<float*>> ref;
+    std::vector<int> outElementsCount;
+
+    for (size_t i = 0; i < nets.size(); ++i) {
+        auto net = nets[i];
+
+        net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
+        net.getInputsInfo().begin()->second->setPrecision(Precision::FP32);
+
+        auto exec_net = ie.LoadNetwork(net, CommonTestUtils::DEVICE_GPU,
+                               {{PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, std::to_string(num_streams)}});
+
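+        // Prepare one output name, infer request, input blob and reference result per stream, so that
+        // num_streams requests per network can later be run asynchronously in parallel.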
+        for (size_t j = 0; j < num_streams; j++) {
+            outputs.push_back(net.getOutputsInfo().begin()->first);
+
+            auto inf_req = exec_net.CreateInferRequest();
+            irs.push_back(inf_req);
+
+            auto blob = FuncTestUtils::createAndFillBlob(net.getInputsInfo().begin()->second->getTensorDesc());
+            inf_req.SetBlob(net.getInputsInfo().begin()->first, blob);
+
+            outElementsCount.push_back(std::accumulate(begin(fn_ptrs[i]->get_output_shape(0)), end(fn_ptrs[i]->get_output_shape(0)), 1,
+                                                       std::multiplies<size_t>()));
+
+            std::shared_ptr<float*> reOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(
+                    fn_ptrs[i], {inf_req.GetBlob(net.getInputsInfo().begin()->first)->buffer()}).front();
+            ref.push_back(reOutData);
+        }
+    }
+
+    const int niter = 10;
+    for (int i = 0; i < niter; i++) {
+        for (auto ir : irs) {
+            ir.StartAsync();
+        }
+
+        for (auto ir : irs) {
+            ir.Wait(IInferRequest::RESULT_READY);
+        }
+    }
+
+    for (auto& net : nets) {
+        ASSERT_EQ(net.getOutputsInfo().begin()->second->getPrecision(), InferenceEngine::Precision::FP32);
+    }
+    auto thr = FuncTestUtils::GetComparisonThreshold(InferenceEngine::Precision::FP32);
+    for (size_t i = 0; i < irs.size(); ++i) {
+        ASSERT_EQ(outElementsCount[i], irs[i].GetBlob(outputs[i])->size());
+        FuncTestUtils::compareRawBuffers(irs[i].GetBlob(outputs[i])->buffer().as<float*>(), *ref[i], outElementsCount[i],
+                                         outElementsCount[i],
+                                         thr);
+    }
+}
+
+const std::vector<size_t> num_streams{1, 2};
+
+INSTANTIATE_TEST_CASE_P(RemoteBlob, TwoNets_Test, ::testing::ValuesIn(num_streams), TwoNets_Test::getTestCaseName);
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reshape.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reshape.cpp
new file mode 100644 (file)
index 0000000..6f858ac
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include "single_layer_tests/reshape.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+// Common params
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+            InferenceEngine::Precision::FP32,
+            InferenceEngine::Precision::U8
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+            InferenceEngine::Precision::FP32,
+            InferenceEngine::Precision::FP16
+};
+
+// TODO: Issue: 28981
+INSTANTIATE_TEST_CASE_P(DISABLED_ReshapeCheckDynBatch, ReshapeLayerTest,
+        ::testing::Combine(
+                ::testing::Values(true),
+                ::testing::ValuesIn(inputPrecisions),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(std::vector<size_t>({1, 16, 16, 16})),
+                ::testing::Values(std::vector<size_t>({1, 0, 256})),
+                ::testing::Values(CommonTestUtils::DEVICE_GPU),
+                ::testing::Values(std::map<std::string, std::string>({{CONFIG_KEY(DYN_BATCH_ENABLED), CONFIG_VALUE(YES)}}))),
+                ReshapeLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(ReshapeCheck, ReshapeLayerTest,
+        ::testing::Combine(
+                ::testing::Values(true),
+                ::testing::ValuesIn(inputPrecisions),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
+                ::testing::Values(std::vector<size_t>({10, 0, 100})),
+                ::testing::Values(CommonTestUtils::DEVICE_GPU),
+                ::testing::Values(std::map<std::string, std::string>({}))),
+                ReshapeLayerTest::getTestCaseName);
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/strided_slice.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/strided_slice.cpp
new file mode 100644 (file)
index 0000000..348bf9b
--- /dev/null
@@ -0,0 +1,50 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/strided_slice.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
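+// Field order mirrors stridedSliceParamsTuple (see single_layer_tests/strided_slice.hpp): input shape,
+// begin, end, strides, then begin/end/new-axis/shrink-axis/ellipsis masks, input precision, network
+// precision, and device name.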
+stridedSliceParamsTuple ss_only_test_cases[] = {
+        stridedSliceParamsTuple({ 2, 2, 2, 2 }, { 0, 0, 0, 0 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 },
+                       {1, 1, 1, 1}, {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},
+                                InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
+                                CommonTestUtils::DEVICE_GPU),
+        stridedSliceParamsTuple({ 2, 2, 2, 2 }, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 },
+                       {0, 0, 0, 0}, {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},
+                                InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
+                                CommonTestUtils::DEVICE_GPU),
+        stridedSliceParamsTuple({ 2, 2, 2, 2 }, { 1, 1, 1, 1 }, { 2, 2, 2, 2 }, { 1, 1, 1, 1 },
+                       {0, 0, 0, 0}, {0, 0, 0, 0},  {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},
+                                InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
+                                CommonTestUtils::DEVICE_GPU),
+        stridedSliceParamsTuple({ 2, 2, 4, 3 }, { 0, 0, 0, 0 }, { 2, 2, 4, 3 }, { 1, 1, 2, 1 },
+                       {1, 1, 1, 1}, {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},
+                                InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
+                                CommonTestUtils::DEVICE_GPU),
+        stridedSliceParamsTuple({ 2, 2, 4, 2 }, { 1, 0, 0, 1 }, { 2, 2, 4, 2 }, { 1, 1, 2, 1 },
+                       {0, 1, 1, 0}, {1, 1, 0, 0},  {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},
+                                InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
+                                CommonTestUtils::DEVICE_GPU),
+        stridedSliceParamsTuple({ 1, 2, 4, 2 }, { 1, 0, 0, 0 }, { 1, 2, 4, 2 }, { 1, 1, -2, -1 },
+                       {1, 1, 1, 1}, {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},
+                                InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
+                                CommonTestUtils::DEVICE_GPU),
+        stridedSliceParamsTuple({ 2, 2, 4, 2 }, { 1, 0, 0, 0 }, { 1, 2, 4, 2 }, { 1, 1, -2, -1 },
+                       {0, 1, 1, 1}, {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},  {1, 1, 1, 1},
+                                InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32,
+                                CommonTestUtils::DEVICE_GPU),
+};
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_CLDNN, StridedSliceLayerTest, ::testing::ValuesIn(ss_only_test_cases),
+        StridedSliceLayerTest::getTestCaseName);
+
+
+}  // namespace
index 9b430ac..62daa07 100644 (file)
@@ -1,19 +1,23 @@
-# Copyright (C) 2019 Intel Corporation
+# Copyright (C) 2019-2020 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
 set(TARGET_NAME myriadFuncTests)
 
+disable_deprecated_warnings()
+
 addIeTargetTest(
         NAME ${TARGET_NAME}
         ROOT ${CMAKE_CURRENT_SOURCE_DIR}
         DEPENDENCIES
             myriadPlugin
         LINK_LIBRARIES
+            vpu_common_lib
+            vpu_graph_transformer
             funcSharedTests
         ADD_CPPLINT
         LABELS
             VPU
             MYRIAD
-)
\ No newline at end of file
+)
diff --git a/inference-engine/tests/functional/plugin/myriad/ngraph/conversions/dynamic_shape_resolver.cpp b/inference-engine/tests/functional/plugin/myriad/ngraph/conversions/dynamic_shape_resolver.cpp
new file mode 100644 (file)
index 0000000..0bdf012
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+#include "ngraph/op/parameter.hpp"
+#include "ngraph/function.hpp"
+
+#include "cpp/ie_cnn_network.h"
+#include "ie_common.h"
+
+#include "common_test_utils/test_common.hpp"
+
+#include <gtest/gtest.h>
+
+namespace {
+
+class DynamicShapeResolverTests : public CommonTestUtils::TestsCommon {
+public:
+    void SetUp() override {
+        const auto tensorType  = ngraph::element::f16;
+        const auto shapeType   = ngraph::element::u64;
+        const auto tensorShape = std::initializer_list<std::size_t>{1, 800};
+
+        const auto tensor = std::make_shared<ngraph::op::Parameter>(tensorType, ngraph::Shape{tensorShape});
+        const auto shape  = std::make_shared<ngraph::op::Parameter>(shapeType, ngraph::Shape{tensorShape.size()});
+        auto dynamicShapeResolver = std::make_shared<ngraph::op::DynamicShapeResolver>(tensor, shape);
+        dynamicShapeResolver->set_friendly_name(s_FriendlyName);
+        const auto function = std::make_shared<ngraph::Function>(ngraph::NodeVector{dynamicShapeResolver}, ngraph::ParameterVector{tensor, shape});
+
+        cnnNetwork = InferenceEngine::CNNNetwork{function};
+        triggerConversionToCNNNetwork();
+    }
+
+protected:
+    InferenceEngine::CNNLayerPtr getDynamicShapeResolverLayer() const {
+        return cnnNetwork.getLayerByName(s_FriendlyName.c_str());
+    }
+    InferenceEngine::CNNNetwork cnnNetwork;
+
+private:
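+    // Iterating over the network via begin() is used here to force conversion of the ngraph::Function
+    // into the legacy CNNLayer-based representation, which is what the checks below inspect.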
+    void triggerConversionToCNNNetwork() {
+        cnnNetwork.begin();
+    }
+
+    static const std::string s_FriendlyName;
+};
+
+const std::string DynamicShapeResolverTests::s_FriendlyName = "DSR";
+
+TEST_F(DynamicShapeResolverTests, NGraphFunctionCanBeConvertedToCNNNetwork) {
+    ASSERT_EQ(cnnNetwork.getInputsInfo().size(), 2);
+    ASSERT_EQ(cnnNetwork.layerCount(), cnnNetwork.getInputsInfo().size() + 1);
+    ASSERT_EQ(cnnNetwork.getOutputsInfo().size(), 1);
+
+    const auto dynamicShapeResolver = getDynamicShapeResolverLayer();
+    ASSERT_EQ(dynamicShapeResolver->type, "DynamicShapeResolver");
+    ASSERT_EQ(dynamicShapeResolver->insData.size(), 2);
+    ASSERT_EQ(dynamicShapeResolver->outData.size(), 1);
+}
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/myriad/ngraph/operations/dynamic_shape_resolver.cpp b/inference-engine/tests/functional/plugin/myriad/ngraph/operations/dynamic_shape_resolver.cpp
new file mode 100644 (file)
index 0000000..84d2112
--- /dev/null
@@ -0,0 +1,158 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ngraph/op/parameter.hpp>
+#include <ngraph/function.hpp>
+
+#include <gtest/gtest.h>
+#include <common_test_utils/test_common.hpp>
+#include <details/ie_exception.hpp>
+
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+namespace {
+
+using DataType  = ngraph::element::Type_t;
+using DimsType  = ngraph::element::Type_t;
+using DataShape = ngraph::Shape;
+
+class DynamicShapeResolverTests : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<std::tuple<DataType, DimsType, DataShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& dataType   = std::get<0>(parameters);
+        const auto& dimsType   = std::get<1>(parameters);
+        const auto& dataShape  = std::get<2>(parameters);
+
+        data = std::make_shared<ngraph::op::Parameter>(dataType, dataShape);
+        dims = std::make_shared<ngraph::op::Parameter>(dimsType, ngraph::Shape{dataShape.size()});
+    }
+
+protected:
+    std::shared_ptr<ngraph::op::Parameter> data;
+    std::shared_ptr<ngraph::op::Parameter> dims;
+};
+
+TEST_P(DynamicShapeResolverTests, CanValidateAndInferTypes) {
+    std::shared_ptr<ngraph::op::DynamicShapeResolver> dynamicShapeResolver;
+    ASSERT_NO_THROW(dynamicShapeResolver = std::make_shared<ngraph::op::DynamicShapeResolver>(data, dims));
+    ASSERT_NO_THROW(std::make_shared<ngraph::Function>(ngraph::NodeVector{dynamicShapeResolver}, ngraph::ParameterVector{data, dims}));
+}
+
+std::set<ngraph::element::Type_t> allNGraphTypes() {
+    return {
+        ngraph::element::dynamic,
+        ngraph::element::boolean,
+        ngraph::element::bf16,
+        ngraph::element::f16,
+        ngraph::element::f32,
+        ngraph::element::f64,
+        ngraph::element::i8,
+        ngraph::element::i16,
+        ngraph::element::i32,
+        ngraph::element::i64,
+        ngraph::element::u1,
+        ngraph::element::u8,
+        ngraph::element::u16,
+        ngraph::element::u32,
+        ngraph::element::u64
+    };
+}
+
+std::set<ngraph::element::Type_t> allNGraphIntegralNumberTypes() {
+    return {
+        ngraph::element::i8,
+        ngraph::element::i16,
+        ngraph::element::i32,
+        ngraph::element::i64,
+        ngraph::element::u1,
+        ngraph::element::u8,
+        ngraph::element::u16,
+        ngraph::element::u32,
+        ngraph::element::u64
+    };
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicShapeResolverTests, testing::Combine(
+    testing::ValuesIn(allNGraphTypes()),
+    testing::ValuesIn(allNGraphIntegralNumberTypes()),
+    testing::Values(DataShape{1, 800}, DataShape{1, 1})));
+
+
+using DataPartialShape = ngraph::PartialShape;
+using DimsPartialShape = ngraph::PartialShape;
+class DynamicShapeResolverNegativeTests
+    : public CommonTestUtils::TestsCommon
+    , public testing::WithParamInterface<std::tuple<DataType, DimsType, DataPartialShape, DimsPartialShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters = GetParam();
+        const auto& dataType   = std::get<0>(parameters);
+        const auto& dimsType   = std::get<1>(parameters);
+        const auto& dataPartialShape  = std::get<2>(parameters);
+        const auto& dimsPartialShape  = std::get<3>(parameters);
+
+        data = std::make_shared<ngraph::op::Parameter>(dataType, dataPartialShape);
+        dims = std::make_shared<ngraph::op::Parameter>(dimsType, dimsPartialShape);
+    }
+
+protected:
+    std::shared_ptr<ngraph::op::Parameter> data;
+    std::shared_ptr<ngraph::op::Parameter> dims;
+};
+
+class DynamicShapeResolverNegativeTestsDimsType : public DynamicShapeResolverNegativeTests {};
+TEST_P(DynamicShapeResolverNegativeTestsDimsType, ThrowsOnInvalidDimsType) {
+    ASSERT_THROW(std::make_shared<ngraph::op::DynamicShapeResolver>(data, dims), ngraph::ngraph_error);
+}
+
+std::set<ngraph::element::Type_t> allNGraphNotIntegralTypes() {
+    auto notIntegralTypes = std::set<ngraph::element::Type_t>{};
+    const auto& allTypes = allNGraphTypes();
+    const auto& allIntegralTypes = allNGraphIntegralNumberTypes();
+    std::set_difference(allTypes.cbegin(), allTypes.cend(), allIntegralTypes.cbegin(), allIntegralTypes.cend(),
+        std::inserter(notIntegralTypes, notIntegralTypes.begin()));
+    return notIntegralTypes;
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicShapeResolverNegativeTestsDimsType, testing::Combine(
+    testing::ValuesIn(allNGraphTypes()),
+    testing::ValuesIn(allNGraphNotIntegralTypes()),
+    testing::Values(DataPartialShape{1, 800}),
+    testing::Values(DataPartialShape{2})));
+
+class DynamicShapeResolverNegativeTestsDataShape : public DynamicShapeResolverNegativeTests {};
+TEST_P(DynamicShapeResolverNegativeTestsDataShape, ThrowsOnInvalidDimsType) {
+    ASSERT_THROW(std::make_shared<ngraph::op::DynamicShapeResolver>(data, dims), ngraph::ngraph_error);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicShapeResolverNegativeTestsDataShape, testing::Combine(
+    testing::ValuesIn(allNGraphTypes()),
+    testing::ValuesIn(allNGraphIntegralNumberTypes()),
+    testing::Values(
+        DataPartialShape::dynamic(),
+        DataPartialShape{{1, ngraph::Dimension::dynamic()}},
+        DataPartialShape{{ngraph::Dimension::dynamic(), 1}},
+        DataPartialShape{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()}}),
+    testing::Values(DataShape{2})));
+
+class DynamicShapeResolverNegativeTestsDimsShape : public DynamicShapeResolverNegativeTests {};
+TEST_P(DynamicShapeResolverNegativeTestsDimsShape, ThrowsOnInvalidDimsType) {
+    ASSERT_THROW(std::make_shared<ngraph::op::DynamicShapeResolver>(data, dims), ngraph::ngraph_error);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicShapeResolverNegativeTestsDimsShape, testing::Combine(
+    testing::ValuesIn(allNGraphTypes()),
+    testing::ValuesIn(allNGraphIntegralNumberTypes()),
+    testing::Values(DataShape{1, 800}),
+    testing::Values(
+        DataPartialShape::dynamic(),
+        DataPartialShape{{1, ngraph::Dimension::dynamic()}},
+        DataPartialShape{{ngraph::Dimension::dynamic(), 1}},
+        DataPartialShape{{ngraph::Dimension::dynamic(), ngraph::Dimension::dynamic()}},
+        DataPartialShape{0},
+        DataPartialShape{1},
+        DataPartialShape{3})));
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/myriad/ngraph/operations/static_shape_nonzero.cpp b/inference-engine/tests/functional/plugin/myriad/ngraph/operations/static_shape_nonzero.cpp
new file mode 100644 (file)
index 0000000..45ba484
--- /dev/null
@@ -0,0 +1,111 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
+
+#include <common_test_utils/test_common.hpp>
+
+#include <ngraph/op/parameter.hpp>
+#include <ngraph/function.hpp>
+
+#include <details/ie_exception.hpp>
+
+#include <gtest/gtest.h>
+
+namespace {
+
+using TensorType  = ngraph::element::Type;
+using TensorShape = ngraph::PartialShape;
+
+class StaticShapeNonZeroTests
+        : public CommonTestUtils::TestsCommon,
+          public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void SetUp() override {
+        const auto& parameters  = GetParam();
+        const auto& tensorType  = std::get<0>(parameters);
+        const auto& tensorShape = std::get<1>(parameters);
+
+        m_param = std::make_shared<ngraph::op::Parameter>(tensorType, tensorShape);
+    }
+protected:
+    std::shared_ptr<ngraph::op::Parameter> m_param;
+};
+
+std::vector<ngraph::PartialShape> testStaticShapes {
+        TensorShape{1000},
+        TensorShape{4, 1000},
+        TensorShape{3, 128, 256},
+        TensorShape{2, 3, 128, 256},
+};
+
+std::vector<ngraph::PartialShape> testDynamicShapes {
+        TensorShape{ngraph::Dimension::dynamic()},
+        TensorShape{4, ngraph::Dimension::dynamic()},
+        TensorShape{3, ngraph::Dimension::dynamic(), 256},
+};
+
+std::vector<ngraph::element::Type> testNGraphNumericTypes {
+        ngraph::element::dynamic,
+        ngraph::element::bf16,
+        ngraph::element::f16,
+        ngraph::element::f32,
+        ngraph::element::f64,
+        ngraph::element::i8,
+        ngraph::element::i16,
+        ngraph::element::i32,
+        ngraph::element::i64,
+        ngraph::element::u1,
+        ngraph::element::u8,
+        ngraph::element::u16,
+        ngraph::element::u32,
+        ngraph::element::u64,
+};
+
+//
+// Positive tests
+//
+
+TEST_P(StaticShapeNonZeroTests, CanValidateAndInferTypes) {
+    std::shared_ptr<ngraph::op::StaticShapeNonZero> op;
+    ASSERT_NO_THROW(op = std::make_shared<ngraph::op::StaticShapeNonZero>(m_param));
+    ASSERT_NO_THROW(std::make_shared<ngraph::Function>(
+            ngraph::OutputVector{op->output(0), op->output(1)},
+            ngraph::ParameterVector{m_param}));
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTests, testing::Combine(
+        testing::ValuesIn(testNGraphNumericTypes),
+        testing::ValuesIn(testStaticShapes))
+);
+
+//
+// Negative tests
+//
+
+class StaticShapeNonZeroTestsNegativeDataType : public StaticShapeNonZeroTests {};
+TEST_P(StaticShapeNonZeroTestsNegativeDataType, ThrowsOnInvalidDataType) {
+    std::shared_ptr<ngraph::op::StaticShapeNonZero> op;
+    ASSERT_THROW(op = std::make_shared<ngraph::op::StaticShapeNonZero>(m_param),
+                 ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeDataType, testing::Combine(
+        testing::Values(ngraph::element::boolean),
+        testing::ValuesIn(testStaticShapes))
+);
+
+class StaticShapeNonZeroTestsNegativeDataShape : public StaticShapeNonZeroTests {};
+TEST_P(StaticShapeNonZeroTestsNegativeDataShape, ThrowsOnInvalidDataShape) {
+    std::shared_ptr<ngraph::op::StaticShapeNonZero> op;
+    ASSERT_THROW(op = std::make_shared<ngraph::op::StaticShapeNonZero>(m_param),
+                 ngraph::NodeValidationFailure);
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, StaticShapeNonZeroTestsNegativeDataShape, testing::Combine(
+        testing::ValuesIn(testNGraphNumericTypes),
+        testing::ValuesIn(testDynamicShapes))
+);
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/myriad/ngraph/transformations/dynamic_to_static_shape_nonzero.cpp b/inference-engine/tests/functional/plugin/myriad/ngraph/transformations/dynamic_to_static_shape_nonzero.cpp
new file mode 100644 (file)
index 0000000..51c090d
--- /dev/null
@@ -0,0 +1,110 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/ngraph/transformations/dynamic_to_static_shape_nonzero.hpp"
+#include "vpu/ngraph/operations/static_shape_nonzero.hpp"
+#include "vpu/ngraph/operations/dynamic_shape_resolver.hpp"
+
+#include "../utils/ngraph_utils.h"
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+
+#include <cpp/ie_cnn_network.h>
+
+#include <common_test_utils/test_common.hpp>
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <map>
+#include <vector>
+
+namespace {
+
+using TensorType  = ngraph::element::Type_t;
+using TensorShape = ngraph::Shape;
+
+class DynamicToStaticShapeNonZeroTests
+        : public CommonTestUtils::TestsCommon,
+          public testing::WithParamInterface<std::tuple<TensorType, TensorShape>> {
+public:
+    void prepareFunctions() {
+        const auto& parameters = GetParam();
+        const auto& tensorType = std::get<0>(parameters);
+        const auto& tensorShape = std::get<1>(parameters);
+
+        // Create a function with only opset3::NonZero
+        // And then run conversion pass
+        {
+            const auto input = std::make_shared<ngraph::op::Parameter>(tensorType, tensorShape);
+
+            const auto nonZero = std::make_shared<ngraph::opset3::NonZero>(input);
+            nonZero->set_friendly_name(s_FriendlyName);
+
+            m_resfunction = std::make_shared<ngraph::Function>(
+                    ngraph::NodeVector{nonZero}, ngraph::ParameterVector{input});
+            ngraph::pass::DynamicToStaticShapeNonZero().run_on_function(m_resfunction);
+        }
+
+        // Create a reference function
+        {
+            const auto input = std::make_shared<ngraph::opset1::Parameter>(tensorType, tensorShape);
+
+            const auto staticShapeNonZero = std::make_shared<ngraph::op::StaticShapeNonZero>(input);
+            staticShapeNonZero->set_friendly_name(s_FriendlyName + "/static_shape");
+            const auto dynamicShapeResolver = std::make_shared<ngraph::op::DynamicShapeResolver>(
+                    staticShapeNonZero->output(0), staticShapeNonZero->output(1));
+            dynamicShapeResolver->set_friendly_name(s_FriendlyName + "/resolve_shape");
+
+            m_refFunction = std::make_shared<ngraph::Function>(
+                    ngraph::NodeVector{dynamicShapeResolver}, ngraph::ParameterVector{input});
+        }
+    }
+
+    void compareFunctions() {
+        FuncTestUtils::CompareFunctions(m_resfunction, m_refFunction);
+
+        auto actualResultNode = m_resfunction->get_output_op(0);
+        auto actualResolverNode = actualResultNode->input(0).get_source_output().get_node_shared_ptr();
+        auto actualNonZeroNode = actualResolverNode->input(0).get_source_output().get_node_shared_ptr();
+
+        auto expectedResultNode = m_refFunction->get_output_op(0);
+        auto expectedResolverNode = expectedResultNode->input(0).get_source_output().get_node_shared_ptr();
+        auto expectedNonZeroNode = expectedResolverNode->input(0).get_source_output().get_node_shared_ptr();
+
+        EXPECT_EQ(actualResolverNode->get_friendly_name(), expectedResolverNode->get_friendly_name());
+        EXPECT_EQ(actualNonZeroNode->get_friendly_name(), expectedNonZeroNode->get_friendly_name());
+    }
+
+protected:
+    std::shared_ptr<ngraph::Function> m_resfunction;
+    std::shared_ptr<ngraph::Function> m_refFunction;
+
+    static const std::string s_FriendlyName;
+};
+
+const std::string DynamicToStaticShapeNonZeroTests::s_FriendlyName = "non_zero";
+
+TEST_P(DynamicToStaticShapeNonZeroTests, inferAndValidate) {
+    prepareFunctions();
+    compareFunctions();
+}
+
+INSTANTIATE_TEST_CASE_P(NGraph, DynamicToStaticShapeNonZeroTests, testing::Combine(
+        testing::Values(
+                ngraph::element::f16,
+                ngraph::element::f32,
+                ngraph::element::i32,
+                ngraph::element::i64,
+                ngraph::element::u8),
+        testing::Values(
+                TensorShape{1000},
+                TensorShape{4, 1000},
+                TensorShape{3, 128, 256},
+                TensorShape{2, 3, 128, 256})
+));
+
+}  // namespace
diff --git a/inference-engine/tests/functional/plugin/myriad/ngraph/utils/ngraph_utils.h b/inference-engine/tests/functional/plugin/myriad/ngraph/utils/ngraph_utils.h
new file mode 100644 (file)
index 0000000..dd5cf87
--- /dev/null
@@ -0,0 +1,67 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/function.hpp>
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <queue>
+#include <string>
+
+namespace FuncTestUtils {
+
+using ComparingNodesPair = typename std::pair<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>>;
+using ComparingNodesBFSQueue = typename std::queue<ComparingNodesPair>;
+
+//
+// This function compares two nGraph functions and requires them to have exactly one output
+// Check nodes types
+// Check number of inputs
+// Check shapes of each Node
+//
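+// Typical usage (as in the dynamic-to-static shape transformation tests in this patch):
+//     FuncTestUtils::CompareFunctions(actualFunction, referenceFunction);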
+void CompareFunctions(const std::shared_ptr<ngraph::Function>& fActual,
+                      const std::shared_ptr<ngraph::Function>& fExpected) {
+    const auto fActualResults = fActual->get_results();
+    const auto fExpectedResults = fExpected->get_results();
+
+    ASSERT_EQ(fActualResults.size(), 1);
+    ASSERT_EQ(fExpectedResults.size(), 1);
+
+    const auto typeInfoToStr = [](const ngraph::Node::type_info_t& typeInfo) {
+        return std::string(typeInfo.name) + "/" + std::to_string(typeInfo.version);
+    };
+
+    ComparingNodesBFSQueue comparingNodes;
+    comparingNodes.push({fActualResults[0], fExpectedResults[0]});
+    while (!comparingNodes.empty()) {
+        const auto node1 = comparingNodes.front().first;
+        const auto node2 = comparingNodes.front().second;
+        comparingNodes.pop();
+
+        ASSERT_EQ(node1->get_type_info(), node2->get_type_info())
+                                    << "Functions compare: data types must be equal "
+                                    << typeInfoToStr(node1->get_type_info()) << " != "
+                                    << typeInfoToStr(node2->get_type_info());
+
+        ASSERT_EQ(node1->inputs().size(), node2->inputs().size())
+                                    << "Functions compare: numbers of inputs are different: "
+                                    << node1->inputs().size() << " and " << node2->inputs().size();
+
+        for (size_t i = 0; i < node1->inputs().size(); ++i) {
+            const auto partialShape1 = node1->input(i).get_partial_shape();
+            const auto partialShape2 = node2->input(i).get_partial_shape();
+            ASSERT_TRUE(partialShape1.relaxes(partialShape2) && partialShape1.refines(partialShape2))
+                                        << "Functions compare: Different shape detected "
+                                        << partialShape1 << " and " << partialShape2;
+
+            comparingNodes.push({node1->input_value(i).get_node_shared_ptr(),
+                                 node2->input_value(i).get_node_shared_ptr()});
+        }
+    }
+}
+
+}  // namespace FuncTestUtils
diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/nonzero.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/nonzero.cpp
new file mode 100644 (file)
index 0000000..57acc05
--- /dev/null
@@ -0,0 +1,44 @@
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/nonzero.hpp"
+
+#include "common_test_utils/test_constants.hpp"
+#include <vpu/vpu_plugin_config.hpp>
+#include <vpu/private_plugin_config.hpp>
+
+#include <vector>
+
+using namespace ngraph::helpers;
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+std::vector<std::vector<size_t>> inShapes = {
+        {1000},
+        {4, 1000},
+        {2, 4, 1000},
+};
+
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::I32,
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::U8,
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+        InferenceEngine::Precision::FP16
+};
+
+// Enable this when #29056 is ready
+INSTANTIATE_TEST_CASE_P(DISABLED_nonzero, NonZeroLayerTest,
+        ::testing::Combine(
+                ::testing::ValuesIn(inShapes),
+                ::testing::ValuesIn(inputPrecisions),
+                ::testing::ValuesIn(netPrecisions),
+                ::testing::Values(CommonTestUtils::DEVICE_MYRIAD),
+                ::testing::Values(ConfigMap({{VPU_CONFIG_KEY(DETECT_NETWORK_BATCH), CONFIG_VALUE(NO)}}))),
+         NonZeroLayerTest::getTestCaseName);
+}  // namespace
index 891bcb7..1af0d1d 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -26,6 +26,7 @@ public:
 
 protected:
     void SetUp() override;
+    void TearDown() override;
 };
 
 }  // namespace LayerTestsDefinitions
index 5d48007..296e35f 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -26,12 +26,11 @@ typedef std::tuple<
         InferenceEngine::Precision,
         InferenceEngine::Precision,
         InferenceEngine::SizeVector,
-        std::string> convLayerTestParamsSet;
+        LayerTestsUtils::TargetDevice> convLayerTestParamsSet;
 namespace LayerTestsDefinitions {
 
 
-class ConvolutionLayerTest
-        : public LayerTestsUtils::LayerTestsCommonClass<convLayerTestParamsSet> {
+class ConvolutionLayerTest : public testing::WithParamInterface<convLayerTestParamsSet>, public LayerTestsUtils::FuncTestsCommon {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<convLayerTestParamsSet> obj);
 
@@ -39,4 +38,4 @@ protected:
     void SetUp() override;
 };
 
-}  // namespace LayerTestsDefinitions
\ No newline at end of file
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/nonzero.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/nonzero.hpp
new file mode 100644 (file)
index 0000000..399ff1a
--- /dev/null
@@ -0,0 +1,38 @@
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+
+namespace LayerTestsDefinitions {
+
+using ConfigMap = typename std::map<std::string, std::string>;
+
+using NonZeroLayerTestParamsSet = typename std::tuple<
+        InferenceEngine::SizeVector,          // Input shapes
+        InferenceEngine::Precision,           // Input precision
+        InferenceEngine::Precision,           // Network precision
+        std::string,                          // Device name
+        ConfigMap>;                           // Config map
+
+class NonZeroLayerTest
+        : public LayerTestsUtils::LayerTestsCommonClass<NonZeroLayerTestParamsSet> {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<NonZeroLayerTestParamsSet> obj);
+
+protected:
+    void SetUp() override;
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/reshape.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/reshape.hpp
new file mode 100644 (file)
index 0000000..76c7365
--- /dev/null
@@ -0,0 +1,35 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+#include "functional_test_utils/layer_test_utils.hpp"
+
+namespace LayerTestsDefinitions {
+    typedef std::tuple<
+            bool,                               // SpecialZero
+            InferenceEngine::Precision,         // Input precision
+            InferenceEngine::Precision,         // Network precision
+            std::vector<size_t>,                // Input shapes
+            std::vector<size_t>,                // OutForm Shapes
+            std::string,                        // Device name
+            std::map<std::string, std::string>  // Config
+            > reshapeParams;
+
+class ReshapeLayerTest
+        : public LayerTestsUtils::LayerTestsCommonClass<reshapeParams> {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<reshapeParams> obj);
+protected:
+    void SetUp() override;
+};
+
+}  // namespace LayerTestsDefinitions
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/softmax.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/softmax.hpp
new file mode 100644 (file)
index 0000000..93613fd
--- /dev/null
@@ -0,0 +1,40 @@
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+
+namespace LayerTestsDefinitions {
+
+using softMaxLayerTestParams =
+    std::tuple<
+        InferenceEngine::Precision,         // netPrecision
+        InferenceEngine::Precision,         // inputPrecision
+        InferenceEngine::Layout,            // inputLayout
+        InferenceEngine::SizeVector,        // inputShape
+        size_t,                             // axis
+        std::string,                        // targetDevice
+        std::map<std::string, std::string>  // config
+    >;
+
+class SoftMaxLayerTest :
+        public LayerTestsUtils::LayerTestsCommonClass<softMaxLayerTestParams> {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<softMaxLayerTestParams> obj);
+
+protected:
+    void SetUp() override;
+};
+
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/strided_slice.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/strided_slice.hpp
new file mode 100644 (file)
index 0000000..206ff44
--- /dev/null
@@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "functional_test_utils/layer_test_utils.hpp"
+
+namespace LayerTestsDefinitions {
+
+using stridedSliceParamsTuple = typename std::tuple<
+        InferenceEngine::SizeVector,       // Input shape
+        std::vector<int64_t>,              // Begin
+        std::vector<int64_t>,              // End
+        std::vector<int64_t>,              // Stride
+        std::vector<int64_t>,              // Begin mask
+        std::vector<int64_t>,              // End mask
+        std::vector<int64_t>,              // New axis mask
+        std::vector<int64_t>,              // Shrink axis mask
+        std::vector<int64_t>,              // Ellipsis axis mask
+        InferenceEngine::Precision,        // Input precision
+        InferenceEngine::Precision,        // Network precision
+        std::string>;                      // Device name
+
+class StridedSliceLayerTest : public LayerTestsUtils::LayerTestsCommonClass<stridedSliceParamsTuple> {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<stridedSliceParamsTuple> &obj);
+
+protected:
+    void SetUp() override;
+};
+}  // namespace LayerTestsDefinitions
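As a reading aid for the tuple layout above, one possible parameter set is sketched below (it assumes the header above is included); the values are illustrative only, and the mask comments follow the usual StridedSlice convention where a set bit typically means the corresponding begin/end bound is ignored.

// Illustrative stridedSliceParamsTuple value (placeholder data, not from this commit).
LayerTestsDefinitions::stridedSliceParamsTuple exampleParams{
        InferenceEngine::SizeVector{1, 12, 100},     // input shape
        std::vector<int64_t>{0, 1, 0},               // begin
        std::vector<int64_t>{1, 12, 100},            // end
        std::vector<int64_t>{1, 1, 1},               // stride
        std::vector<int64_t>{1, 0, 1},               // begin mask
        std::vector<int64_t>{1, 0, 1},               // end mask
        std::vector<int64_t>{0, 0, 0},               // new axis mask
        std::vector<int64_t>{0, 0, 0},               // shrink axis mask
        std::vector<int64_t>{0, 0, 0},               // ellipsis axis mask
        InferenceEngine::Precision::FP32,            // input precision
        InferenceEngine::Precision::FP32,            // network precision
        "CPU"};                                      // device name (placeholder)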
index 2d9602f..ac1c574 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -53,6 +53,12 @@ void ExecGraphUniqueNodeNames::SetUp() {
     fnPtr = std::make_shared<ngraph::Function>(results, params, "SplitConvConcat");
 }
 
+void ExecGraphUniqueNodeNames::TearDown() {
+    if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
+        PluginCache::get().reset();
+    }
+}
+
 TEST_P(ExecGraphUniqueNodeNames, CheckUniqueNodeNames) {
     InferenceEngine::CNNNetwork cnnNet(fnPtr);
 
index 31a061f..9147242 100644 (file)
@@ -1,4 +1,5 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
+//
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -33,9 +34,9 @@ TEST_P(PluginSpecificConversion, GeluConversionTest) {
         // Parameter->Activation->Output
         ASSERT_EQ(net.layerCount(), 3);
     } else if (device == "GPU") {
-        // Parameter--->ScaleShift-------------->Eltwise-->Result
+        // Parameter--->ScaleShift-------------->Eltwise
         //          `-->ScaleShift->ScaleShift-`
-        ASSERT_EQ(net.layerCount(), 6);
+        ASSERT_EQ(net.layerCount(), 5);
     }
 }
 
index 889472b..1adf2b8 100644 (file)
@@ -1,4 +1,5 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
+//
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -107,6 +108,9 @@ TEST_P(ActivationLayerTest, CompareWithRefs) {
                                      outElementsCount,
                                      thr);
     fnPtr.reset();
+    if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
+        PluginCache::get().reset();
+    }
 }
 
 }  // namespace LayerTestsDefinitions
index da584ef..0782701 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -50,6 +50,8 @@ std::string ConvolutionLayerTest::getTestCaseName(testing::TestParamInfo<convLay
 void ConvolutionLayerTest::SetUp() {
     convSpecificParams convParams;
     std::vector<size_t> inputShape;
+    auto inputPrecision = InferenceEngine::Precision::UNSPECIFIED;
+    auto netPrecision   = InferenceEngine::Precision::UNSPECIFIED;
     std::tie(convParams, inputPrecision, netPrecision, inputShape, targetDevice) = this->GetParam();
     ngraph::op::PadType padType;
     InferenceEngine::SizeVector kernel, stride, dilation;
@@ -64,10 +66,14 @@ void ConvolutionLayerTest::SetUp() {
             ngraph::builder::makeConvolution(paramOuts[0], ngPrc, kernel, stride, padBegin,
                                              padEnd, dilation, padType, convOutChannels));
     ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(conv)};
-    fnPtr = std::make_shared<ngraph::Function>(results, params, "convolution");
+    function = std::make_shared<ngraph::Function>(results, params, "convolution");
 }
 
 TEST_P(ConvolutionLayerTest, CompareWithRefs) {
-    inferAndValidate();
+    Run();
+
+    if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) {
+        PluginCache::get().reset();
+    }
 }
-}  // namespace LayerTestsDefinitions
\ No newline at end of file
+}  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/nonzero.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/nonzero.cpp
new file mode 100644 (file)
index 0000000..5855ba6
--- /dev/null
@@ -0,0 +1,52 @@
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/nonzero.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/skip_tests_config.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "ie_core.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+
+namespace LayerTestsDefinitions {
+
+std::string NonZeroLayerTest::getTestCaseName(testing::TestParamInfo<NonZeroLayerTestParamsSet> obj) {
+    std::vector<size_t> inputShape;
+    InferenceEngine::Precision inputPrecision, netPrecision;
+    std::string targetDevice;
+    ConfigMap config;
+    std::tie(inputShape, inputPrecision, netPrecision, targetDevice, config) = obj.param;
+
+    std::ostringstream result;
+    result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
+    result << "inPRC=" << inputPrecision.name() << "_";
+    result << "netPRC=" << netPrecision.name() << "_";
+    result << "targetDevice=" << targetDevice;
+    return result.str();
+}
+
+void NonZeroLayerTest::SetUp() {
+    std::vector<size_t> inputShape;
+    std::tie(inputShape, inputPrecision, netPrecision, targetDevice, config) = this->GetParam();
+
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+    auto paramNode = std::make_shared<ngraph::opset1::Parameter>(ngPrc, ngraph::Shape(inputShape));
+
+    auto nonZeroOp = std::make_shared<ngraph::opset3::NonZero>(paramNode->output(0));
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(nonZeroOp)};
+    fnPtr = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{paramNode}, "non_zero");
+}
+
+TEST_P(NonZeroLayerTest, CompareWithRefs) {
+    inferAndValidate();
+}
+}  // namespace LayerTestsDefinitions
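For reference, the generic NonZero output layout that the test above validates, shown on assumed input data:

// Worked example of NonZero semantics (generic op behaviour, values assumed for illustration):
//   input  (2x2): [[0, 3],
//                  [5, 0]]
//   output shape  [rank, count] = [2, 2]:
//                 [[0, 1],   <- first-axis indices of the non-zero elements
//                  [1, 0]]   <- second-axis indices of the non-zero elements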
diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/reshape.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/reshape.cpp
new file mode 100644 (file)
index 0000000..e425fda
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+#include <ie_plugin_config.hpp>
+#include <ie_core.hpp>
+#include <functional>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+#include "single_layer_tests/reshape.hpp"
+
+namespace LayerTestsDefinitions {
+std::string ReshapeLayerTest::getTestCaseName(testing::TestParamInfo<reshapeParams> obj) {
+    InferenceEngine::Precision inputPrecision, netPrecision;
+    InferenceEngine::SizeVector inputShapes, outFormShapes;
+    std::string targetDevice;
+    std::map<std::string, std::string> config;
+    bool specialZero;
+    std::tie(specialZero, inputPrecision, netPrecision, inputShapes, outFormShapes,
+            targetDevice, config) = obj.param;
+    std::ostringstream result;
+    result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
+    result << "specialZero=" << specialZero << "_";
+    result << "inPRC=" << inputPrecision.name() << "_";
+    result << "netPRC=" << netPrecision.name() << "_";
+    result << "targetDevice=" << targetDevice;
+    return result.str();
+}
+
+void ReshapeLayerTest::SetUp() {
+    InferenceEngine::SizeVector inputShapes, outFormShapes;
+    bool specialZero;
+    std::tie(specialZero, inputPrecision, netPrecision, inputShapes, outFormShapes,
+            targetDevice, config) = this->GetParam();
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+    auto paramsIn = ngraph::builder::makeParams(ngPrc, {inputShapes});
+    auto paramIn = ngraph::helpers::convert2OutputVector(
+            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(paramsIn));
+    auto constNode = std::make_shared<ngraph::opset1::Constant>(
+            ngraph::element::Type_t::i64, ngraph::Shape{outFormShapes.size()}, outFormShapes);
+    auto reshape = std::dynamic_pointer_cast<ngraph::opset1::Reshape>(
+            std::make_shared<ngraph::opset1::Reshape>(paramIn[0], constNode, specialZero));
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(reshape)};
+    fnPtr = std::make_shared<ngraph::Function>(results, paramsIn, "Reshape");
+}
+
+TEST_P(ReshapeLayerTest, CompareWithRefsDynamicBatch) {
+    inferAndValidate();
+}
+}  // namespace LayerTestsDefinitions
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/softmax.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/softmax.cpp
new file mode 100644 (file)
index 0000000..430d5e7
--- /dev/null
@@ -0,0 +1,68 @@
+// Copyright (C) 2020 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/softmax.hpp"
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/skip_tests_config.hpp"
+#include "functional_test_utils/layer_test_utils.hpp"
+
+#include "ie_core.hpp"
+
+#include "ngraph/op/softmax.hpp"
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+
+namespace LayerTestsDefinitions {
+
+std::string SoftMaxLayerTest::getTestCaseName(testing::TestParamInfo<softMaxLayerTestParams> obj) {
+    InferenceEngine::Precision netPrecision, inputPrecision;
+    InferenceEngine::Layout inputLayout;
+    InferenceEngine::SizeVector inputShape;
+    size_t axis;
+    std::string targetDevice;
+    std::map<std::string, std::string> config;
+    std::tie(netPrecision, inputPrecision, inputLayout, inputShape, axis, targetDevice, config) = obj.param;
+
+    std::ostringstream result;
+    result << "netPRC=" << netPrecision.name() << "_";
+    result << "inPRC=" << inputPrecision.name() << "_";
+    result << "inLayout=" << inputLayout << "_";
+    result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
+    result << "axis=" << axis << "_";
+    result << "targetDevice=" << targetDevice;
+
+    return result.str();
+}
+
+void SoftMaxLayerTest::SetUp() {
+    InferenceEngine::SizeVector inputShape;
+    size_t axis;
+    std::tie(netPrecision, inputPrecision, inputLayout, inputShape, axis, targetDevice, config) = GetParam();
+    outputPrecision = inputPrecision;
+    outputLayout = inputLayout;
+
+    const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+    const auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+
+    const auto paramOuts =
+        ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+
+    const auto softMax = std::make_shared<ngraph::opset1::Softmax>(paramOuts.at(0), axis);
+
+    const ngraph::ResultVector results {std::make_shared<ngraph::opset1::Result>(softMax)};
+
+    fnPtr = std::make_shared<ngraph::Function>(results, params, "softMax");
+}
+
+TEST_P(SoftMaxLayerTest, CompareWithRefs) {
+    inferAndValidate();
+}
+
+}  // namespace LayerTestsDefinitions
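For the reference comparison above, recall what SoftMax computes along the chosen axis; a small worked example with assumed inputs:

// softmax([1, 2, 3]) along the only axis:
//   exp(1) = 2.7183, exp(2) = 7.3891, exp(3) = 20.0855, sum = 30.1929
//   result = [2.7183, 7.3891, 20.0855] / 30.1929 ≈ [0.0900, 0.2447, 0.6652]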
index f56f877..4fec56a 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -52,6 +52,6 @@ void SpaceToBatchLayerTest::SetUp() {
 
 TEST_P(SpaceToBatchLayerTest, CompareWithRefs) {
     inferAndValidate();
-};
+}
 
 }  // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/strided_slice.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/strided_slice.cpp
new file mode 100644 (file)
index 0000000..7bc076b
--- /dev/null
@@ -0,0 +1,65 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include <ie_core.hpp>
+#include <ngraph_functions/builders.hpp>
+
+#include "functional_test_utils/blob_utils.hpp"
+#include "functional_test_utils/precision_utils.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#include "single_layer_tests/strided_slice.hpp"
+
+namespace LayerTestsDefinitions {
+
+std::string StridedSliceLayerTest::getTestCaseName(const testing::TestParamInfo<stridedSliceParamsTuple> &obj) {
+    InferenceEngine::SizeVector inputShape;
+    std::vector<int64_t> begin, end, stride;
+    std::vector<int64_t> begin_mask, new_axis_mask, end_mask, shrink_mask, ellipsis_mask;
+    InferenceEngine::Precision inPrc, netPrc;
+    std::string targetName;
+    std::tie(inputShape, begin, end, stride, begin_mask, end_mask, new_axis_mask, shrink_mask, ellipsis_mask, inPrc, netPrc, targetName) = obj.param;
+    std::ostringstream result;
+    result << "inShape=" << CommonTestUtils::vec2str(inputShape) << "_";
+    result << "inPRC=" << inPrc.name() << "_";
+    result << "netPRC=" << netPrc.name() << "_";
+    result << "begin=" << CommonTestUtils::vec2str(begin) << "_";
+    result << "end=" << CommonTestUtils::vec2str(end) << "_";
+    result << "stride=" << CommonTestUtils::vec2str(stride) << "_";
+    result << "begin_m=" << CommonTestUtils::vec2str(begin_mask) << "_";
+    result << "end_m=" << CommonTestUtils::vec2str(end_mask) << "_";
+    result << "new_axis_m=" << CommonTestUtils::vec2str(new_axis_mask) << "_";
+    result << "shrink_m=" << CommonTestUtils::vec2str(shrink_mask) << "_";
+    result << "ellipsis_m=" << CommonTestUtils::vec2str(ellipsis_mask) << "_";
+    result << "targetDevice=" << targetName << "_";
+    return result.str();
+}
+
+void StridedSliceLayerTest::SetUp() {
+    InferenceEngine::SizeVector inputShape;
+    std::vector<int64_t> begin, end, stride;
+    std::vector<int64_t> begin_mask, end_mask, new_axis_mask, shrink_mask, ellipsis_mask;
+    std::tie(inputShape, begin, end, stride, begin_mask, end_mask, new_axis_mask, shrink_mask, ellipsis_mask,
+             inputPrecision, netPrecision, targetDevice) = this->GetParam();
+
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+    auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+    auto ss = ngraph::builder::makeStridedSlice(paramOuts[0], begin, end, stride, ngPrc, begin_mask, end_mask, new_axis_mask, shrink_mask, ellipsis_mask);
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(ss)};
+    fnPtr = std::make_shared<ngraph::Function>(results, params, "StridedSlice");
+}
+
+TEST_P(StridedSliceLayerTest, CompareWithRefs) {
+    inferAndValidate();
+}
+
+}  // namespace LayerTestsDefinitions
index 009403a..6dc7473 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #pragma once
 
 #include <map>
index e7ba52a..516765b 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #pragma once
 
 #include <algorithm>
index 55c67ae..b40b2d7 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #pragma once
 
 #include <cmath>
index 0d63d16..f2fa02a 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #pragma once
 
 #include <fstream>
index ac0d2d0..10f5182 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 82c7652..a8e7328 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 57158f7..5ad75af 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #pragma once
 
 namespace CommonTestUtils {
index 2817c74..280e328 100644 (file)
@@ -4,25 +4,18 @@
 
 set(TARGET_NAME funcTestUtils)
 
-list(APPEND EXPORT_DEPENDENCIES
-        commonTestUtils
-        inference_engine
-        )
+list(APPEND EXPORT_DEPENDENCIES commonTestUtils inference_engine)
 
 addIeTarget(
-        NAME ${TARGET_NAME}
-        TYPE STATIC
-        ROOT ${CMAKE_CURRENT_SOURCE_DIR}
-        ADD_CPPLINT
-        DEVELOPER_PACKAGE
-        EXPORT_DEPENDENCIES
-            ${EXPORT_DEPENDENCIES}
+    NAME ${TARGET_NAME}
+    TYPE STATIC
+    ROOT ${CMAKE_CURRENT_SOURCE_DIR}
+    ADD_CPPLINT
+    DEVELOPER_PACKAGE
+    LINK_LIBRARIES ngraphFunctions
+    EXPORT_DEPENDENCIES ${EXPORT_DEPENDENCIES}
 )
 
-target_include_directories(${TARGET_NAME} PUBLIC
-            $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
+target_include_directories(${TARGET_NAME} PUBLIC $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>)
 
-target_link_libraries(${TARGET_NAME}
-        PUBLIC
-        ${EXPORT_DEPENDENCIES}
-        )
\ No newline at end of file
+target_link_libraries(${TARGET_NAME} PUBLIC ${EXPORT_DEPENDENCIES})
index 4c0af8d..7cd467e 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #pragma once
 
 
@@ -12,6 +11,7 @@
 
 #include <gtest/gtest.h>
 #include "blob_factory.hpp"
+#include "blob_transform.hpp"
 #include "precision_utils.h"
 #include "common_test_utils/data_utils.hpp"
 #include "common_test_utils/test_constants.hpp"
@@ -240,18 +240,40 @@ InferenceEngine::Blob::Ptr inline createAndFillBlob(const InferenceEngine::Tenso
     blob->allocate();
     switch (td.getPrecision()) {
 #define CASE(X) case X: CommonTestUtils::fill_data_random<X>(blob, range, start_from, resolution); break;
-        CASE(InferenceEngine::Precision::FP32);
-        CASE(InferenceEngine::Precision::FP16);
-        CASE(InferenceEngine::Precision::U8);
-        CASE(InferenceEngine::Precision::U16);
-        CASE(InferenceEngine::Precision::I8);
-        CASE(InferenceEngine::Precision::I16);
-        CASE(InferenceEngine::Precision::I64);
-        CASE(InferenceEngine::Precision::BIN);
+        CASE(InferenceEngine::Precision::FP32)
+        CASE(InferenceEngine::Precision::FP16)
+        CASE(InferenceEngine::Precision::U8)
+        CASE(InferenceEngine::Precision::U16)
+        CASE(InferenceEngine::Precision::I8)
+        CASE(InferenceEngine::Precision::I16)
+        CASE(InferenceEngine::Precision::I64)
+        CASE(InferenceEngine::Precision::BIN)
+        CASE(InferenceEngine::Precision::I32)
 #undef CASE
         default:
             THROW_IE_EXCEPTION << "Wrong precision specified: " << td.getPrecision().name();
     }
     return blob;
 }
-}  // namespace FuncTestUtils
\ No newline at end of file
+
+InferenceEngine::Blob::Ptr inline convertBlobLayout(const InferenceEngine::Blob::Ptr& in,
+                                                    InferenceEngine::Layout layout) {
+    IE_ASSERT(in != nullptr) << "Got NULL pointer";
+
+    const auto& inDesc = in->getTensorDesc();
+
+    if (inDesc.getLayout() == layout) {
+        return in;
+    }
+
+    const auto outDesc = InferenceEngine::TensorDesc(inDesc.getPrecision(), inDesc.getDims(), layout);
+
+    const auto out = make_blob_with_precision(outDesc);
+    out->allocate();
+
+    InferenceEngine::blob_copy(in, out);
+
+    return out;
+}
+
+}  // namespace FuncTestUtils
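A minimal usage sketch of the new convertBlobLayout helper together with createAndFillBlob from the same header; the shape and layouts are assumptions for illustration only.

// Convert a randomly filled NCHW blob to NHWC (placeholder shape).
InferenceEngine::TensorDesc nchwDesc(InferenceEngine::Precision::FP32, {1, 3, 16, 16}, InferenceEngine::Layout::NCHW);
auto nchwBlob = FuncTestUtils::createAndFillBlob(nchwDesc);
auto nhwcBlob = FuncTestUtils::convertBlobLayout(nchwBlob, InferenceEngine::Layout::NHWC);
// nhwcBlob holds the same data reordered; if the requested layout already matches, the input blob is returned as-is.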
diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_test_utils.cpp b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_test_utils.cpp
new file mode 100644 (file)
index 0000000..3a6480f
--- /dev/null
@@ -0,0 +1,121 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "layer_test_utils.hpp"
+
+namespace LayerTestsUtils {
+
+FuncTestsCommon::FuncTestsCommon() {
+    core = PluginCache::get().ie(targetDevice).get();
+}
+
+void FuncTestsCommon::Run() {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    Configure();
+    LoadNetwork();
+    Infer();
+    Validate();
+}
+
+FuncTestsCommon::~FuncTestsCommon() {
+    if (!configuration.empty()) {
+        PluginCache::get().reset();
+    }
+}
+
+InferenceEngine::Blob::Ptr FuncTestsCommon::GenerateInput(const InferenceEngine::InputInfo& info) const {
+    return FuncTestUtils::createAndFillBlob(info.getTensorDesc());
+}
+
+void FuncTestsCommon::Compare(const std::vector<std::uint8_t>& expected, const InferenceEngine::Blob::Ptr& actual) {
+    ASSERT_EQ(expected.size(), actual->byteSize());
+    const auto& expectedBuffer = expected.data();
+
+    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(actual);
+    IE_ASSERT(memory);
+    const auto lockedMemory = memory->wmap();
+    const auto actualBuffer = lockedMemory.as<const std::uint8_t*>();
+
+    const auto& precision = actual->getTensorDesc().getPrecision();
+    const auto& size = actual->size();
+    switch (precision) {
+        case InferenceEngine::Precision::FP32:
+            Compare(reinterpret_cast<const float*>(expectedBuffer), reinterpret_cast<const float*>(actualBuffer), size, 1e-2f);
+            break;
+        case InferenceEngine::Precision::I32:
+            Compare(reinterpret_cast<const std::int32_t*>(expectedBuffer), reinterpret_cast<const std::int32_t*>(actualBuffer), size, 0);
+            break;
+        default:
+            FAIL() << "Comparator for " << precision << " precision isn't supported";
+    }
+}
+
+void FuncTestsCommon::Configure() const {
+    if (!configuration.empty()) {
+        core->SetConfig(configuration, targetDevice);
+    }
+}
+
+void FuncTestsCommon::LoadNetwork() {
+    cnnNetwork = InferenceEngine::CNNNetwork{function};
+    executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice);
+    inferRequest = executableNetwork.CreateInferRequest();
+
+    for (const auto& input : cnnNetwork.getInputsInfo()) {
+        const auto& info = input.second;
+
+        auto blob = GenerateInput(*info);
+        inferRequest.SetBlob(info->name(), blob);
+        inputs.push_back(blob);
+    }
+}
+
+void FuncTestsCommon::Infer() {
+    inferRequest.Infer();
+}
+
+std::vector<InferenceEngine::Blob::Ptr> FuncTestsCommon::GetOutputs() {
+    auto outputs = std::vector<InferenceEngine::Blob::Ptr>{};
+    for (const auto& output : cnnNetwork.getOutputsInfo()) {
+        const auto& name = output.first;
+        outputs.push_back(inferRequest.GetBlob(name));
+    }
+    return outputs;
+}
+
+void FuncTestsCommon::Validate() {
+    // nGraph interpreter does not support f16
+    // IE converts f16 to f32
+    ngraph::pass::ConvertPrecision<ngraph::element::Type_t::f16, ngraph::element::Type_t::f32>().run_on_function(function);
+    function->validate_nodes_and_infer_types();
+
+    auto referenceInputs = std::vector<std::vector<std::uint8_t>>(inputs.size());
+    for (std::size_t i = 0; i < inputs.size(); ++i) {
+        const auto& input = inputs[i];
+        const auto& inputSize = input->byteSize();
+
+        auto& referenceInput = referenceInputs[i];
+        referenceInput.resize(inputSize);
+
+        auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(input);
+        IE_ASSERT(memory);
+        const auto lockedMemory = memory->wmap();
+        const auto buffer = lockedMemory.as<const std::uint8_t*>();
+        std::copy(buffer, buffer + inputSize, referenceInput.data());
+    }
+
+    const auto& expectedOutputs = ngraph::helpers::interpreterFunction(function, referenceInputs);
+    const auto& actualOutputs = GetOutputs();
+    IE_ASSERT(actualOutputs.size() == expectedOutputs.size())
+        << "nGraph interpreter has " << expectedOutputs.size() << " outputs, while IE " << actualOutputs.size();
+
+    for (std::size_t outputIndex = 0; outputIndex < expectedOutputs.size(); ++outputIndex) {
+        const auto& expected = expectedOutputs[outputIndex];
+        const auto& actual = actualOutputs[outputIndex];
+        Compare(expected, actual);
+    }
+}
+
+}  // namespace LayerTestsUtils
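To illustrate how the new FuncTestsCommon flow is meant to be used, a hypothetical derived test is sketched below (class name, shape and device are assumptions, not part of this commit): SetUp() fills the protected function and targetDevice members, and the test body only calls Run(), which performs Configure, LoadNetwork, Infer and Validate against the nGraph INTERPRETER reference.

// Hypothetical layer test built on FuncTestsCommon; all names and values are placeholders.
class ReluLayerSketchTest : public LayerTestsUtils::FuncTestsCommon,
                            public testing::WithParamInterface<std::string> {
protected:
    void SetUp() override {
        targetDevice = GetParam();  // e.g. "CPU" (placeholder)
        auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 16, 16});
        auto relu = std::make_shared<ngraph::opset1::Relu>(param);
        ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(relu)};
        function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{param}, "relu_sketch");
    }
};

TEST_P(ReluLayerSketchTest, CompareWithRefs) {
    Run();  // Configure -> LoadNetwork -> Infer -> Validate
}

A matching INSTANTIATE_TEST_CASE_P would supply the device names for such a test; it is omitted here.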
index 70ed664..861b87c 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -12,7 +12,7 @@
 #include <gtest/gtest.h>
 #include <ngraph/node.hpp>
 #include <ngraph/function.hpp>
-
+#include <ie_plugin_config.hpp>
 #include <ngraph/function.hpp>
 
 #include "common_test_utils/common_utils.hpp"
@@ -38,20 +38,39 @@ typedef std::tuple<
 template<typename paramType>
 class LayerTestsCommonClass : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<paramType> {
 public:
-    InferenceEngine::Precision netPrecision, inputPrecision;
+    InferenceEngine::Precision netPrecision;
+    InferenceEngine::Precision inputPrecision;
+    InferenceEngine::Precision outputPrecision;
+    InferenceEngine::Layout inputLayout;
+    InferenceEngine::Layout outputLayout;
     std::string targetDevice;
     std::shared_ptr<ngraph::Function> fnPtr;
+    std::map<std::string, std::string> config;
+
+    LayerTestsCommonClass() {
+        netPrecision = InferenceEngine::Precision::UNSPECIFIED;
+        inputPrecision = InferenceEngine::Precision::UNSPECIFIED;
+        outputPrecision = InferenceEngine::Precision::UNSPECIFIED;
+        inputLayout = InferenceEngine::Layout::ANY;
+        outputLayout = InferenceEngine::Layout::ANY;
+    }
 
     void inline inferAndValidate() {
         // Skip test according to plugin specific disabledTestPatterns() (if any)
         SKIP_IF_CURRENT_TEST_IS_DISABLED()
         // Create CNNNetwork from ngrpah::Function
         InferenceEngine::CNNNetwork cnnNet(fnPtr);
-        // Set target input Precisions for the network
-        setNetInOutPrecision(cnnNet, inputPrecision);
+        // Set target input/output Precisions for the network
+        setNetInOutPrecision(cnnNet, inputPrecision, outputPrecision);
+        // Set target input Layouts for the network
+        setNetInOutLayout(cnnNet, inputLayout, outputLayout);
 
         // Get Core from cache
         auto ie = PluginCache::get().ie();
+        // Load config
+        if (!config.empty()) {
+            ie->SetConfig(config, targetDevice);
+        }
         // Load CNNNetwork to target plugins
         auto execNet = ie->LoadNetwork(cnnNet, targetDevice);
         // Create InferRequest
@@ -68,29 +87,54 @@ public:
         // Create input vector with raw data for reference calculation
         std::vector<const float *> inRawData;
         // References are calculated in float precision, so blobs have to be copied and casted if required
-        std::vector<InferenceEngine::Blob::Ptr> castedBlobs = inBlobs;
-        for (size_t i = 0; i < castedBlobs.size(); i++) {
-            if (inputPrecision != InferenceEngine::Precision::FP32) {
-                castedBlobs[i] = FuncTestUtils::copyBlobWithCast<InferenceEngine::Precision::FP32>(inBlobs[i]);
+        std::vector<InferenceEngine::Blob::Ptr> castedBlobs;
+        for (size_t i = 0; i < inBlobs.size(); i++) {
+            const auto precision = inBlobs[i]->getTensorDesc().getPrecision();
+            const auto layout = inBlobs[i]->getTensorDesc().getLayout();
+            const auto defLayout = InferenceEngine::TensorDesc::getLayoutByDims(inBlobs[i]->getTensorDesc().getDims());
+
+            if (precision == InferenceEngine::Precision::FP32 && layout == defLayout) {
+                inRawData.push_back(inBlobs[i]->cbuffer().template as<const float*>());
+            } else {
+                auto castedBlob = FuncTestUtils::copyBlobWithCast<InferenceEngine::Precision::FP32>(inBlobs[i]);
+                castedBlob = FuncTestUtils::convertBlobLayout(castedBlob, defLayout);
+                inRawData.push_back(castedBlob->cbuffer().template as<const float*>());
+                castedBlobs.push_back(castedBlob);
             }
-            inRawData.push_back(castedBlobs[i]->cbuffer().as<float *>());
         }
         // Run inference in IE
         req.Infer();
-
+        // Reset PluginCache
+        if (!config.empty()) {
+            PluginCache::get().reset();
+        }
         // Get output raw data from resulting output blobs
         std::vector<float *> outBlobsRawData;
         std::vector<size_t> outElementsCount;  // output elements count required for compareRawBuffers()
         for (const auto &output : cnnNet.getOutputsInfo()) {
             auto currentBlob = req.GetBlob(output.first);
-            outBlobsRawData.push_back(currentBlob->cbuffer().template as<float *>());
+
             outElementsCount.push_back(
-                    std::accumulate(begin(output.second->getDims()), end(output.second->getDims()), 1,
-                                    std::multiplies<float>()));
+                std::accumulate(
+                    std::begin(output.second->getDims()), std::end(output.second->getDims()),
+                    size_t {1}, std::multiplies<size_t>()));
+
+            const auto precision = currentBlob->getTensorDesc().getPrecision();
+            const auto layout = currentBlob->getTensorDesc().getLayout();
+            const auto defLayout = InferenceEngine::TensorDesc::getLayoutByDims(currentBlob->getTensorDesc().getDims());
+
+            if (precision == InferenceEngine::Precision::FP32 && layout == defLayout) {
+                outBlobsRawData.push_back(currentBlob->cbuffer().template as<float*>());
+            } else {
+                auto castedBlob = FuncTestUtils::copyBlobWithCast<InferenceEngine::Precision::FP32>(currentBlob);
+                castedBlob = FuncTestUtils::convertBlobLayout(castedBlob, defLayout);
+                outBlobsRawData.push_back(castedBlob->cbuffer().template as<float*>());
+                castedBlobs.push_back(castedBlob);
+            }
         }
 
         // Convert initial ngraph::Function to fp32 for references calculation
-        convertFuncToF32(fnPtr, netPrecision);;
+        convertFuncToF32(fnPtr, netPrecision);
         // Run ngraph Interpreter backend to calculate references
         auto refOutData = ngraph::helpers::inferFnWithInterp<ngraph::element::Type_t::f32>(fnPtr, inRawData);
         // Compare IE infer results vs ngraph Interpreter reference results
@@ -99,13 +143,18 @@ public:
 
         // Deallocate ngraph::Function pointer
         fnPtr.reset();
+        if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
+            PluginCache::get().reset();
+        }
     }
 
 protected:
-    void setNetInOutPrecision(InferenceEngine::CNNNetwork &cnnNet, InferenceEngine::Precision inPrc,
+    static void setNetInOutPrecision(InferenceEngine::CNNNetwork &cnnNet, InferenceEngine::Precision inPrc,
                               InferenceEngine::Precision outPrc = InferenceEngine::Precision::UNSPECIFIED) {
-        for (const auto &inputItem : cnnNet.getInputsInfo()) {
-            inputItem.second->setPrecision(inPrc);
+        if (inPrc != InferenceEngine::Precision::UNSPECIFIED) {
+            for (const auto &inputItem : cnnNet.getInputsInfo()) {
+                inputItem.second->setPrecision(inPrc);
+            }
         }
         if (outPrc != InferenceEngine::Precision::UNSPECIFIED) {
             for (const auto &output : cnnNet.getOutputsInfo()) {
@@ -114,6 +163,20 @@ protected:
         }
     }
 
+    static void setNetInOutLayout(InferenceEngine::CNNNetwork& cnnNet, InferenceEngine::Layout inputLayout,
+                                  InferenceEngine::Layout outputLayout = InferenceEngine::Layout::ANY) {
+        if (inputLayout != InferenceEngine::Layout::ANY) {
+            for (const auto& inputItem : cnnNet.getInputsInfo()) {
+                inputItem.second->setLayout(inputLayout);
+            }
+        }
+        if (outputLayout != InferenceEngine::Layout::ANY) {
+            for (const auto& output : cnnNet.getOutputsInfo()) {
+                output.second->setLayout(outputLayout);
+            }
+        }
+    }
+
     void convertFuncToF32(std::shared_ptr<ngraph::Function> fn, InferenceEngine::Precision prc) {
         switch (prc) {
             case InferenceEngine::Precision::FP32:
@@ -154,4 +217,52 @@ inline std::vector<std::shared_ptr<ngraph::Node>> findTargetNodes(std::shared_pt
     return nodes;
 }
 
+using TargetDevice = std::string;
+
+class FuncTestsCommon : public CommonTestUtils::TestsCommon {
+public:
+    virtual InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const;
+    virtual void Run();
+    virtual void Compare(const std::vector<std::uint8_t>& expected, const InferenceEngine::Blob::Ptr& actual);
+
+protected:
+     FuncTestsCommon();
+    ~FuncTestsCommon() override;
+
+    template<class T>
+    void Compare(const T* expected, const T* actual, std::size_t size, T threshold) {
+        for (std::size_t i = 0; i < size; ++i) {
+            const auto& ref = expected[i];
+            const auto& res = actual[i];
+
+            const auto absoluteDifference = std::abs(res - ref);
+            if (absoluteDifference <= threshold) {
+                continue;
+            }
+
+            const auto max = std::max(std::abs(res), std::abs(ref));
+            ASSERT_TRUE(max != 0 && ((absoluteDifference / max) <= threshold))
+                << "Relative comparison of values expected: " << ref << " and actual: " << res << " at index " << i << " with threshold " << threshold
+                << " failed";
+        }
+    }
+
+    TargetDevice targetDevice;
+    std::shared_ptr<ngraph::Function> function;
+    std::map<std::string, std::string> configuration;
+
+private:
+    void Configure() const;
+    void LoadNetwork();
+    void Infer();
+    std::vector<InferenceEngine::Blob::Ptr> GetOutputs();
+    void Validate();
+
+    InferenceEngine::Core* core = nullptr;
+    InferenceEngine::CNNNetwork cnnNetwork;
+    InferenceEngine::ExecutableNetwork executableNetwork;
+    InferenceEngine::InferRequest inferRequest;
+    std::vector<InferenceEngine::Blob::Ptr> inputs;
+};
+
 }  // namespace LayerTestsUtils
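The templated Compare introduced above accepts a value when the absolute difference is within the threshold and otherwise falls back to a relative check; a worked example with assumed numbers:

// expected = 100.0f, actual = 100.5f, threshold = 1e-2f
// absolute difference = 0.5 > 0.01, so the relative branch runs:
// max(|actual|, |expected|) = 100.5, and 0.5 / 100.5 ≈ 0.00498 <= 0.01 -> the value is accepted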
index bf9b10f..5698f46 100644 (file)
@@ -9,6 +9,7 @@ list(APPEND EXPORT_DEPENDENCIES
         inference_engine_s
         inference_engine_preproc_s
         inference_engine_lp_transformations
+        inference_engine_ir_readers
         gmock)
 
 addIeTarget(
index 12129e2..05e86e8 100644 (file)
@@ -63,6 +63,9 @@ public:
         inputs[MockNotEmptyICNNNetwork::INPUT_BLOB_NAME] = inputInfo;
     };
     void addLayer(const CNNLayerPtr& layer) noexcept override {}
+    std::shared_ptr<ngraph::Function> getFunction() noexcept override {
+        return nullptr;
+    }
     std::shared_ptr<const ngraph::Function> getFunction() const noexcept override {
         return nullptr;
     }
index bdeb1a8..c9c73e2 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,6 +9,7 @@
 
 #include <ngraph/opsets/opset1.hpp>
 #include <ngraph/opsets/opset2.hpp>
+#include <ngraph/opsets/opset3.hpp>
 
 #include "ngraph_functions/utils/data_utils.hpp"
 
@@ -53,5 +54,16 @@ std::shared_ptr<ngraph::Node> makeSpaceToBatch(const ngraph::Output<Node> &in,
                                                const std::vector<size_t> &blockShape,
                                                const std::vector<size_t> &padsBegin,
                                                const std::vector<size_t> &padsEnd);
+
+std::shared_ptr<ngraph::Node> makeStridedSlice(const ngraph::Output<Node> &in,
+                                               const std::vector<int64_t> &begin,
+                                               const std::vector<int64_t> &end,
+                                               const std::vector<int64_t> &stride,
+                                               const element::Type &type,
+                                               const std::vector<int64_t> &begin_mask,
+                                               const std::vector<int64_t> &end_mask,
+                                               const std::vector<int64_t> &new_axis_mask = std::vector<int64_t>{},
+                                               const std::vector<int64_t> &shrink_mask = std::vector<int64_t>{},
+                                               const std::vector<int64_t> &ellipsis_mask = std::vector<int64_t>{});
 }  // namespace builder
-}  // namespace ngraph
\ No newline at end of file
+}  // namespace ngraph
index c5d57cc..879c35b 100644 (file)
@@ -1,6 +1,18 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
 //
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
 
 #pragma once
 
diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/subgraph_builders.hpp
new file mode 100644 (file)
index 0000000..2032a66
--- /dev/null
@@ -0,0 +1,188 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_precision.hpp>
+#include <functional_test_utils/precision_utils.hpp>
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+namespace subgraph {
+static std::shared_ptr<ngraph::Function> makeSplitConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20},
+                                                            InferenceEngine::Precision netPrecision = InferenceEngine::Precision::FP32) {
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+    auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
+
+    auto conv1 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto relu1 = std::make_shared<ngraph::opset1::Relu>(conv1);
+
+    auto conv2 = ngraph::builder::makeConvolution(split->output(1), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto relu2 = std::make_shared<ngraph::opset1::Relu>(conv2);
+
+    auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{relu1->output(0), relu2->output(0)}, 1);
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(concat)};
+    std::shared_ptr<ngraph::Function> fnPtr = std::make_shared<ngraph::Function>(results, params);
+    return fnPtr;
+}
+
+static std::shared_ptr<ngraph::Function> makeSplitMultiConvConcat(std::vector<size_t> inputShape = {1, 4, 20, 20}) {
+    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(InferenceEngine::Precision::FP32);
+    auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
+    auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
+
+    auto conv1_0 = ngraph::builder::makeConvolution(split->output(0), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto relu1_0 = std::make_shared<ngraph::opset1::Relu>(conv1_0);
+    auto conv1_1 = ngraph::builder::makeConvolution(relu1_0, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto relu1_1 = std::make_shared<ngraph::opset1::Relu>(conv1_1);
+    auto conv1_2 = ngraph::builder::makeConvolution(relu1_1, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu1_2 = std::make_shared<ngraph::opset1::Relu>(conv1_2);
+    auto conv1_3 = ngraph::builder::makeConvolution(relu1_2, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu1_3 = std::make_shared<ngraph::opset1::Relu>(conv1_3);
+    auto conv1_4 = ngraph::builder::makeConvolution(relu1_2, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu1_4 = std::make_shared<ngraph::opset1::Relu>(conv1_4);
+
+    auto conv2_0 = ngraph::builder::makeConvolution(split->output(1), ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu2_0 = std::make_shared<ngraph::opset1::Relu>(conv2_0);
+    auto conv2_1 = ngraph::builder::makeConvolution(relu2_0, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu2_1 = std::make_shared<ngraph::opset1::Relu>(conv2_1);
+    auto conv2_2 = ngraph::builder::makeConvolution(relu2_1, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu2_2 = std::make_shared<ngraph::opset1::Relu>(conv2_2);
+    auto conv2_3 = ngraph::builder::makeConvolution(relu2_2, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu2_3 = std::make_shared<ngraph::opset1::Relu>(conv2_3);
+    auto conv2_4 = ngraph::builder::makeConvolution(relu2_2, ngPrc, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                    ngraph::op::PadType::EXPLICIT, 5);
+    auto relu2_4 = std::make_shared<ngraph::opset1::Relu>(conv2_4);
+
+    auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{relu1_4->output(0), relu2_4->output(0)}, 1);
+    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(concat)};
+    std::shared_ptr<ngraph::Function> fnPtr = std::make_shared<ngraph::Function>(results, params);
+    return fnPtr;
+}
+
+static std::shared_ptr<ngraph::Function>
+makeTIwithLSTMcell(InferenceEngine::Precision prc = InferenceEngine::Precision::FP32) {
+    auto ngPRC = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc);
+    // That which we iterate over
+    const size_t N = 32; // Batch size
+    const size_t L = 10; // Sequence length
+    const size_t I = 8;  // Input size
+    const size_t H = 32; // Hidden size
+    auto SENT = std::make_shared<ngraph::opset1::Parameter>(ngPRC, ngraph::Shape{N, L, I});
+
+    auto H_init = std::make_shared<ngraph::opset1::Parameter>(ngPRC, ngraph::Shape{N, 1, H});
+    auto C_init = std::make_shared<ngraph::opset1::Parameter>(ngPRC, ngraph::Shape{N, 1, H});
+
+    auto H_t = std::make_shared<ngraph::opset1::Parameter>(ngPRC, ngraph::Shape{N, 1, H});
+    auto C_t = std::make_shared<ngraph::opset1::Parameter>(ngPRC, ngraph::Shape{N, 1, H});
+
+    // Body
+    auto X = std::make_shared<ngraph::opset1::Parameter>(ngPRC, ngraph::Shape{N, 1, I});
+    std::vector<uint64_t> dataW(4 * H * I, 0);
+    auto W_body = std::make_shared<ngraph::opset1::Constant>(ngPRC, ngraph::Shape{4 * H, I}, dataW);
+    std::vector<uint64_t> dataR(4 * H * H, 0);
+    auto R_body = std::make_shared<ngraph::opset1::Constant>(ngPRC, ngraph::Shape{4 * H, H}, dataR);
+    std::vector<uint64_t> inShape = {N, H};
+    auto constantH = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{2}, inShape);
+    inShape = {N, I};
+    auto constantX = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, ngraph::Shape{2}, inShape);
+    auto LSTM_cell =
+            std::make_shared<ngraph::opset1::LSTMCell>(std::make_shared<ngraph::opset1::Reshape>(X, constantX, false),
+                                                   std::make_shared<ngraph::opset1::Reshape>(H_t, constantH, false),
+                                                   std::make_shared<ngraph::opset1::Reshape>(C_t, constantH, false),
+                                                   W_body,
+                                                   R_body,
+                                                   H);
+    inShape = {N, 1, H};
+    auto constantHo = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, inShape);
+    auto H_o = std::make_shared<ngraph::opset1::Reshape>(LSTM_cell->output(0), constantHo, false);
+    auto C_o = std::make_shared<ngraph::opset1::Reshape>(LSTM_cell->output(1), constantHo, false);
+    auto body = std::make_shared<ngraph::op::TensorIterator::BodyLambda>(
+            ngraph::OutputVector{H_o, C_o}, ngraph::ParameterVector{X, H_t, C_t});
+
+    auto tensor_iterator = std::make_shared<ngraph::op::TensorIterator>();
+    tensor_iterator->set_body(body);
+    // start=0, stride=1, part_size=1, end=-1 (last), axis=1
+    tensor_iterator->set_sliced_input(X, SENT, 0, 1, 1, -1, 1);
+    // H_t is Hinit on the first iteration, Ho after that
+    tensor_iterator->set_merged_input(H_t, H_init, H_o);
+    tensor_iterator->set_merged_input(C_t, C_init, C_o);
+
+    // Output 0 is last Ho, result 0 of body
+    auto out0 = tensor_iterator->get_iter_value(H_o, -1);
+    // Output 1 is last Co, result 1 of body
+    auto out1 = tensor_iterator->get_iter_value(C_o, -1);
+
+    auto results = ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(out0),
+                                        std::make_shared<ngraph::opset1::Result>(out1)};
+    auto fn_ptr = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{SENT, H_init, C_init});
+    return fn_ptr;
+}
+
+static std::shared_ptr<ngraph::Function> makeSingleConv(std::vector<size_t> inputShape = {1, 3, 24, 24},
+                                                        InferenceEngine::Precision prc = InferenceEngine::Precision::FP32) {
+    ngraph::element::Type type = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc);
+    auto param0 = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(inputShape));
+    auto conv1 = ngraph::builder::makeConvolution(param0, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto result = std::make_shared<ngraph::opset1::Result>(conv1);
+    auto fn_ptr = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{param0});
+    return
+            fn_ptr;
+}
+
+static std::shared_ptr<ngraph::Function> makeMultiSingleConv(std::vector<size_t> inputShape = {1, 3, 24, 24}) {
+    ngraph::element::Type type = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(InferenceEngine::Precision::FP32);
+    auto param0 = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(inputShape));
+    auto conv1 = ngraph::builder::makeConvolution(param0, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv2 = ngraph::builder::makeConvolution(conv1, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv3 = ngraph::builder::makeConvolution(conv2, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv4 = ngraph::builder::makeConvolution(conv3, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv5 = ngraph::builder::makeConvolution(conv4, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv6 = ngraph::builder::makeConvolution(conv5, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv7 = ngraph::builder::makeConvolution(conv6, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv8 = ngraph::builder::makeConvolution(conv7, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                 ngraph::op::PadType::EXPLICIT, 5);
+    auto conv9 = ngraph::builder::makeConvolution(conv8, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto conv10 = ngraph::builder::makeConvolution(conv9, type, {3, 3}, {1, 1}, {0, 0}, {0, 0}, {1, 1},
+                                                  ngraph::op::PadType::EXPLICIT, 5);
+    auto result = std::make_shared<ngraph::opset1::Result>(conv10);
+    auto fn_ptr = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{param0});
+    return
+            fn_ptr;
+}
+
+static std::shared_ptr<ngraph::Function> make2InputSubtract(std::vector<size_t> inputShape = {1, 3, 24, 24},
+                                                            InferenceEngine::Precision prc = InferenceEngine::Precision::FP32) {
+    ngraph::element::Type type = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(prc);
+    auto param0 = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(inputShape));
+    auto param1 = std::make_shared<ngraph::opset1::Parameter>(type, ngraph::Shape(inputShape));
+    auto subtract = std::make_shared<ngraph::opset1::Subtract>(param0, param1);
+    auto result = std::make_shared<ngraph::opset1::Result>(subtract);
+    return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{param0, param1});
+}
+}  // namespace subgraph
+}  // namespace builder
+}  // namespace ngraph
\ No newline at end of file
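A minimal sketch of how these subgraph builders can be consumed outside the test fixtures; the device name is a placeholder assumption and the snippet is not part of this commit.

// Build a ready-made Split->Conv->Concat function and run it through the Inference Engine.
#include <ie_core.hpp>
#include "ngraph_functions/subgraph_builders.hpp"

int main() {
    auto fn = ngraph::builder::subgraph::makeSplitConvConcat();   // default shape {1, 4, 20, 20}, FP32
    InferenceEngine::CNNNetwork network(fn);
    InferenceEngine::Core core;
    auto execNet = core.LoadNetwork(network, "CPU");              // device name is a placeholder
    auto request = execNet.CreateInferRequest();
    request.Infer();                                              // runs on default-initialized input blobs
    return 0;
}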
index ea8aff8..6fbdc07 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <vector>
 #include <memory>
 
+#include <ngraph/runtime/interpreter/int_backend_visibility.hpp>
 #include <ngraph/opsets/opset1.hpp>
 #include <ngraph/runtime/backend_manager.hpp>
-#include <ngraph/component_manager.hpp>
 #include <ngraph/runtime/backend.hpp>
 #include <ngraph/runtime/tensor.hpp>
 
+extern "C" INTERPRETER_BACKEND_API void ngraph_register_interpreter_backend();
+
 namespace ngraph {
 namespace helpers {
 
@@ -111,6 +113,7 @@ inferFnWithInterp(const std::shared_ptr<ngraph::Function> &fn,
     ngraph::runtime::Backend::set_backend_shared_library_search_directory("");
 
     ngraph_register_interpreter_backend();
+
     auto backend = ngraph::runtime::Backend::create("INTERPRETER");
 
     std::vector<std::shared_ptr<ngraph::runtime::Tensor>> inTensors;
@@ -138,5 +141,8 @@ inferFnWithInterp(const std::shared_ptr<ngraph::Function> &fn,
     }
     return outData;
 }
+
+std::vector<std::vector<std::uint8_t>> interpreterFunction(const std::shared_ptr<Function>& function, const std::vector<std::vector<std::uint8_t>>& inputs);
+
 }  // namespace helpers
 }  // namespace ngraph
index aa692d1..9745164 100644 (file)
@@ -1,7 +1,8 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
+
 #include <vector>
 #include <memory>
 
index 135f097..6ce6860 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 5c5b47c..5600015 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 77094cb..34abb81 100644 (file)
@@ -1,6 +1,7 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+//
 
 #include <vector>
 #include <memory>
index 6c6dce6..0ed3a35 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 964ede0..9e6b628 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 3d00a0f..afdcd75 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/ngraph_functions/src/strided_slice.cpp b/inference-engine/tests/ngraph_functions/src/strided_slice.cpp
new file mode 100644 (file)
index 0000000..5fdd48b
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+std::shared_ptr<ngraph::Node> makeStridedSlice(const ngraph::Output<Node> &in,
+                                               const std::vector<int64_t> &begin,
+                                               const std::vector<int64_t> &end,
+                                               const std::vector<int64_t> &stride,
+                                               const element::Type &type,
+                                               const std::vector<int64_t> &begin_mask,
+                                               const std::vector<int64_t> &end_mask,
+                                               const std::vector<int64_t> &new_axis_mask,
+                                               const std::vector<int64_t> &shrink_mask,
+                                               const std::vector<int64_t> &ellipsis_mask) {
+    ngraph::Shape constShape = {in.get_shape().size()};
+    auto beginNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, constShape, begin.data());
+    auto endNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, constShape, end.data());
+    auto strideNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64, constShape, stride.data());
+    auto ssNode = std::make_shared<ngraph::opset2::StridedSlice>(in, beginNode, endNode, strideNode, begin_mask, end_mask);
+    return ssNode;
+}
+
+}  // namespace builder
+}  // namespace ngraph
index cb9a594..fe91c4a 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -7,6 +7,8 @@
 
 #include <ngraph/opsets/opset1.hpp>
 
+#include <ngraph_functions/utils/ngraph_helpers.hpp>
+
 namespace ngraph {
 namespace helpers {
 
@@ -20,13 +22,51 @@ ngraph::OutputVector convert2OutputVector(const std::vector<std::shared_ptr<ngra
     return outs;
 }
 
-template<class opType>
-ngraph::NodeVector castOps2Nodes(const std::vector<std::shared_ptr<opType>> &ops) {
-    ngraph::NodeVector nodes;
-    for (const auto &op : ops) {
-        nodes.push_back(std::dynamic_pointer_cast<ngraph::Node>(op));
+std::vector<std::vector<std::uint8_t>> interpreterFunction(const std::shared_ptr<Function>& function, const std::vector<std::vector<std::uint8_t>>& inputs) {
+    ngraph::runtime::Backend::set_backend_shared_library_search_directory("");
+    ngraph_register_interpreter_backend();
+    auto backend = ngraph::runtime::Backend::create("INTERPRETER");
+
+    const auto& parameters = function->get_parameters();
+    const auto& parametersNumber = parameters.size();
+    const auto& inputsNumber = inputs.size();
+    NGRAPH_CHECK(parametersNumber == inputsNumber,
+        "Got function (", function->get_friendly_name(), ") with ", parametersNumber, " parameters, but ", inputsNumber, " input blobs");
+
+    auto inputTensors = std::vector<std::shared_ptr<runtime::Tensor>>{};
+    for (const auto& parameter : parameters) {
+        const auto& parameterIndex = function->get_parameter_index(parameter);
+        const auto& parameterShape = parameter->get_shape();
+        const auto& parameterType  = parameter->get_element_type();
+        const auto& parameterSize  = ngraph::shape_size(parameterShape) * parameterType.size();
+
+        const auto& input = inputs[parameterIndex];
+        const auto& inputSize = input.size();
+        NGRAPH_CHECK(parameterSize == inputSize,
+            "Got parameter (", parameter->get_friendly_name(), ") of size ", parameterSize, " bytes, but corresponding input with index ", parameterIndex,
+            " has ", inputSize, " bytes");
+
+        auto tensor = backend->create_tensor(parameterType, parameterShape);
+        tensor->write(input.data(), parameterSize);
+        inputTensors.push_back(tensor);
     }
-    return nodes;
+
+    auto outputTensors = std::vector<std::shared_ptr<runtime::Tensor>>{};
+    const auto& results = function->get_results();
+    std::transform(results.cbegin(), results.cend(), std::back_inserter(outputTensors), [&backend](const std::shared_ptr<op::Result>& result) {
+        return backend->create_tensor(result->get_element_type(), result->get_shape()); });
+
+    auto handle = backend->compile(function);
+    handle->call_with_validate(outputTensors, inputTensors);
+    auto outputs = std::vector<std::vector<std::uint8_t>>(results.size());
+    for (const auto& result : results) {
+        const auto& resultIndex = function->get_result_index(result);
+        auto& output = outputs[resultIndex];
+        output.resize(ngraph::shape_size(result->get_shape()) * result->get_element_type().size());
+        outputTensors[resultIndex]->read(output.data(), output.size());
+    }
+
+    return outputs;
 }
 
 }  // namespace helpers
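
A minimal sketch of how a test can drive this helper (illustrative only, not part of this commit; assumes <cstring> and the opset1 headers already pulled in above):

    // Hypothetical sketch: run a tiny Relu graph on the INTERPRETER backend
    // and read the raw output bytes back.
    auto param = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 4});
    auto relu  = std::make_shared<ngraph::opset1::Relu>(param);
    auto func  = std::make_shared<ngraph::Function>(ngraph::NodeVector{relu}, ngraph::ParameterVector{param});

    std::vector<float> in = {-1.f, 0.f, 2.f, -3.f};
    std::vector<std::uint8_t> inBytes(in.size() * sizeof(float));
    std::memcpy(inBytes.data(), in.data(), inBytes.size());

    // One byte buffer per function parameter, in parameter order.
    auto outBytes = ngraph::helpers::interpreterFunction(func, {inBytes});
    // outBytes[0] now holds four floats: {0, 0, 2, 0}.
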
index 79ceb78..7769b67 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 Intel Corporation
+# Copyright (C) 2018-2020 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
index 00abeb5..f11f274 100644 (file)
@@ -1,7 +1,6 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
 #define INTEL_GNA_DLLEXPORT 1
 
 #if GNA_LIB_VER == 1
index 9b335c8..d94c8c1 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp
new file mode 100644 (file)
index 0000000..fb0ac0e
--- /dev/null
@@ -0,0 +1,176 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gna/gna_config.hpp>
+#include "gna_plugin_config.hpp"
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <map>
+
+using namespace InferenceEngine;
+using namespace GNAPluginNS;
+
+const std::map<std::string, std::string>  supportedConfigKeysWithDefaults = {
+    {GNA_CONFIG_KEY(SCALE_FACTOR), "1.000000"},
+    {GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"},
+    {GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), ""},
+    {GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION), ""},
+    {GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT},
+    {GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(YES)},
+    {CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)},
+    {GNA_CONFIG_KEY(PRECISION), Precision(Precision::I16).name()},
+    {GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(NO)},
+    {CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO)},
+    {GNA_CONFIG_KEY(LIB_N_THREADS), "1"},
+    {CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)}
+};
+
+class GNAPluginConfigTest : public ::testing::Test {
+protected:
+    Config config;
+    void SetAndCompare(const std::string& key, const std::string& val) {
+        config.UpdateFromMap({{key, val}});
+        EXPECT_EQ(config.GetParameter(key), val);
+    }
+    void ExpectThrow(const std::string& key, const std::string& val) {
+        EXPECT_THROW(config.UpdateFromMap({{key, val}}),
+                     details::InferenceEngineException);
+    }
+    void SetAndCheckFlag(const std::string& key, bool& val, bool reverse = false) {
+        const bool yes = reverse ? false : true;
+        const bool no = !yes;
+        SetAndCompare(key, CONFIG_VALUE(YES));
+        EXPECT_EQ(val, yes);
+        SetAndCompare(key, CONFIG_VALUE(NO));
+        EXPECT_EQ(val, no);
+        SetAndCompare(key, CONFIG_VALUE(YES));
+        EXPECT_EQ(val, yes);
+        ExpectThrow(key, "abc");
+        ExpectThrow(key, "");
+    }
+};
+
+TEST_F(GNAPluginConfigTest, GnaConfigDefaultConfigIsExpected) {
+    ASSERT_EQ(config.key_config_map, supportedConfigKeysWithDefaults);
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigScaleFactorTest) {
+    config.UpdateFromMap({{GNA_CONFIG_KEY(SCALE_FACTOR), std::string("34")}});
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR)), std::string("34.000000"));
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0")), std::string("34.000000"));
+    EXPECT_EQ(config.inputScaleFactors.size(), 1);
+    EXPECT_FLOAT_EQ(config.inputScaleFactors[0], 34.0);
+
+    config.UpdateFromMap({{GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_3"), std::string("15.2")}});
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR)), std::string("34.000000"));
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0")), std::string("34.000000"));
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_1")), std::string("1.000000"));
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_2")), std::string("1.000000"));
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_3")), std::string("15.200000"));
+    EXPECT_EQ(config.inputScaleFactors.size(), 4);
+    EXPECT_FLOAT_EQ(config.inputScaleFactors[0], 34.0);
+    EXPECT_FLOAT_EQ(config.inputScaleFactors[1], 1.0);
+    EXPECT_FLOAT_EQ(config.inputScaleFactors[2], 1.0);
+    EXPECT_FLOAT_EQ(config.inputScaleFactors[3], 15.2);
+
+    config.UpdateFromMap({{GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_9"), std::string("8.43")}});
+    EXPECT_EQ(config.GetParameter(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_9")), std::string("8.430000"));
+    EXPECT_EQ(config.inputScaleFactors.size(), 10);
+    EXPECT_FLOAT_EQ(config.inputScaleFactors[9], 8.43);
+
+    ExpectThrow(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_10"), std::string("8.43"));
+    ExpectThrow(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("&1"), std::string("8.43"));
+    ExpectThrow(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_"), std::string("8.43"));
+    ExpectThrow(GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("abs"), std::string("8.43"));
+    ExpectThrow(GNA_CONFIG_KEY(SCALE_FACTOR), std::string("abc"));
+    ExpectThrow(GNA_CONFIG_KEY(SCALE_FACTOR), std::string("0"));
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigFirmwareModelImageTest) {
+    SetAndCompare(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), "abc");
+    EXPECT_EQ(config.dumpXNNPath, "abc");
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigFirmwareModelImageGeneratorTest) {
+    SetAndCompare(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION), "def");
+    EXPECT_EQ(config.dumpXNNGeneration, "def");
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigDeviceModeTest) {
+    SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_HW);
+#if GNA_LIB_VER == 1
+    EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_HARDWARE));
+#else
+    EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware);
+    EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation);
+#endif
+    SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW);
+#if GNA_LIB_VER == 1
+    EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE));
+#else
+    EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware);
+    EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation);
+#endif
+    SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT);
+#if GNA_LIB_VER == 1
+    EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE));
+#else
+    EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeSoftware);
+    EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersion1_0);
+#endif
+    SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_AUTO);
+#if GNA_LIB_VER == 1
+    EXPECT_EQ(config.gna_proc_type, static_cast<intel_gna_proc_t>(GNA_AUTO));
+#else
+    EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeAuto);
+    EXPECT_EQ(config.pluginGna2DeviceConsistent, Gna2DeviceVersionSoftwareEmulation);
+#endif
+    ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), "");
+    ExpectThrow(GNA_CONFIG_KEY(DEVICE_MODE), "abc");
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigCompactMode) {
+    SetAndCheckFlag(GNA_CONFIG_KEY(COMPACT_MODE),
+                    config.gnaFlags.compact_mode);
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigExclusiveAsyncRequestTest) {
+    SetAndCheckFlag(CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
+                    config.gnaFlags.exclusive_async_requests);
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigPrecisionTest) {
+    SetAndCompare(GNA_CONFIG_KEY(PRECISION), Precision(Precision::I8).name());
+    EXPECT_EQ(config.gnaPrecision, Precision::I8);
+    SetAndCompare(GNA_CONFIG_KEY(PRECISION), Precision(Precision::I16).name());
+    EXPECT_EQ(config.gnaPrecision, Precision::I16);
+    ExpectThrow(GNA_CONFIG_KEY(PRECISION), Precision(Precision::FP32).name());
+    ExpectThrow(GNA_CONFIG_KEY(PRECISION), "");
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigPwlUniformDesignTest) {
+    SetAndCheckFlag(GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN),
+                    config.gnaFlags.uniformPwlDesign);
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigPerfCountTest) {
+    SetAndCheckFlag(CONFIG_KEY(PERF_COUNT),
+                    config.gnaFlags.performance_counting);
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigLibNThreadsTest) {
+    SetAndCompare(GNA_CONFIG_KEY(LIB_N_THREADS), "2");
+    EXPECT_EQ(config.gnaFlags.gna_lib_async_threads_num, 2);
+    SetAndCompare(GNA_CONFIG_KEY(LIB_N_THREADS), "25");
+    EXPECT_EQ(config.gnaFlags.gna_lib_async_threads_num, 25);
+    ExpectThrow(GNA_CONFIG_KEY(LIB_N_THREADS), "");
+    ExpectThrow(GNA_CONFIG_KEY(LIB_N_THREADS), "0");
+    ExpectThrow(GNA_CONFIG_KEY(LIB_N_THREADS), "128");
+    ExpectThrow(GNA_CONFIG_KEY(LIB_N_THREADS), "abc");
+}
+
+TEST_F(GNAPluginConfigTest, GnaConfigSingleThreadTest) {
+    SetAndCheckFlag(CONFIG_KEY(SINGLE_THREAD),
+                    config.gnaFlags.gna_openmp_multithreading,
+                    true);
+}
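
For reference, a hedged sketch of how the same keys reach the plugin through the public API in an application (the model path and scale factor value are assumptions, not taken from this commit; assumes <ie_core.hpp> and <gna/gna_config.hpp> are included):

    InferenceEngine::Core core;
    std::map<std::string, std::string> gnaConfig = {
        {GNA_CONFIG_KEY(DEVICE_MODE), InferenceEngine::GNAConfigParams::GNA_SW_EXACT},
        {GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1024"},
        {GNA_CONFIG_KEY(PRECISION), "I16"},
        {CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES)}
    };
    auto network    = core.ReadNetwork("model.xml");          // hypothetical model path
    auto executable = core.LoadNetwork(network, "GNA", gnaConfig);
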
index 6c76eaa..ec3416e 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 7f3be7f..c0b0380 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index d8c6097..d93df51 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2020 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
index 14265fa..281f283 100644 (file)
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-set(TARGET_NAME helpers)
+set(TARGET_NAME ieTestHelpers)
 
 file(GLOB HELPERS_SRC
        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
@@ -15,13 +15,13 @@ file (GLOB HELPERS_INCLUDES
 
 ## Enable Models multiple search pathes
 message("configuring file: ${CMAKE_CURRENT_BINARY_DIR}/test_model_repo.h")
-configure_file(test_model_repo.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/test_model_repo.hpp @ONLY)
 
 function(add_helpers target_name)
     add_library(${target_name} STATIC ${HELPERS_SRC})
 
     target_include_directories(${target_name} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}"
                                                      "${IE_MAIN_SOURCE_DIR}/src/inference_engine"
+                                                     $<TARGET_PROPERTY:inference_engine_ir_readers,INTERFACE_INCLUDE_DIRECTORIES>
                                                      $<TARGET_PROPERTY:inference_engine_lp_transformations,INTERFACE_INCLUDE_DIRECTORIES>
                                                      $<TARGET_PROPERTY:pugixml,INTERFACE_INCLUDE_DIRECTORIES>
                                                      "${IE_MAIN_SOURCE_DIR}/src/vpu/"
@@ -31,9 +31,6 @@ function(add_helpers target_name)
     target_include_directories(${target_name} PUBLIC
         "${IE_MAIN_SOURCE_DIR}/samples/common/os/windows")
 
-    target_compile_definitions(${target_name} PUBLIC ${ARGV}
-        MODELS_PATH=\"${MODELS_PATH}\" DATA_PATH=\"${VALIDATION_SET}\")
-
     set_property(TARGET ${target_name} PROPERTY COMPILE_PDB_NAME ${target_name})
 
     # add_cpplint_target(${target_name}_cpplint FOR_TARGETS ${target_name})
index 9297160..a37f921 100644 (file)
@@ -66,20 +66,29 @@ BufferWrapper::BufferWrapper(const Blob::Ptr& blob, Precision _precision) : prec
         fp16_ptr = blob->buffer().as<ie_fp16*>();
     } else if (precision == Precision::FP32) {
         fp32_ptr = blob->buffer().as<float*>();
+    } else if (precision == Precision::I32) {
+        i32_ptr = blob->buffer().as<int32_t*>();
     } else {
         THROW_IE_EXCEPTION << "Unsupported precision for compare: " << precision;
     }
 }
 
 float BufferWrapper::operator[](size_t index) {
-    if (precision == Precision::FP16) return PrecisionUtils::f16tof32(fp16_ptr[index]);
+    if (precision == Precision::FP16) {
+        return PrecisionUtils::f16tof32(fp16_ptr[index]);
+    } else if (precision == Precision::I32) {
+        return i32_ptr[index];
+    }
     return fp32_ptr[index];
 }
 
 void BufferWrapper::insert(size_t index, float value) {
     if (precision == Precision::FP16) {
         fp16_ptr[index] = PrecisionUtils::f32tof16(value);
-    } else {
+    } else if (precision == Precision::I32) {
+        i32_ptr[index] = value;
+    }
+    else {
         fp32_ptr[index] = value;
     }
 }
index 540bb4a..27ef66b 100644 (file)
@@ -127,6 +127,7 @@ class BufferWrapper {
     InferenceEngine::Precision precision;
     InferenceEngine::ie_fp16 *fp16_ptr;
     float *fp32_ptr;
+    int32_t *i32_ptr;
 public:
     explicit BufferWrapper(const InferenceEngine::Blob::Ptr &blob);
 
@@ -38,27 +38,8 @@ static std::string getDirname(std::string filePath) {
 }
 #endif
 
-const char* getModelPathNonFatal() noexcept {
-#ifdef MODELS_PATH
-    const char* models_path = std::getenv("MODELS_PATH");
-
-    if (models_path == nullptr && MODELS_PATH == nullptr) {
-        return nullptr;
-    }
-
-    if (models_path == nullptr) {
-        return MODELS_PATH;
-    }
-
-    return models_path;
-#else
-    return nullptr;
-#endif
-}
-
-
 static std::string get_models_path() {
-    const char* models_path = getModelPathNonFatal();
+    const char* models_path = TestDataHelpers::getModelPathNonFatal();
 
     if (nullptr == models_path) {
         ::testing::AssertionFailure() << "MODELS_PATH not defined";
@@ -97,10 +78,10 @@ static std::vector<std::string> getModelsDirs() {
 }
 
 ModelsPath::operator std::string() const {
-
     std::vector<std::string> absModelsPath;
     for (auto & path  : getModelsDirs()) {
-        const auto absPath = get_models_path() + kPathSeparator + "src" + kPathSeparator + path + _rel_path.str();
+        std::string b = get_models_path();
+        const auto absPath = get_models_path() + kPathSeparator + path + _rel_path.str();
         absModelsPath.push_back(absPath);
         if (exist(absPath)) {
             return absPath;
diff --git a/inference-engine/tests_deprecated/helpers/test_model_repo.hpp b/inference-engine/tests_deprecated/helpers/test_model_repo.hpp
new file mode 100644 (file)
index 0000000..2725631
--- /dev/null
@@ -0,0 +1,50 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include <string>
+
+std::string get_model_repo();
+
+namespace TestDataHelpers {
+
+const char *getModelPathNonFatal() noexcept;
+
+std::string get_data_path();
+
+inline const char *getModelPathNonFatalDefault() noexcept {
+#ifdef MODELS_PATH
+    const char *models_path = std::getenv("MODELS_PATH");
+
+    if (models_path == nullptr && MODELS_PATH == nullptr) {
+        return nullptr;
+    }
+
+    if (models_path == nullptr) {
+        return MODELS_PATH;
+    }
+
+    return models_path;
+#else
+    return nullptr;
+#endif
+};
+
+inline std::string get_data_path_default() {
+#ifdef DATA_PATH
+    const char *data_path = std::getenv("DATA_PATH");
+
+    if (data_path == NULL) {
+        if (DATA_PATH != NULL) {
+            data_path = DATA_PATH;
+        } else {
+            return nullptr;
+        }
+    }
+    return std::string(data_path);
+#else
+    return nullptr;
+#endif
+}
+}  // namespace TestDataHelpers
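
Since the configure_file()-generated header is removed below, here is a hedged sketch of the per-suite source that the new header expects each test target to provide (the model-list string is an assumption standing in for the old @MODELS_LST@ substitution):

    // Hypothetical test_model_repo.cpp for one test target.
    #include "test_model_repo.hpp"

    std::string get_model_repo() {
        return "models:2020";  // assumed model-list value
    }

    const char* TestDataHelpers::getModelPathNonFatal() noexcept {
        return TestDataHelpers::getModelPathNonFatalDefault();
    }

    std::string TestDataHelpers::get_data_path() {
        return TestDataHelpers::get_data_path_default();
    }
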
diff --git a/inference-engine/tests_deprecated/helpers/test_model_repo.hpp.in b/inference-engine/tests_deprecated/helpers/test_model_repo.hpp.in
deleted file mode 100644 (file)
index 3f7ad56..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-#include <string>
-std::string get_model_repo() {
-    return "@MODELS_LST@";
-}
\ No newline at end of file
index 57b6f6b..c7a8150 100644 (file)
@@ -173,7 +173,7 @@ static std::vector<std::shared_ptr<BaseTestCreator>>& getCreators() {
             std::make_shared<LayerTestCreator<InferenceEngine::ReduceLayer>>("ReduceSumSquare"),
             std::make_shared<LayerTestCreator<InferenceEngine::TopKLayer>>("TopK"),
             std::make_shared<LayerTestCreator<InferenceEngine::NonMaxSuppressionLayer>>("NonMaxSuppression"),
-            std::make_shared<LayerTestCreator<InferenceEngine::ScatterLayer>>("ScatterUpdate")
+            std::make_shared<LayerTestCreator<InferenceEngine::ScatterUpdateLayer>>("ScatterUpdate")
     };
     return creators;
 }
index 03b027e..cbbd110 100644 (file)
@@ -20,6 +20,7 @@
 #include <ie_input_info.hpp>
 #include <ie_icnn_network.hpp>
 
+#include "test_model_repo.hpp"
 #include "test_model_path.hpp"
 #include <tests_file_utils.hpp>
 #include <chrono>
@@ -41,11 +42,14 @@ inline std::string to_string_c_locale(T value) {
 class TestsCommon : public ::testing::Test {
 public:
     IE_SUPPRESS_DEPRECATED_START
-    static InferenceEngine::CNNLayer::Ptr createLayer(const std::string& type);
+
+    static InferenceEngine::CNNLayer::Ptr createLayer(const std::string &type);
+
     IE_SUPPRESS_DEPRECATED_END
 
 protected:
     void SetUp() override;
+
     void TearDown() override;
 
 public:
@@ -53,19 +57,6 @@ public:
         return make_plugin_name("mock_engine");
     }
 
-    static std::string get_data_path(){
-        const char* data_path = std::getenv("DATA_PATH");
-
-        if (data_path == NULL){
-            if(DATA_PATH != NULL){
-                data_path = DATA_PATH;
-            } else{
-                ::testing::AssertionFailure()<<"DATA_PATH not defined";
-            }
-        }
-        return std::string(data_path);
-    }
-
     static std::string make_so_name(const std::string & input) {
         return CommonTestUtils::pre + input + IE_BUILD_POSTFIX + CommonTestUtils::ext;
     }
index 21fef55..1a968f7 100644 (file)
@@ -157,15 +157,16 @@ endif ()
 
 target_link_libraries(${TARGET_NAME} PRIVATE
     # static libraries
+    inference_engine_s # need to have this explicitly for USE_STATIC_IE
     unitTestUtils
-    helpers_s
+    ieTestHelpers_s
     ${GNA_TEST_ENGINE}
 
     # dynamic libraries
     inference_engine_lp_transformations
+    inference_engine_ir_readers
     inference_engine_transformations
-    ${CMAKE_DL_LIBS}
-    )
+    ${CMAKE_DL_LIBS})
 
 if(TARGET libGNAStubs)
     target_link_libraries(${TARGET_NAME} PRIVATE libGNAStubs)
@@ -175,6 +176,11 @@ if (ENABLE_MKL_DNN)
     target_link_libraries(${TARGET_NAME} PRIVATE mkldnn)
 endif ()
 
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fuse-ld=gold")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
+endif()
+
 add_test(NAME ${TARGET_NAME}
         COMMAND ${TARGET_NAME})
 
index 85f5975..6edab00 100644 (file)
@@ -29,7 +29,8 @@ TEST_F (V2FormatParserTest, invalidXml_ShouldThrow) {
             .node("net")
             .attr("name", "AlexNet").attr("version", 2);
 
-    ASSERT_THROW(parse(content), InferenceEngine::details::InferenceEngineException);
+    // TODO: fix RTTI issue and replace by InferenceEngine::details::InferenceEngineException
+    ASSERT_THROW(parse(content), std::exception);
 }
 
 TEST_F (V2FormatParserTest, canParseDims) {
index 7958f60..a4e2f86 100644 (file)
@@ -324,6 +324,9 @@ void GNAPropagateMatcher :: match() {
                     case GnaPluginTestEnvironment::matchAffineWeights:
                         HasWeightsEq(combined, _env.transposedData);
                         break;
+                    case GnaPluginTestEnvironment::matchAffineWeightsSize:
+                        HasWeightsSizeEq(combined, _env.matched_weight_size);
+                        break;
                     case GnaPluginTestEnvironment::saveAffineWeights:
                         SaveWeights(combined, _env.transposedData, _env.transposedArgsForSaving);
                         break;
index ee07ca2..a96224e 100644 (file)
@@ -71,7 +71,8 @@ class GnaPluginTestEnvironment {
         fillOutputValues,
         matchAffineWeightsTranspose,
         matchAffineWeights,
-        saveAffineWeights
+        matchAffineWeightsSize,
+        saveAffineWeights,
     };
     enum {
         kUnset = -1,
@@ -113,6 +114,8 @@ class GnaPluginTestEnvironment {
     std::pair<int, int> transposedArgsForSaving;
     std::vector<uint16_t>* transposedData;
     std::vector<DnnActivationType> pwlsToMatchWith;
+    size_t matched_weight_size = 0;
+    size_t nCopyLayersToMatch = -1;
 };
 
 class GNATestBase {
@@ -162,6 +165,14 @@ class GNATestConfigurability : public GNATestBase{
         _env.config[keyName] = ss.str();
         return *dynamic_cast<T*>(this);
     }
+    T & onCPU() {
+        _env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = GNA_CONFIG_VALUE(SW_FP32);
+        return *dynamic_cast<T*>(this);
+    }
+    T & withPolicy(GNAPluginNS::Policy::ConcatAlignment concatAlignmentPolicy) {
+        _env.policy.ConcatAlignmentPolicy = concatAlignmentPolicy;
+        return *dynamic_cast<T*>(this);
+    }
     T & withGNADeviceMode(std::string value) {
         _env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = value;
         return *dynamic_cast<T*>(this);
@@ -210,10 +221,6 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
-    GNAPropagateMatcher & And() {
-        return *this;
-    }
-
     GNAPropagateMatcher & that() {
         return *this;
     }
@@ -268,7 +275,6 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
-
     GNAPropagateMatcher & once() {
         return times(1);
     }
@@ -356,7 +362,6 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
-
     GNAPropagateMatcher & affine_weights_transpozed(std::pair<int, int> &&transpozedArgs) {
         getMatcher().type = GnaPluginTestEnvironment::saveAffineWeights;
         _env.transposedArgsForSaving = std::move(transpozedArgs);
@@ -420,11 +425,6 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
-    GNAPropagateMatcher & onCPU() {
-        _env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = GNA_CONFIG_VALUE(SW_FP32);
-        return *this;
-    }
-
  protected:
     void match();
     intel_nnet_type_t * original_nnet = nullptr;
@@ -513,6 +513,26 @@ class GNAQueryStateMatcher : public GNADumpXNNMatcher {
     void match();
 };
 
+/**
+ * @brief weights matcher has specific weights matching methods
+ */
+class GNAWeightsMatcher : public GNAPropagateMatcher {
+ public:
+    using base = GNAPropagateMatcher;
+    using base::base;
+
+    GNAWeightsMatcher & size() {
+        getMatcher().type = GnaPluginTestEnvironment::matchAffineWeightsSize;
+        return *this;
+    }
+    GNAWeightsMatcher & equals_to(size_t weights_size) {
+        if (getMatcher().type == GnaPluginTestEnvironment::matchAffineWeightsSize) {
+            _env.matched_weight_size = weights_size;
+        }
+        return *this;
+    }
+};
+
 
 
 /**
@@ -581,6 +601,16 @@ class GNATest : public U, public GNATestConfigurability<GNATest<U>>  {
         _env.model = _model;
         return *this;
     }
+    GNATest & afterLoadingModel(std::shared_ptr<ngraph::Function> ngraph_model) {
+        _env.ngraph_model = ngraph_model;
+        return *this;
+    }
+
+    GNAWeightsMatcher & affine_weights() {
+        returnedMatchers.push_back(std::make_shared<GNAWeightsMatcher>(_env));
+        _env = GnaPluginTestEnvironment();
+        return dynamic_cast<GNAWeightsMatcher&>(*returnedMatchers.back());
+    }
 
     GNAQueryStateMatcher & queryState() {
         returnedMatchers.push_back(std::make_shared<GNAQueryStateMatcher>(_env));
@@ -597,10 +627,12 @@ class GNATest : public U, public GNATestConfigurability<GNATest<U>>  {
         _env = GnaPluginTestEnvironment();
         return dynamic_cast<GNAPropagateMatcher&>(*returnedMatchers.back());
     }
+
     GNATest & importedFrom(std::string fileName) {
         _env.importedModelFileName = fileName;
         return *this;
     }
+
     GNATest & onInferModel(std::string _model = "",
                            std::function<void (InferenceEngine::CNNNetwork &)> _cb = [](InferenceEngine::CNNNetwork & net){}) {
         _env.model = _model;
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/layers/gna_align_filter2_tests.cpp b/inference-engine/tests_deprecated/unit/engines/gna/layers/gna_align_filter2_tests.cpp
new file mode 100644 (file)
index 0000000..e12fecd
--- /dev/null
@@ -0,0 +1,184 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <tuple>
+#include <vector>
+#include <gtest/gtest.h>
+#include <single_layer_common.hpp>
+#include <ngraph/op/parameter.hpp>
+#include <ngraph/ops.hpp>
+#include <ie_precision.hpp>
+#include "../gna_matcher.hpp"
+
+using GNAAlignFilterTestParams  = std::tuple<InferenceEngine::Precision, GNAPluginNS::Policy::ConcatAlignment, std::size_t, std::size_t>;
+using namespace GNAPluginNS;
+
+class GNAAlignFilterTest : public GNATest<>,
+                             public testing::WithParamInterface<GNAAlignFilterTestParams> {
+ public:
+
+    static std::string getTestName(const testing::TestParamInfo<GNAAlignFilterTestParams>& params) {
+        std::string test_name;
+        if (std::get<1>(params.param) == GNAPluginNS::Policy::ConcatAlignment::FAST) {
+            test_name += "fast_";
+        }
+        test_name += "concat_of(" + std::to_string(std::get<2>(params.param));
+        test_name += "_" + std::to_string(std::get<3>(params.param));
+        test_name += ")_on_";
+        test_name += std::get<0>(params.param).name();
+        return test_name;
+    }
+
+ protected:
+
+    InferenceEngine::Precision precision = InferenceEngine::Precision::FP32;
+    std::size_t concat_inputs[2];
+    GNAPluginNS::Policy::ConcatAlignment alignmentPolicy;
+
+    void SetUp() override {
+        std::tie(precision, alignmentPolicy, concat_inputs[0], concat_inputs[1]) = GetParam();
+    }
+
+    std::shared_ptr<ngraph::Function> getNgraphModel() {
+        auto input0 = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{1, concat_inputs[0]});
+        auto input1 = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{1, concat_inputs[1]});
+
+        auto relu0 = std::make_shared<ngraph::op::v0::Relu>(input0);
+        auto relu1 = std::make_shared<ngraph::op::v0::Relu>(input1);
+
+        auto concat = std::make_shared<ngraph::op::Concat>(ngraph::NodeVector{relu0, relu1}, 1);
+
+        auto relu3 = std::make_shared<ngraph::op::v0::Relu>(concat);
+
+        auto function = std::make_shared<ngraph::Function>(ngraph::NodeVector{relu3}, ngraph::ParameterVector{input0, input1});
+        return function;
+    }
+};
+
+TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_Small_mem_footprint) {
+
+    auto ngraf = getNgraphModel();
+    if (precision == InferenceEngine::Precision::FP32) {
+        GTEST_SKIP() << "FP32 case - won't produce gna primitives";
+    }
+
+    // calc expected weight size
+    size_t expected_affine_size = 0;
+    size_t expected_copy_layers = 0;
+
+    auto getFastAffineFilterParams = [](size_t sz) -> std::pair<size_t, size_t> {
+        //align first input by 8
+        auto copy_N = sz > 32 ? 1 : 0; // number of copy layers
+        auto firstFilter_frac = sz % 32;
+        auto firstFilter_N = ALIGN(firstFilter_frac, 8);
+
+        return {copy_N, firstFilter_N   * firstFilter_frac};
+    };
+
+    auto getNumCopyElements = [&getFastAffineFilterParams](size_t sz) {
+        return getFastAffineFilterParams(sz).first;
+    };
+    auto getsNumFilterWeights = [&getFastAffineFilterParams](size_t sz) {
+        return getFastAffineFilterParams(sz).second;
+    };
+
+    switch(alignmentPolicy) {
+        case  Policy::ConcatAlignment::ENABLED : {
+            //align first input by 8
+            auto firstFilter = ALIGN(concat_inputs[0], 8) * concat_inputs[0];
+            //align first input by 8
+            auto extraLeftElementsForSecond = concat_inputs[0] + 32 - ALIGN(concat_inputs[0], 32);
+
+            auto secondFilter = ALIGN(concat_inputs[1], 8) * (extraLeftElementsForSecond + concat_inputs[1]);
+
+            expected_affine_size = firstFilter + secondFilter;
+            break;
+        }
+        case   Policy::ConcatAlignment::FAST  : {
+
+            expected_copy_layers = getNumCopyElements(concat_inputs[0]);
+            expected_affine_size = getsNumFilterWeights(concat_inputs[0]);
+
+            // calculation size for second filter
+            auto offset = ALIGN(concat_inputs[0], 32) - 32;
+            auto zerolen = concat_inputs[0] - offset;
+            auto second_output_len = zerolen + concat_inputs[1];
+
+            expected_affine_size += second_output_len  * ALIGN(concat_inputs[1], 8);
+            break;
+        }
+
+        default : {
+            FAIL() << "unsupported align policy: " << alignmentPolicy;
+        }
+    }
+
+    assert_that().onInferNgraphModel(ngraf)
+        .inNotCompactMode()
+        .withPolicy(alignmentPolicy)
+        .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
+        .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
+        .withGNAConfig(GNA_CONFIG_KEY(PRECISION), precision.name())
+        .gna()
+        .affine_weights()
+        .size()
+        .equals_to(expected_affine_size)
+        .And()
+        .copy_inserted_into_nnet()
+        .times(expected_copy_layers);
+}
+
+TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_accurate) {
+    auto ngraf = getNgraphModel();
+    if (precision == InferenceEngine::Precision::FP32) {
+        std::vector<std::vector<float>> input_data;
+        float start_value = 1.0;
+
+        for (auto dim : concat_inputs) {
+            if (dim > 0) {
+                input_data.push_back(std::vector<float>(dim));
+
+                std::iota(input_data.back().begin(), input_data.back().end(), start_value);
+                start_value += dim;
+            }
+        }
+
+        std::vector<float> expected_result(static_cast<size_t>(start_value - 1));
+        start_value = 1.0;
+        std::iota(expected_result.begin(), expected_result.end(), start_value);
+        assert_that().onInferNgraphModel(ngraf)
+            .inNotCompactMode()
+            .gna()
+            .propagate_forward()
+            .onCPU()
+            .withPolicy(alignmentPolicy)
+            .called_with()
+            .input(ngraf->get_parameters().at(0)->get_name(), input_data[0])
+            .input(ngraf->get_parameters().at(1)->get_name(), input_data[1])
+            .equals_to(expected_result);
+    } else {
+        assert_that().onInferNgraphModel(ngraf)
+            .inNotCompactMode()
+            .gna()
+            .withPolicy(alignmentPolicy)
+            .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
+            .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
+            .withGNAConfig(GNA_CONFIG_KEY(PRECISION), "I16")
+            .propagate_forward()
+            .called();
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(
+    GNALayerTests,
+    GNAAlignFilterTest,
+    testing::Combine(
+    testing::Values(InferenceEngine::Precision::FP32, InferenceEngine::Precision::I16),
+    //fast or not fast alignment policy
+    testing::Values(GNAPluginNS::Policy::ConcatAlignment::FAST, GNAPluginNS::Policy::ConcatAlignment::ENABLED),
+    // Size of first Split layer output
+    testing::Values(31, 49),
+    // Size of second Split layer output
+    testing::Values(31, 73)),
+    GNAAlignFilterTest::getTestName);
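
To make the expected-size bookkeeping in the test above concrete, a worked example under the assumption that ALIGN(x, n) rounds x up to the next multiple of n (the macro itself is defined elsewhere in the plugin sources):

    // Inputs {31, 73}, Policy::ConcatAlignment::ENABLED:
    //   firstFilter          = ALIGN(31, 8) * 31        = 32 * 31  = 992
    //   extraLeftForSecond   = 31 + 32 - ALIGN(31, 32)  = 31
    //   secondFilter         = ALIGN(73, 8) * (31 + 73) = 80 * 104 = 8320
    //   expected_affine_size = 992 + 8320               = 9312 weights
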
index a3f9d43..d75fe1c 100644 (file)
@@ -7,19 +7,36 @@
 class CopyLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
     bool matchInserted;
     const int matchQuantity;
+    mutable int actualNumberOfCopyLayers;
  public:
     CopyLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
     bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
         if (foo == nullptr)
             return false;
+        actualNumberOfCopyLayers = 0;
+
         for(int i = 0; i < foo->nLayers; i++) {
             if (foo->pLayers[i].nLayerKind != INTEL_COPY) continue;
-            return matchInserted;
+
+            if (!matchInserted) {
+                return false;
+            }
+            actualNumberOfCopyLayers ++;
+        }
+        if (matchQuantity == -1) {
+            if (actualNumberOfCopyLayers > 0) {
+                return true;
+            }
+            return false;
+        }
+        if (actualNumberOfCopyLayers != matchQuantity) {
+            return false;
         }
-        return !matchInserted;
+        return true;
     };
     void DescribeTo(::std::ostream *os) const override {
-        *os << "should "<< (matchInserted ? "" : "not ") << "have Copy primitive as part of nnet structure";
+        *os << "should "<< (matchInserted ? "" : "not ") << "have " << (matchInserted ? std::to_string(matchQuantity) : "" )
+            << " Copy primitives as part of nnet structure" << (matchInserted ? std::string(" but was only: ") + std::to_string(actualNumberOfCopyLayers) + " copy layers" : "" );
     }
 };
 
index 44e750c..2b18587 100644 (file)
@@ -104,7 +104,7 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
             auto affine = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
 
             auto affineWeightsSize = foo->pLayers[i].nOutputRows *
-                foo->pLayers[i].nLayerKind == INTEL_AFFINE_DIAGONAL ? 1 : foo->pLayers[i].nInputRows;
+                (foo->pLayers[i].nLayerKind == INTEL_AFFINE_DIAGONAL ? 1 : foo->pLayers[i].nInputRows);
 
             if (affineWeightsSize != std::get<0>(transpozedData)->size()) {
                 error << "gna-xnn layer(" << i << ") weights size mismatch: expected "
@@ -136,6 +136,49 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
     }
 };
 
+class WeightsSizeMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+    enum HowMatch{
+        eNone,
+        eEqAffine,
+    } eMatchKind;
+
+    mutable std::stringstream error;
+    mutable int actual;
+    size_t expected_weights_size;
+ public:
+    explicit WeightsSizeMatcher(const size_t data_len) :
+        eMatchKind(eEqAffine),
+        expected_weights_size(data_len){
+    }
+    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+        if (foo == nullptr)
+            return false;
+
+        size_t sizeTotal = 0;
+        std::stringstream ss;
+        for(int i = 0; i < foo->nLayers; i++) {
+            if (foo->pLayers[i].nLayerKind != INTEL_AFFINE && eMatchKind == eEqAffine) continue;
+
+            auto affineWeightsSize = foo->pLayers[i].nOutputRows *
+                (foo->pLayers[i].nLayerKind == INTEL_AFFINE_DIAGONAL ? 1 : foo->pLayers[i].nInputRows);
+
+            sizeTotal += affineWeightsSize;
+            ss << "[" << i << "]: " << affineWeightsSize << ", ";
+
+        }
+
+        if (eMatchKind == eEqAffine &&  sizeTotal != expected_weights_size) {
+            error << "gna-affine layers " << ss.str() << " have diff total weights size : " << sizeTotal
+                  << ", while expected to have: " << expected_weights_size << "\n";
+            return false;
+        }
+        return true;
+    };
+    void DescribeTo(::std::ostream *os) const override {
+        *os << error.str() << std::endl;
+    }
+};
+
 
 class WeightsSaver: public ::testing::MatcherInterface<const intel_nnet_type_t*> {
     mutable TranspozeIterator iterator;
@@ -182,3 +225,7 @@ void SaveWeights(std::unique_ptr<NNetComponentMatcher>& components,  std::vector
     components->add(new WeightsSaver(make_tuple(data, dims.first, dims.second)));
 }
 
+void HasWeightsSizeEq(std::unique_ptr<NNetComponentMatcher>& components,  size_t weights_size) {
+    components->add(new WeightsSizeMatcher(weights_size));
+}
+
index bf66e43..69b4c3b 100644 (file)
@@ -108,6 +108,7 @@ TEST(MKLDNNDumpTests, SerU8AsTxt) {
 
     std::string deser_header, ref_header = "U8 4D shape: 2 3 4 5 (120)";
     std::getline(buff, deser_header);
+    deser_header = deser_header.substr(0, ref_header.length());
     ASSERT_EQ(deser_header, ref_header);
 
     auto num_line = std::count(std::istreambuf_iterator<char>(buff),
@@ -129,6 +130,7 @@ TEST(MKLDNNDumpTests, SerAsTxt) {
 
     std::string deser_header, ref_header = "FP32 2D shape: 2 3 (6)";
     std::getline(buff, deser_header);
+    deser_header = deser_header.substr(0, ref_header.length());
     ASSERT_EQ(deser_header, ref_header);
 
     auto num_line = std::count(std::istreambuf_iterator<char>(buff),
index 5c72381..0670283 100644 (file)
@@ -187,7 +187,13 @@ protected:
     }
 };
 
-TEST_P(MKLDNNCPUExtScatterTFTests, TestsScatter) {}
+// Disabled these tests as they need to adjust with new specs:
+// - new Scatter Update layer: like TF scatter_update
+// - new Scatter Elements Update: like ONNX Scatter Elements
+// See merge requests:
+// DLDT #6005: Specification for the ScatterElementsUpdate layer
+// DLDT #6091: Specification for ScatterUpdate operation
+TEST_P(MKLDNNCPUExtScatterTFTests, DISABLED_TestsScatter) {}
 
 INSTANTIATE_TEST_CASE_P(
         TestsScatter, MKLDNNCPUExtScatterTFTests,
index 3103f98..c0232cb 100644 (file)
@@ -79,13 +79,13 @@ TEST_F(VPU_AdjustDataLocationTest, FlushCMX_TwoSpecialConsumers) {
 
     pipeline.run(model);
 
-    ASSERT_EQ(data1->location(), DataLocation::CMX);
+    ASSERT_EQ(data1->dataLocation().location, Location::CMX);
     ASSERT_EQ(data1->numConsumers(), 1);
 
     auto data1Consumer = data1->singleConsumer();
     auto data1ConsumerOutput = data1Consumer->output(0);
     ASSERT_EQ(data1Consumer->type(), StageType::Copy);
-    ASSERT_EQ(data1ConsumerOutput->location(), DataLocation::BSS);
+    ASSERT_EQ(data1ConsumerOutput->dataLocation().location, Location::BSS);
     ASSERT_EQ(data1ConsumerOutput->numChildDatas(), 4);
     ASSERT_TRUE(contains(data1ConsumerOutput->childDataEdges(), [data2](const SharedAllocation& e) { return e->child() == data2; }));
     ASSERT_TRUE(contains(data1ConsumerOutput->childDataEdges(), [data3](const SharedAllocation& e) { return e->child() == data3; }));
@@ -152,13 +152,13 @@ TEST_F(VPU_AdjustDataLocationTest, SpillWithBranch) {
     pipeline.run(model);
 
     auto hw1Output = hw1->output(0);
-    ASSERT_EQ(hw1Output->location(), DataLocation::CMX);
+    ASSERT_EQ(hw1Output->dataLocation().location, Location::CMX);
 
     auto copyStage = hw1Output->singleConsumer();
     ASSERT_EQ(copyStage->type(), StageType::Copy);
 
     auto copyStageOutput = copyStage->output(0);
-    ASSERT_EQ(copyStageOutput->location(), DataLocation::BSS);
+    ASSERT_EQ(copyStageOutput->dataLocation().location, Location::BSS);
 
     ASSERT_EQ(copyStageOutput->numConsumers(), 2);
     for (const auto& copyStageOutputConsumer : copyStageOutput->consumers()) {
index 72c0c26..dc666d0 100644 (file)
@@ -3,6 +3,8 @@
 //
 
 #include "graph_transformer_tests.hpp"
+#include <vpu/model/data_contents/replicated_data_content.hpp>
+
 #include <precision_utils.h>
 
 using namespace vpu;
@@ -19,8 +21,8 @@ TEST_F(VPU_EliminateConstConcatTest, EliminateCase_1D) {
 
     const auto model = CreateModel();
 
-    const auto constData1 = model->addConstData("const1", dataDesc1, replicateContent(1.0f, dataDesc1.totalDimSize()));
-    const auto constData2 = model->addConstData("const2", dataDesc2, replicateContent(2.0f, dataDesc2.totalDimSize()));
+    const auto constData1 = model->addConstData("const1", dataDesc1, replicateContent(1.0f, dataDesc1.totalDimSize(), dataDesc1));
+    const auto constData2 = model->addConstData("const2", dataDesc2, replicateContent(2.0f, dataDesc2.totalDimSize(), dataDesc2));
 
     const auto concatData = model->addNewData("concat", dataDescConcat);
 
@@ -74,8 +76,8 @@ TEST_F(VPU_EliminateConstConcatTest, EliminateCase_2D) {
 
     const auto model = CreateModel();
 
-    const auto constData1 = model->addConstData("const1", dataDesc1, replicateContent(1.0f, dataDesc1.totalDimSize()));
-    const auto constData2 = model->addConstData("const2", dataDesc2, replicateContent(2.0f, dataDesc2.totalDimSize()));
+    const auto constData1 = model->addConstData("const1", dataDesc1, replicateContent(1.0f, dataDesc1.totalDimSize(), dataDesc1));
+    const auto constData2 = model->addConstData("const2", dataDesc2, replicateContent(2.0f, dataDesc2.totalDimSize(), dataDesc2));
 
     const auto concatData = model->addNewData("concat", dataDescConcat);
 
index f1e0bf6..6ecf86a 100644 (file)
@@ -2,10 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <initializer_list>
+#include "graph_transformer_tests.hpp"
+
 #include <vpu/stages/stub_stage.hpp>
+#include <vpu/model/data_contents/ie_blob_content.hpp>
 
-#include "graph_transformer_tests.hpp"
+#include <initializer_list>
 
 using namespace vpu;
 
index 9ddf0b1..65379f3 100644 (file)
@@ -5,6 +5,7 @@
 #include <vpu/stages/stub_stage.hpp>
 
 #include "graph_transformer_tests.hpp"
+#include "vpu/model/data_contents/ie_blob_content.hpp"
 
 using namespace vpu;
 
index 7fbc02c..2061103 100644 (file)
@@ -152,12 +152,6 @@ TEST_F(CNNNGraphImplTests, TestSetBatch) {
 
     ASSERT_EQ(2, cnnNet.getBatchSize());
     ASSERT_EQ(2, cnnNet.getCNNNetwork()->getBatchSize());
-
-    auto cnnNet2 = cnnNet.cloneNGraphImpl();
-
-    ASSERT_EQ(2, cnnNet2->getBatchSize());
-    ASSERT_EQ(2, cnnNet2->getCNNNetwork()->getBatchSize());
-    ASSERT_NE(cnnRefNet, cnnNet2->getCNNNetwork());
 }
 
 TEST_F(CNNNGraphImplTests, TestSaveAffinity) {
index d7f27c6..1f5e0ef 100644 (file)
@@ -7,6 +7,7 @@
 #include "tests_common.hpp"
 
 #include <convert_function_to_cnn_network.hpp>
+#include <cpp/ie_cnn_network.h>
 
 #include <ngraph/function.hpp>
 #include <ngraph/opsets/opset1.hpp>
@@ -30,7 +31,7 @@ TEST_F(ConvertFunctionToCNNNetworkTests, ConvertPReLUNetwork) {
                                                ngraph::ParameterVector{param1, param2});
     }
 
-    InferenceEngine::details::CNNNetworkNGraphImpl nGraphImpl(f);
+    InferenceEngine::CNNNetwork nGraphImpl(f);
     try {
         auto net = InferenceEngine::details::convertFunctionToICNNNetwork(f, nGraphImpl);
         FAIL();
@@ -59,10 +60,10 @@ TEST_F(ConvertFunctionToCNNNetworkTests, ConvertConvolutionNetwork) {
                                                ngraph::ParameterVector{param1, param2});
     }
 
-    InferenceEngine::details::CNNNetworkNGraphImpl nGraphImpl(f);
+    InferenceEngine::CNNNetwork nGraphImpl(f);
     try {
         auto net = InferenceEngine::details::convertFunctionToICNNNetwork(f, nGraphImpl);
     } catch (InferenceEngine::details::InferenceEngineException &err) {
         FAIL();
     }
-}
\ No newline at end of file
+}
index 8afc781..010351c 100644 (file)
@@ -95,8 +95,7 @@ static const auto model = R"_(
 )_";
 
 TEST(NetworkSerializerTest, TopoSortResultUnique) {
-
-    auto reader = std::shared_ptr<InferenceEngine::ICNNNetReader>(InferenceEngine::CreateCNNNetReader());
+    auto reader = InferenceEngine::CreateCNNNetReaderPtr();
 
     InferenceEngine::ResponseDesc resp;
 
diff --git a/inference-engine/tests_deprecated/unit/inference_engine_tests/pointer_test.cpp b/inference-engine/tests_deprecated/unit/inference_engine_tests/pointer_test.cpp
deleted file mode 100644 (file)
index 8af6f72..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include "cpp/ie_cnn_net_reader.h"
-
-using namespace InferenceEngine;
-
-class PointerTests : public ::testing::Test {};
-
-TEST_F(PointerTests, InferenceEnginePtrStoresValues) {
-    std::shared_ptr <ICNNNetReader> p(InferenceEngine::CreateCNNNetReader());
-    ASSERT_NE(p.get(), nullptr);
-}
index da771e7..f610e9f 100644 (file)
@@ -151,7 +151,8 @@ TEST_F(V2TopologyVerificationTests, testCheckConvolutionInputDim_More) {
     string testContent = getNetworkWithConvLayer("Q78", { 1, 1, 3, 227, 227 });
 
     xmlHelper->loadContent(testContent);
-    EXPECT_THROW(xmlHelper->parse(), InferenceEngine::details::InferenceEngineException);
+    // TODO: fix RTTI issue and replace by InferenceEngine::details::InferenceEngineException
+    EXPECT_THROW(xmlHelper->parse(), std::exception);
 }
 
 //convolution input must be 4D
@@ -159,27 +160,31 @@ TEST_F(V2TopologyVerificationTests, testCheckConvolutionInputDim_Less) {
     string testContent = getNetworkWithConvLayer("Q78", { 227, 227 });
 
     xmlHelper->loadContent(testContent);
-    EXPECT_THROW(xmlHelper->parse(), InferenceEngine::details::InferenceEngineException);
+    // TODO: fix RTTI issue and replace by InferenceEngine::details::InferenceEngineException
+    EXPECT_THROW(xmlHelper->parse(), std::exception);
 }
 
 //pooling input must be 4D
 TEST_F(V2TopologyVerificationTests, testCheckPoolingInputDim_Less) {
     string testContent = getNetworkWithPoolLayer({ 227, 227 });
     xmlHelper->loadContent(testContent);
-    EXPECT_THROW(xmlHelper->parse(), InferenceEngine::details::InferenceEngineException);
+    // TODO: fix RTTI issue and replace by InferenceEngine::details::InferenceEngineException
+    EXPECT_THROW(xmlHelper->parse(), std::exception);
 }
 
 //pooling input must be 4D
 TEST_F(V2TopologyVerificationTests, testCheckPoolingInputDim_More) {
     string testContent = getNetworkWithPoolLayer({ 1, 1, 3, 227, 227 });
     xmlHelper->loadContent(testContent);
-    EXPECT_THROW(xmlHelper->parse(), InferenceEngine::details::InferenceEngineException);
+    // TODO: fix RTTI issue and replace by InferenceEngine::details::InferenceEngineException
+    EXPECT_THROW(xmlHelper->parse(), std::exception);
 }
 
 TEST_F(V2TopologyVerificationTests, testLeayerPrecisionIsNotMIXED) {
     string testContent = getNetworkWithConvLayer("MIXED");
     xmlHelper->loadContent(testContent);
-    EXPECT_THROW(xmlHelper->parse(), InferenceEngine::details::InferenceEngineException);
+    // TODO: fix RTTI issue and replace by InferenceEngine::details::InferenceEngineException
+    EXPECT_THROW(xmlHelper->parse(), std::exception);
 }
 
 TEST_F(V2TopologyVerificationTests, testMixedPrecisionIfLayerAndNetworkPrecisionsDiffer) {
@@ -198,7 +203,9 @@ TEST_F(V2TopologyVerificationTests, throwsIfCropDimIsTooBig) {
 
     string testContent = getNetworkWithCropLayer({ data });
     xmlHelper->loadContent(testContent);
-    ASSERT_THROW(xmlHelper->parse(), InferenceEngine::details::InferenceEngineException);
+    
+    // TODO: fix RTTI issue and replace by InferenceEngine::details::InferenceEngineException
+    ASSERT_THROW(xmlHelper->parse(), std::exception);
 }
 
 TEST_F(V2TopologyVerificationTests, testNoThrowWithProperCropParameters) {
index db17b8e..af2c407 100644 (file)
@@ -420,6 +420,8 @@ struct layout {
         } else if (this->format == cldnn::format::os_is_yx_osv32_isv32p) {
             sizes[0] = align_to(sizes[0], 32);
             sizes[1] = align_to(sizes[1], 32);
+        } else if (this->format == cldnn::format::image_2d_rgba) {
+            sizes[1] = 4;
         }
         size_t total = std::accumulate(
             sizes.begin(),
index d16d210..30650ee 100644 (file)
@@ -121,12 +121,14 @@ struct format {
         b_fs_yx_32fp,                           ///< format for data for binary convolutions
         winograd_2x3_s1_data,                   ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1
         nv12,                                   ///< format for media nv12 input
+        image_2d_rgba,                          ///< format for image2d RGBA, always allocates memory for 4 feature maps (even when only 3 are used)
 
         // Weights formats
         oiyx,                                         ///< the most common format for 2D weights
         yxio,                                         ///< format used 2D weights
         oizyx,                                        ///< the most common format for 3D convolution
         os_iyx_osv16,                                 ///< format used only for convolution weights:
+        os_is_yx_osv16_isv16,                               ///< format used for convolution i8 weights
         os_zyxi_osv16,                                ///< format used for weights for 3D convolution
         os_is_yx_isv16_osv16,                         ///< format used for blocked convolution
         os_is_zyx_isv16_osv16,                        ///< format used for weights for blocked 3D convolution
@@ -183,6 +185,7 @@ struct format {
         gs_oiyx_gsv16,                                ///< format used for weights for 2D convolution
         gs_oiyx_gsv32,                                ///< format used for weights for 2D convolution
         g_is_os_zyx_osv16_isv16,                      ///< format used for grouped weights for blocked 3D deconvolution
+        g_os_is_yx_osv16_isv4,
         g_is_os_yx_osv16_isv16,
         g_os_is_zyx_isv8_osv16_isv2,
         g_os_is_yx_isv8_osv16_isv2,
@@ -228,6 +231,7 @@ struct format {
                 { bs_fs_zyx_bsv16_fsv16, { 1, 1, 3, 0, 0, "bfzyx",  "bfxyz",  {{0, 16 }, {1, 16}}}},
                 { bs_fs_yx_bsv16_fsv16,  { 1, 1, 3, 0, 0, "bfyx",   "bfxy?",  {{0, 16 }, {1, 16}}}},
                 { nv12,                  { 1, 1, 2, 0, 0, "bfyx",   "bfxy?",  {}}},
+                { image_2d_rgba,         { 1, 1, 2, 0, 0, "bfyx",   "bfxy?",  {}}},
 
                 { oiyx,                                        { 1, 1, 2, 0, 0, "bfyx",   "bfxy",       {}}},
                 { yxio,                                        { 1, 1, 2, 0, 0, "yxfb",   "bfxy?",      {}}},
@@ -262,6 +266,7 @@ struct format {
                 { os_is_zyx_isv8_osv16_isv2,                   { 1, 1, 3, 0, 0, "bfzyx",  "bfxyz",      {{1, 8}, {0, 16}, {1, 2}}}},
                 { os_zyxi_osv16,                               { 1, 1, 3, 0, 0, "bzyxf",  "bfxyz",      {{0, 16}}}},
                 { os_is_yx_isv8_osv16_isv2,                    { 1, 1, 2, 0, 0, "bfzyx",  "bfxyz",      {{1, 8}, {0, 16}, {1, 2}}}},
+                { os_is_yx_osv16_isv16,                        { 1, 1, 2, 0, 0, "bfyx",   "bfxy",       {{1, 16}, {0, 16}}}},
 
                 { goiyx,                                       { 1, 1, 2, 0, 1, "gbfyx",  "bfxy????g",  {}}},
                 { goizyx,                                      { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g",  {}}},
@@ -274,7 +279,8 @@ struct format {
                 { g_is_os_yx_osv16_isv16,                      { 1, 1, 2, 0, 1, "gfbyx",  "bfxy????g",  {{0, 16}, {1, 16}}}},
                 { g_os_is_zyx_isv8_osv16_isv2,                 { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g",  {{1, 8}, {0, 16}, {1, 2}}}},
                 { g_os_is_yx_isv8_osv16_isv2,                  { 1, 1, 2, 0, 1, "gbfyx",  "bfxy????g",  {{1, 8}, {0, 16}, {1, 2}}}},
-                { g_os_is_zyx_isv16_osv16,                     { 1, 1, 3, 0, 1, "bfzyx",  "bfxyz???g",  {{0, 16}, {1, 16}}}},
+                { g_os_is_zyx_isv16_osv16,                     { 1, 1, 3, 0, 1, "gbfzyx", "bfxyz???g",  {{0, 16}, {1, 16}}}},
+                { g_os_is_yx_osv16_isv4,                       { 1, 1, 2, 0, 1, "gbfxy",  "bfxy????g",  {{0, 16}, {1, 4}}}},
         };
         return traits.at(fmt);
     }
@@ -311,7 +317,8 @@ struct format {
                 fmt == image_2d_weights_c1_b_fyx ||
                 fmt == image_2d_weights_winograd_6x3_s1_fbxyb ||
                 fmt == image_2d_weights_winograd_6x3_s1_xfbyb ||
-                fmt == nv12);
+                fmt == nv12 ||
+                fmt == image_2d_rgba);
     }
     /// @brief Checks if @p format is of grouped type
     static bool is_grouped(type fmt) { return group_num(fmt) != 0; }
index a2f9d20..f7528c4 100644 (file)
@@ -177,6 +177,7 @@ struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise> {
 
     /// @brief Is optimization that output contains data from second input ON ?
     bool second_input_in_output = false;
+    bool depth_to_space_already_fused = false;
 
 protected:
     const primitive_id_arr conv_weights;
index 8f668b1..c67c817 100644 (file)
@@ -520,7 +520,7 @@ typedef CL_API_ENTRY cl_mem(CL_API_CALL * PFN_clCreateFromMediaSurfaceINTEL)(
                 return detail::errHandler(CL_INVALID_ARG_VALUE, fname);
             }
 
-            static PFN_clGetDeviceIDsFromMediaAdapterINTEL pfn_clGetDeviceIDsFromMediaAdapterINTEL = NULL;
+            PFN_clGetDeviceIDsFromMediaAdapterINTEL pfn_clGetDeviceIDsFromMediaAdapterINTEL = NULL;
             if (!pfn_clGetDeviceIDsFromMediaAdapterINTEL) {
                 pfn_clGetDeviceIDsFromMediaAdapterINTEL =
                     reinterpret_cast<PFN_clGetDeviceIDsFromMediaAdapterINTEL>
@@ -540,44 +540,46 @@ typedef CL_API_ENTRY cl_mem(CL_API_CALL * PFN_clCreateFromMediaSurfaceINTEL)(
                 0,
                 NULL,
                 &n);
-            if (err != CL_SUCCESS) {
+            if (err != CL_SUCCESS && err != CL_DEVICE_NOT_FOUND) {
                 return detail::errHandler(err, fname);
             }
 
-            vector<cl_device_id> ids(n);
-            err = pfn_clGetDeviceIDsFromMediaAdapterINTEL(
-                object_,
-                media_adapter_type,
-                media_adapter,
-                media_adapter_set,
-                n,
-                ids.data(),
-                NULL);
-            if (err != CL_SUCCESS) {
-                return detail::errHandler(err, fname);
-            }
+            if (err != CL_DEVICE_NOT_FOUND)
+            {
+                vector<cl_device_id> ids(n);
+                err = pfn_clGetDeviceIDsFromMediaAdapterINTEL(
+                    object_,
+                    media_adapter_type,
+                    media_adapter,
+                    media_adapter_set,
+                    n,
+                    ids.data(),
+                    NULL);
+                if (err != CL_SUCCESS) {
+                    return detail::errHandler(err, fname);
+                }
 
-            // Cannot trivially assign because we need to capture intermediates
-            // with safe construction
-            // We must retain things we obtain from the API to avoid releasing
-            // API-owned objects.
-            if (devices) {
-                devices->resize(ids.size());
-
-                // Assign to param, constructing with retain behaviour
-                // to correctly capture each underlying CL object
-                for (size_type i = 0; i < ids.size(); i++) {
-                    (*devices)[i] = Device(ids[i], true);
+                // Cannot trivially assign because we need to capture intermediates
+                // with safe construction
+                // We must retain things we obtain from the API to avoid releasing
+                // API-owned objects.
+                if (devices) {
+                    devices->resize(ids.size());
+
+                    // Assign to param, constructing with retain behaviour
+                    // to correctly capture each underlying CL object
+                    for (size_type i = 0; i < ids.size(); i++) {
+                        (*devices)[i] = Device(ids[i], true);
+                    }
                 }
-            }
 
-            // set up acquire/release extensions
-            SharedSurfLock::Init(object_);
-            ImageVA::Init(object_);
+                // set up acquire/release extensions
+                SharedSurfLock::Init(object_);
+                ImageVA::Init(object_);
 #ifdef WIN32
-            BufferDX::Init(object_);
+                BufferDX::Init(object_);
 #endif
-
+            }
             return CL_SUCCESS;
         }
     };
index 9cd2c4f..a8028ca 100644 (file)
@@ -53,6 +53,7 @@ DataTensor::DataChannelArray DataTensor::dataChannelArray {{
     { DataLayout::b_fs_yx_32fp,          {  0,  1, -1, -1,  2,  3 } },
     { DataLayout::bfwzyx,                {  0,  1,  2,  3,  4,  5 } },
     { DataLayout::nv12,                  {  0,  1, -1, -1,  2,  3 } },
+    { DataLayout::image_2d_rgba,         {  0,  1, -1, -1,  2,  3 } },
 }};
 
 WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
@@ -71,6 +72,7 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
     { WeightsLayout::os_i_osv8__ai8,                              { -1, -1, -1,   0,   1, -1, -1, -1 } },
     { WeightsLayout::os_i_osv16__ai8,                             { -1, -1, -1,   0,   1, -1, -1, -1 } },
     { WeightsLayout::os_i_osv16,                                  { -1, -1, -1,   0,   1, -1, -1, -1 } },
+    { WeightsLayout::os_is_yx_osv16_isv16,                        {  0,  1, -1,   2,   3, -1, -1, -1 } },
     { WeightsLayout::i_yxs_os_yxsv2_osv16,                        {  1,  2, -1,   3,   0, -1, -1, -1 } },
     { WeightsLayout::iy_xs_os_xsv2_osv16__ao32,                   {  1,  2, -1,   3,   0, -1, -1, -1 } },
     { WeightsLayout::iy_xs_os_xsv2_osv8__ao32,                    {  1,  2, -1,   3,   0, -1, -1, -1 } },
@@ -104,6 +106,7 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
     { WeightsLayout::os_is_yx_isv8_osv16_isv2,                    {  0,  1, -1,   2,   3, -1, -1, -1 } },
     { WeightsLayout::os_zyxi_osv16,                               {  1,  2,  3,   0,   4, -1, -1, -1 } },
     { WeightsLayout::os_i_yxs_osv4_yxsv4,                         {  0,  1, -1,   2,   3, -1, -1, -1 } },
+    { WeightsLayout::is_os_yx_osv16_isv16,                        {  0,  1, -1,   3,   2, -1, -1, -1 } },
     { WeightsLayout::goiyx,                                       {  0,  1, -1,   2,   3, -1, -1,  4 } },
     { WeightsLayout::goizyx,                                      {  0,  1,  2,   3,   4, -1, -1,  5 } },
     { WeightsLayout::g_os_iyx_osv16,                              {  0,  1, -1,   2,   3, -1, -1,  4 } },
@@ -120,8 +123,11 @@ WeightsTensor::WeightsChannelArray WeightsTensor::weightsChannelArray {{
     { WeightsLayout::g_os_is_zyx_isv16_osv16,                     {  0,  1,  2,   3,   4, -1, -1,  5 } },
     { WeightsLayout::giy_xs_os_xsv2_osv16__ao32,                  {  1,  2, -1,   3,   0, -1, -1,  4 } },
     { WeightsLayout::giy_xs_os_xsv2_osv8__ao32,                   {  1,  2, -1,   3,   0, -1, -1,  4 } },
-    { WeightsLayout::gs_oi_yxs_gsv4_yxsv4,                        {  0,  1, -1,   2,   3, -1, -1,  4 } },
     { WeightsLayout::g_os_is_yx_isv16_osv16,                      {  0,  1, -1,   2,   3, -1, -1,  4 } },
+    { WeightsLayout::gs_oi_yxs_gsv4_yxsv4,                        {  0,  1, -1,   2,   3, -1, -1,  4 } },
+    { WeightsLayout::gs_oi_yxs_gsv16_yxsv4,                       {  0,  1, -1,   2,   3, -1, -1,  4 } },
+    { WeightsLayout::gs_oi_yxs_gsv32_yxsv4,                       {  0,  1, -1,   2,   3, -1, -1,  4 } },
+    { WeightsLayout::g_os_is_yx_osv16_isv4,                       {  0,  1, -1,   2,   3, -1, -1,  4 } },
 }};
 
 NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l) {
@@ -614,6 +620,22 @@ NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l
         case gs_oi_yxs_gsv4_yxsv4:
             newDims[4] = RoundUp(newDims[4], 4);
             break;
+        case os_is_yx_osv16_isv16:
+            assert(newDims.size() == 4);
+            newDims[2] = RoundUp(newDims[2], 16);
+            newDims[3] = RoundUp(newDims[3], 16);
+            break;
+        case gs_oi_yxs_gsv16_yxsv4:
+            newDims[4] = RoundUp(newDims[4], 16);
+            break;
+        case gs_oi_yxs_gsv32_yxsv4:
+            newDims[4] = RoundUp(newDims[4], 32);
+            break;
+        case g_os_is_yx_osv16_isv4:
+            assert(newDims.size() == 5);
+            newDims[2] = RoundUp(newDims[2], 4);
+            newDims[3] = RoundUp(newDims[3], 16);
+            break;
         default:
             break;
     }
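
To make the new rounding rules concrete, here is a minimal standalone sketch (illustrative channel counts only, not part of the patch) of how os_is_yx_osv16_isv16 pads both the IFM and OFM dimensions of a weights tensor up to multiples of 16, mirroring the RoundUp calls above:

// Hypothetical helper equivalent to RoundUp() used in the hunk above.
#include <cstddef>
#include <iostream>

static size_t round_up(size_t v, size_t multiple) {
    return (v + multiple - 1) / multiple * multiple;
}

int main() {
    // A 3x3 convolution with 24 input and 40 output channels is laid out
    // as if it had 32 input and 48 output channels (both rounded to 16).
    std::cout << round_up(24, 16) << " x " << round_up(40, 16) << std::endl;  // 32 x 48
    return 0;
}
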
@@ -658,8 +680,13 @@ NDims WeightsTensor::GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l
     } else if (l == gs_oi_yxs_gsv4_yxsv4) {
         ret[2].pitch = RoundUp(ret[0].v * ret[1].v, 4) * 4;
         ret[4].pitch = ret[3].v * RoundUp(ret[0].v * ret[1].v, 4);
+    } else if (l == gs_oi_yxs_gsv16_yxsv4) {
+        ret[2].pitch = RoundUp(ret[0].v * ret[1].v, 4) * 16;
+        ret[4].pitch = ret[3].v * RoundUp(ret[0].v * ret[1].v, 4);
+    } else if (l == gs_oi_yxs_gsv32_yxsv4) {
+        ret[2].pitch = RoundUp(ret[0].v * ret[1].v, 4) * 32;
+        ret[4].pitch = ret[3].v * RoundUp(ret[0].v * ret[1].v, 4);
     }
-
     return ret;
 }
 
index 7f05b67..05979ae 100644 (file)
@@ -61,6 +61,7 @@ enum DataLayout {
     b_fs_yx_32fp,           // bfyx with blocks of 16 packed binary input channels
     bfwzyx,                 // batch, feature, 4D spatial
     nv12,                   // media nv12 layout
+    image_2d_rgba,          // image2d RGBA
     DataLayoutCount         // NUMBER OF ELEMENTS IN ENUM
 };
 
@@ -89,6 +90,7 @@ enum WeightsLayout {
     os_i_osv8__ai8,  // TODO can we drop the alignment form layout name?
     os_i_osv16__ai8,
     os_i_osv16,
+    os_is_yx_osv16_isv16,           // weights for int8 blocked conv
     i_yxs_os_yxsv2_osv16,
     iy_xs_os_xsv2_osv16__ao32,
     iy_xs_os_xsv2_osv8__ao32,
@@ -138,8 +140,12 @@ enum WeightsLayout {
     g_os_is_zyx_isv16_osv16,
     giy_xs_os_xsv2_osv16__ao32,
     giy_xs_os_xsv2_osv8__ao32,
-    gs_oi_yxs_gsv4_yxsv4,                // grouped weights for depthwise IMAD convolution
     g_os_is_yx_isv16_osv16,
+    gs_oi_yxs_gsv4_yxsv4,                // grouped weights for depthwise IMAD convolution (b_fs_yx_fsv4 format)
+    gs_oi_yxs_gsv16_yxsv4,               // grouped weights for depthwise IMAD convolution (b_fs_yx_fsv16 format)
+    gs_oi_yxs_gsv32_yxsv4,               // grouped weights for depthwise IMAD convolution (b_fs_yx_fsv32 format)
+
+    g_os_is_yx_osv16_isv4,
     WeightsLayoutCount                   // NUMBER OF ELEMENTS IN ENUM
 };
 
@@ -225,6 +231,7 @@ inline bool GroupedLayout(WeightsLayout l) {
         case WeightsLayout::giy_xs_os_xsv2_osv16__ao32:
         case WeightsLayout::giy_xs_os_xsv2_osv8__ao32:
         case WeightsLayout::gs_oi_yxs_gsv4_yxsv4:
+        case WeightsLayout::g_os_is_yx_osv16_isv4:
             return true;
         default:
             return false;
index 6394086..f10d56c 100644 (file)
@@ -59,6 +59,7 @@ ParamsKey ConcatenationKernel_simple_Ref::GetSupportedKey() const {
     k.EnableConcatAxis(ConcatAxis::FEATURE);
     k.EnableConcatAxis(ConcatAxis::BATCH);
     k.EnableConcatKernelPerInput();
+    k.EnableDifferentTypes();
     return k;
 }
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.cpp
new file mode 100644 (file)
index 0000000..1362af8
--- /dev/null
@@ -0,0 +1,241 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "convolution_kernel_b_fs_yx_fsv16_imad_1x1.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+#include <vector>
+#include <iostream>
+#include <algorithm>
+
+//
+// Kernel specific constants
+//
+#define SIMD_SIZE 16
+
+namespace kernel_selector {
+
+namespace {
+
+size_t getOutBlock_X(size_t output_size_x, size_t stride_x) {
+    size_t output_block_width = 0;
+    size_t max_block_size = std::min((SIMD_SIZE - 1) / stride_x + 1, output_size_x);
+
+    if (output_size_x <= max_block_size)
+        return output_size_x;
+
+    for (size_t block = 4; block <= max_block_size; ++block) {
+        if (output_size_x % block == 0)
+            output_block_width = block;
+    }
+    if (output_block_width == 0 && output_size_x < max_block_size * 3) {
+        size_t min_overhang = max_block_size;
+        for (size_t block = 4; block <= max_block_size; ++block) {
+            size_t overhang = block - output_size_x % block;
+            if (overhang <= min_overhang) {
+                min_overhang = overhang;
+                output_block_width = block;
+            }
+        }
+    }
+
+    if (output_block_width == 0) {
+        output_block_width = max_block_size;
+    }
+    return output_block_width;
+}
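
For intuition, here is a standalone sketch of the block-width selection implemented above (hypothetical helper name pick_block_width and sample sizes; the real kernel calls getOutBlock_X with SIMD_SIZE = 16). The largest block in [4, max] that divides the output width exactly wins; failing that, the block with the smallest overhang; failing that, the maximum block size:

// Illustrative re-implementation of the heuristic above, not part of the patch.
#include <algorithm>
#include <cstddef>
#include <iostream>

static size_t pick_block_width(size_t output_size_x, size_t stride_x, size_t simd = 16) {
    size_t max_block_size = std::min((simd - 1) / stride_x + 1, output_size_x);
    if (output_size_x <= max_block_size)
        return output_size_x;                    // whole row fits in one block
    size_t width = 0;
    for (size_t block = 4; block <= max_block_size; ++block)
        if (output_size_x % block == 0)
            width = block;                       // largest exact divisor wins
    if (width == 0 && output_size_x < max_block_size * 3) {
        size_t min_overhang = max_block_size;
        for (size_t block = 4; block <= max_block_size; ++block) {
            size_t overhang = block - output_size_x % block;
            if (overhang <= min_overhang) {      // smallest overhang wins (ties to larger block)
                min_overhang = overhang;
                width = block;
            }
        }
    }
    return width == 0 ? max_block_size : width;
}

int main() {
    std::cout << pick_block_width(56, 1) << "\n";  // 14: largest divisor of 56 in [4, 16]
    std::cout << pick_block_width(20, 2) << "\n";  // 5:  largest divisor of 20 in [4, 8]
    std::cout << pick_block_width(13, 1) << "\n";  // 13: already fits in one block
    return 0;
}
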
+
+bool should_k_slice(const convolution_params& params, size_t output_block_width) {
+    constexpr float preferred_eu_occupancy = 5.f;
+    if (params.inputs[0].Feature().v % (16 * 4) != 0)
+        return false;
+
+    size_t eu_count = params.engineInfo.computeUnitsCount;
+    auto global_size = CeilDiv(params.output.X().v, output_block_width) *
+        params.output.Y().v *
+        params.output.Batch().v * Align(CeilDiv(params.output.Feature().v, 2), SIMD_SIZE);
+    auto threads = global_size / SIMD_SIZE;
+    auto optimal_threads_num = eu_count * preferred_eu_occupancy;
+    return threads < optimal_threads_num;
+}
+
+}  // namespace
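
For intuition only (assumed device and shapes, not taken from the patch): the k-slicing heuristic above targets roughly five SIMD-16 threads per EU. On a hypothetical 24-EU device that is 24 * 5 = 120 threads; a 14x14 output with 64 features, batch 1 and out_block_width = 14 yields global_size = 1 * 14 * 1 * Align(32, 16) = 448, i.e. 448 / 16 = 28 threads, so k-slicing is enabled (28 < 120) provided the input feature count is a multiple of 16 * 4 = 64.
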
+
+Convolution_kernel_b_fs_yx_fsv16_imad_1x1::Convolution_kernel_b_fs_yx_fsv16_imad_1x1()
+    : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad_1x1") {
+    for (size_t bw = 1; bw <= SIMD_SIZE; ++bw) {
+        for (auto exe : ConvolutionKernelBase::autoTuneOptions) {
+            all_tune_params.push_back(AutoTuneParams{ bw, true, exe });
+            all_tune_params.push_back(AutoTuneParams{ bw, false, exe });
+        }
+    }
+}
+
+ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
+
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
+    k.EnableInputWeightsType(WeightsType::INT8);
+
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+
+    k.EnableDifferentTypes();
+    k.EnableDifferentInputWeightsTypes();
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBiasPerFeature();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableQuantization(QuantizationType::SYMMETRIC);
+    return k;
+}
+
+JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetJitConstants(const convolution_params& params,
+                                                                        const DispatchData& kd) const {
+    auto mem_consts = Parent::GetJitConstants(params, kd);
+    mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", kd.cldnnStyle.blockWidth));
+    mem_consts.AddConstant(MakeJitConstant("FEATURE_LWS_SPLIT", kd.cldnnStyle.prefetch));
+
+    if (!params.fused_ops.empty()) {
+        auto input_dt = GetActivationType(params);
+        FusedOpsConfiguration conf_scalar = {"", {"out_b", "out_f + out_f_offset", "out_y", "out_x + i"}, "dequantized", input_dt, 1 };
+        conf_scalar.SetLoopAxes({ Tensor::DataChannelName::X }, true);
+        mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
+    }
+
+    return mem_consts;
+}  // GetJitConstants
+
+ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_1x1::SetDefault(const convolution_params& params,
+                                                                                          int index) const {
+    DispatchData kd;
+    const auto& output = params.output;
+    auto tune_params = GetAutoTuneParams(params, index);
+    size_t k_slices = tune_params.k_slicing ? 4 : 1;
+
+    kd.gws0 = CeilDiv(output.X().v, tune_params.out_block_width);
+    kd.gws1 = output.Y().v;
+    kd.gws2 = output.Batch().v * Align(CeilDiv(output.Feature().v, 2), SIMD_SIZE) * k_slices;
+
+    kd.lws0 = 1;
+    kd.lws1 = 1;
+    kd.lws2 = SIMD_SIZE * k_slices;
+
+    kd.cldnnStyle = {0, 0, 0, 0, 0};
+    kd.gemmStyle = {0, 0, 0, 0, 0, 0};
+
+    kd.cldnnStyle.blockWidth = tune_params.out_block_width;
+    kd.cldnnStyle.prefetch = k_slices;
+
+    kd.efficiency = FORCE_PRIORITY_2;
+
+    return kd;
+}  // SetDefault
+
+bool Convolution_kernel_b_fs_yx_fsv16_imad_1x1::Validate(const Params& params, const optional_params& options) const {
+    if (!Parent::Validate(params, options)) {
+        return false;
+    }
+
+    KernelData kd = KernelData::Default<convolution_params>(params);
+    convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
+
+    if ((newParams.filterSize.x != newParams.filterSize.y) ||
+        newParams.filterSize.x != 1) {
+        // Filter size needs to be 1x1
+        return false;
+    }
+
+    if ((newParams.stride.x != newParams.stride.y) ||
+        (newParams.stride.x != 1 && newParams.stride.x != 2)) {
+        // Strides must be 1x1 or 2x2
+        return false;
+    }
+
+    if (newParams.groups != 1 || newParams.split != 1)
+        return false;
+
+    return true;
+}
+
+Convolution_kernel_b_fs_yx_fsv16_imad_1x1::AutoTuneParams
+Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetAutoTuneParams(const convolution_params& params, int index) const {
+    if (index >= 0 && index < static_cast<int>(all_tune_params.size())) {
+        return all_tune_params[index];
+    }
+    AutoTuneParams default_params;
+    default_params.out_block_width = getOutBlock_X(params.output.X().v, params.stride.x);
+    default_params.k_slicing = should_k_slice(params, default_params.out_block_width);
+    default_params.exe_mode = DEFAULT;
+    return default_params;
+}
+
+bool Convolution_kernel_b_fs_yx_fsv16_imad_1x1::ValidateAutoTuneParams(const convolution_params& params,
+                                                                       const AutoTuneParams& tune_params) const {
+    if (tune_params.k_slicing && params.inputs[0].Feature().v % (16 * 4) != 0)
+        return false;
+
+    size_t max_block_size = std::min(static_cast<size_t>((SIMD_SIZE - 1) / params.stride.x + 1), params.output.X().v);
+    if (tune_params.out_block_width > max_block_size)
+        return false;
+
+    return true;
+}
+
+KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetKernelsData(const Params& params,
+                                                                      const optional_params& options) const {
+    return GetTunedKernelsDataByIndex(params, options);
+}
+
+KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetTunedKernelsDataByIndex(const Params & params,
+                                                                                  const optional_params & options,
+                                                                                  int autoTuneIndex) const {
+    auto conv_params = static_cast<const convolution_params&>(params);
+    auto tune_params = GetAutoTuneParams(conv_params, autoTuneIndex);
+    if (!ValidateAutoTuneParams(conv_params, tune_params))
+        return {};
+    return GetCommonKernelsData(params, options, tune_params.exe_mode, autoTuneIndex);
+}
+
+KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_1x1::GetKernelsDataForAutoTune(const Params & params,
+                                                                                 const optional_params & options) const {
+    if (!Validate(params, options)) {
+        return {};
+    }
+    auto& conv_params = static_cast<const convolution_params&>(params);
+
+    KernelsData res = {};
+
+    for (size_t i = 0; i < all_tune_params.size(); i++) {
+        auto tune_params = GetAutoTuneParams(conv_params, static_cast<int>(i));
+        if (!ValidateAutoTuneParams(conv_params, tune_params))
+            continue;
+        KernelsData kd = GetTunedKernelsDataByIndex(params, options, static_cast<int>(i));
+        if (!kd.empty()) {
+            res.emplace_back(kd[0]);
+        }
+    }
+
+    return res;
+}
+
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_1x1.h
new file mode 100644 (file)
index 0000000..7133d2d
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+#include <vector>
+#include <string>
+
+namespace kernel_selector {
+
+class Convolution_kernel_b_fs_yx_fsv16_imad_1x1 : public ConvolutionKernelBase {
+public:
+    using Parent = ConvolutionKernelBase;
+    Convolution_kernel_b_fs_yx_fsv16_imad_1x1();
+    virtual ~Convolution_kernel_b_fs_yx_fsv16_imad_1x1() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsData GetKernelsDataForAutoTune(const Params & params, const optional_params & options) const override;
+    KernelsData GetTunedKernelsDataByIndex(const Params & params, const optional_params & options, int autoTuneIndex = -1) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    bool Validate(const Params& params, const optional_params& options) const override;
+    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
+    bool NeedPaddedInput() const override { return true; }
+    WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
+        return WeightsLayout::os_is_yx_osv16_isv16;
+    }
+
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return { FusedOpType::ELTWISE,
+                 FusedOpType::QUANTIZE,
+                 FusedOpType::SCALE,
+                 FusedOpType::ACTIVATION };
+    }
+
+    struct AutoTuneParams {
+        size_t out_block_width;
+        bool k_slicing;
+        std::string exe_mode;
+    };
+    std::vector<AutoTuneParams> all_tune_params;
+
+    bool ValidateAutoTuneParams(const convolution_params& params, const AutoTuneParams& tune_params) const;
+    AutoTuneParams GetAutoTuneParams(const convolution_params& params, int index) const;
+};
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.cpp
new file mode 100644 (file)
index 0000000..a1aef4f
--- /dev/null
@@ -0,0 +1,165 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+#include <vector>
+#include <iostream>
+
+//
+// Kernel specific constants
+//
+#define SIMD_SIZE 16
+
+static size_t getOutBlock_X(const size_t output_size_x, const size_t stride_x, const size_t filter_size_x) {
+    size_t output_block_width = 0;
+    size_t max_block_size = std::min((SIMD_SIZE - filter_size_x) / stride_x + 1, output_size_x);
+
+    if (output_size_x <= max_block_size)
+        return output_size_x;
+
+    for (size_t block = 4; block <= max_block_size; ++block) {
+        if (output_size_x % block == 0)
+            output_block_width = block;
+    }
+    if (output_block_width == 0 && output_size_x < max_block_size * 3) {
+        size_t min_overhang = max_block_size;
+        for (size_t block = 4; block <= max_block_size; ++block) {
+            size_t overhang = block - output_size_x % block;
+            if (overhang <= min_overhang) {
+                min_overhang = overhang;
+                output_block_width = block;
+            }
+        }
+    }
+
+    if (output_block_width == 0) {
+        output_block_width = max_block_size;
+    }
+    return output_block_width;
+}
+
+static size_t get_ofm_per_wi(const size_t output_size_f) {
+    if (output_size_f % 32 == 0)
+        return 2;
+    return 1;
+}
+
+namespace kernel_selector {
+
+ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
+
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
+    k.EnableInputWeightsType(WeightsType::INT8);
+
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+
+    k.EnableDifferentTypes();
+    k.EnableDifferentInputWeightsTypes();
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBiasPerFeature();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableQuantization(QuantizationType::SYMMETRIC);
+    k.DisableTuning();
+    return k;
+}
+
+KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetKernelsData(const Params& params,
+                                                                   const optional_params& options) const {
+    return GetCommonKernelsData(params, options);
+}
+
+JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetJitConstants(const convolution_params& params,
+                                                                     const DispatchData& kd) const {
+    auto mem_consts = Parent::GetJitConstants(params, kd);
+    const auto& output = params.output;
+
+    mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", getOutBlock_X(output.X().v, params.stride.x, params.filterSize.x)));
+    mem_consts.AddConstant(MakeJitConstant("OFM_BLOCKS_PER_SIMD", get_ofm_per_wi(output.Feature().v)));
+    mem_consts.AddConstant(MakeJitConstant("OFM_SIZE_PER_SIMD", SIMD_SIZE * get_ofm_per_wi(output.Feature().v)));
+
+    if (!params.fused_ops.empty()) {
+        auto input_dt = GetActivationType(params);
+        FusedOpsConfiguration conf_scalar = {"", {"out_b", "out_f + j * 16", "out_y", "out_x + i"}, "dequantized", input_dt, 1};
+        conf_scalar.SetLoopAxes({ Tensor::DataChannelName::X }, true);
+        mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
+    }
+
+    return mem_consts;
+}  // GetJitConstants
+
+ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_3x3::SetDefault(const convolution_params& params,
+                                                                           int) const {
+    DispatchData kd;
+    const auto& output = params.output;
+    auto output_block_width = getOutBlock_X(output.X().v, params.stride.x, params.filterSize.x);
+    auto ofm_blocks_per_simd = get_ofm_per_wi(output.Feature().v);
+
+    kd.gws0 = CeilDiv(output.X().v, output_block_width);
+    kd.gws1 = output.Y().v;
+    kd.gws2 = output.Batch().v * Align(output.Feature().v / ofm_blocks_per_simd, SIMD_SIZE);
+
+    kd.lws0 = 1;
+    kd.lws1 = 1;
+    kd.lws2 = SIMD_SIZE;
+
+    kd.cldnnStyle = {0, 0, 0, 0, 0};
+    kd.gemmStyle = {0, 0, 0, 0, 0, 0};
+
+    if (params.filterSize.x == 3)
+        kd.efficiency = FORCE_PRIORITY_2;
+    else
+        kd.efficiency = FORCE_PRIORITY_5;
+
+    return kd;
+}  // SetDefault
+
+bool Convolution_kernel_b_fs_yx_fsv16_imad_3x3::Validate(const Params& params, const optional_params& options) const {
+    if (!Parent::Validate(params, options)) {
+        return false;
+    }
+
+    KernelData kd = KernelData::Default<convolution_params>(params);
+    convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
+
+    if ((newParams.filterSize.x != newParams.filterSize.y) ||
+        (newParams.filterSize.x != 3 && newParams.filterSize.x != 5)) {
+        // Filter size needs to be 3x3 or 5x5
+        return false;
+    }
+
+    if ((newParams.stride.x != newParams.stride.y) ||
+        (newParams.stride.x != 1 && newParams.stride.x != 2)) {
+        // Strides must be 1x1 or 2x2
+        return false;
+    }
+
+    if (newParams.groups != 1 || newParams.split != 1)
+        return false;
+
+    return true;
+}
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.h
new file mode 100644 (file)
index 0000000..e69a798
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+class Convolution_kernel_b_fs_yx_fsv16_imad_3x3 : public ConvolutionKernelBase {
+public:
+    using Parent = ConvolutionKernelBase;
+    Convolution_kernel_b_fs_yx_fsv16_imad_3x3() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad_3x3") {}
+    virtual ~Convolution_kernel_b_fs_yx_fsv16_imad_3x3() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    bool Validate(const Params& params, const optional_params& options) const override;
+    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
+    bool NeedPaddedInput() const override { return true; }
+    WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
+        return WeightsLayout::os_is_yx_osv16_isv16;
+    }
+
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return { FusedOpType::ELTWISE,
+                 FusedOpType::QUANTIZE,
+                 FusedOpType::SCALE,
+                 FusedOpType::ACTIVATION };
+    }
+};
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.cpp
new file mode 100644 (file)
index 0000000..e62348c
--- /dev/null
@@ -0,0 +1,158 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+#include <vector>
+#include <iostream>
+
+//
+// Kernel specific constants
+//
+#define SIMD_SIZE 16
+
+static size_t getOutBlock_X(size_t output_size_x) {
+    auto output_block_width = 7;
+    if (output_size_x % 8 == 0)
+        output_block_width = 8;
+    return output_block_width;
+}
+
+
+namespace kernel_selector {
+
+ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
+
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+
+    k.EnableInputWeightsType(WeightsType::INT8);
+
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+
+    k.EnableDifferentTypes();
+    k.EnableDifferentInputWeightsTypes();
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBiasPerFeature();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableQuantization(QuantizationType::SYMMETRIC);
+    k.DisableTuning();
+    return k;
+}
+
+KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetKernelsData(const Params& params,
+                                                                      const optional_params& options) const {
+    return GetCommonKernelsData(params, options);
+}
+
+JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetJitConstants(const convolution_params& params,
+                                                                        const DispatchData& kd) const {
+    auto mem_consts = Parent::GetJitConstants(params, kd);
+    const auto& output = params.output;
+
+    mem_consts.AddConstants({MakeJitConstant("OUT_BLOCK_WIDTH", getOutBlock_X(output.X().v))});
+
+    if (!params.fused_ops.empty()) {
+        auto input_dt = GetActivationType(params);
+        FusedOpsConfiguration conf_scalar = {"",
+                                             {"out_b", "(out_f + get_sub_group_id() * 16)", "out_y", "out_x + i"},
+                                             "dequantized",
+                                             input_dt,
+                                             1};
+        conf_scalar.SetLoopAxes({ Tensor::DataChannelName::X }, true);
+        mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
+    }
+
+    return mem_consts;
+}  // GetJitConstants
+
+ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::SetDefault(
+    const convolution_params& params,
+    int) const {
+    DispatchData kd;
+    const auto& output = params.output;
+
+    auto output_block_width = getOutBlock_X(output.X().v);
+    kd.gws0 = output.X().v / output_block_width;
+    kd.gws1 = output.Y().v;
+    kd.gws2 = output.Batch().v * output.Feature().v * 2;
+
+    kd.lws0 = 1;
+    kd.lws1 = 1;
+    kd.lws2 = SIMD_SIZE * 4;
+
+    kd.cldnnStyle = {0, 0, 0, 0, 0};
+    kd.gemmStyle = {0, 0, 0, 0, 0, 0};
+
+    kd.efficiency = FORCE_PRIORITY_1;
+
+    return kd;
+}  // SetDefault
+
+bool Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::Validate(const Params& params, const optional_params& options) const {
+    if (!Parent::Validate(params, options)) {
+        return false;
+    }
+
+    KernelData kd = KernelData::Default<convolution_params>(params);
+    convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
+
+    if (newParams.output.Feature().v % (2 * SIMD_SIZE) != 0) {
+        return false;
+    }
+
+    if ((newParams.filterSize.x != newParams.filterSize.y) ||
+        newParams.filterSize.x != 3) {
+        // Filter size needs to be 3x3
+        return false;
+    }
+
+    if ((newParams.stride.x != newParams.stride.y) ||
+        (newParams.stride.x != 1 && newParams.stride.x != 2)) {
+        // Strides must be 1x1 or 2x2
+        return false;
+    }
+
+    if (newParams.output.X().v % 8 != 0 && newParams.output.X().v % 7 != 0) {
+        return false;
+    }
+
+    if (CeilDiv(newParams.inputs[0].Feature().v, 16) % 4 != 0) {
+        return false;
+    }
+
+    const auto& output = newParams.output;
+    auto output_block_width = getOutBlock_X(output.X().v);
+    size_t eu_count = params.engineInfo.computeUnitsCount;
+    auto global_size =
+        (output.X().v / output_block_width) * output.Y().v * ((output.Batch().v * output.Feature().v));
+    if ((global_size / 16) > (eu_count * 7)) {
+        return false;
+    }
+
+    if (newParams.groups != 1 || newParams.split != 1)
+        return false;
+
+    return true;
+}
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h
new file mode 100644 (file)
index 0000000..d3dfeaf
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+#include <vector>
+
+namespace kernel_selector {
+
+class Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks : public ConvolutionKernelBase {
+public:
+    using Parent = ConvolutionKernelBase;
+    Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks") {}
+    virtual ~Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    bool Validate(const Params& params, const optional_params& options) const override;
+    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
+    bool NeedPaddedInput() const override { return true; }
+    WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
+        return WeightsLayout::os_is_yx_osv16_isv16;
+    }
+
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return { FusedOpType::ELTWISE,
+                 FusedOpType::QUANTIZE,
+                 FusedOpType::SCALE,
+                 FusedOpType::ACTIVATION };
+    }
+};
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.cpp
new file mode 100644 (file)
index 0000000..d15c965
--- /dev/null
@@ -0,0 +1,247 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp"
+
+#include <vector>
+#include <string>
+#include <algorithm>
+
+namespace kernel_selector {
+
+ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw()
+    : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv_16_32_imad_dw") {
+    std::vector<size_t> simd_sizes = { 8, 16 };
+    std::vector<size_t> tile_x_sizes = { 1, 2, 3, 4, 5, 7, 8, 11, 16, 24, 32 };
+    std::vector<std::string> exe_modes = ConvolutionKernelBase::autoTuneOptions;
+
+    constexpr size_t max_block_size = 32 * 8;
+
+    for (auto simd : simd_sizes) {
+        for (size_t tile_x = 1; tile_x <= 32; ++tile_x) {
+            if (simd * tile_x > max_block_size)
+                continue;
+            for (auto exe_mode : exe_modes) {
+                all_tune_params.push_back(AutoTuneParams{ simd, tile_x, exe_mode });
+            }
+        }
+    }
+}
+
+ParamsKey kernel_selector::ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableInputWeightsType(WeightsType::INT8);
+    k.EnableInputWeightsType(WeightsType::UINT8);
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
+    k.EnableDifferentTypes();
+    k.EnableDifferentInputWeightsTypes();
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableBiasPerFeature();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableQuantization(QuantizationType::SYMMETRIC);
+    k.EnableQuantization(QuantizationType::ASYMMETRIC_WEIGHTS);
+    k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA);
+    k.EnableQuantization(QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS);
+    k.EnableDepthwiseSeparableOpt();
+    k.EnableGroupedConvolution();
+    return k;
+}
+
+bool ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::Validate(const Params& params, const optional_params& options) const {
+    if (!Parent::Validate(params, options))
+        return false;
+
+    auto conv_params = static_cast<const convolution_params&>(params);
+
+    if (conv_params.inputs[0].GetLayout() != conv_params.output.GetLayout())
+        return false;
+
+    if (conv_params.groups != conv_params.output.Feature().v || conv_params.groups != conv_params.inputs[0].Feature().v)
+        return false;
+
+    // Additional checks for asymmetric data
+    if (conv_params.quantization == QuantizationType::ASYMMETRIC_DATA ||
+        conv_params.quantization == QuantizationType::ASYMMETRIC_DATA_AND_WEIGHTS) {
+        // Needs compensation optimization
+        if (conv_params.compensation.empty())
+            return false;
+        // Padding not supported
+        const auto inputLimitX = (conv_params.output.X().v - 1) * conv_params.stride.x
+                               + (conv_params.filterSize.x - 1) * conv_params.dilation.x + 1;
+        const auto inputLimitY = (conv_params.output.Y().v - 1) * conv_params.stride.y
+                               + (conv_params.filterSize.y - 1) * conv_params.dilation.y + 1;
+        const auto inputLimitZ = (conv_params.output.Z().v - 1) * conv_params.stride.z
+                               + (conv_params.filterSize.z - 1) * conv_params.dilation.z + 1;
+
+        bool needs_pad = false;
+        needs_pad |= conv_params.padding.x != 0;
+        needs_pad |= conv_params.padding.y != 0;
+        needs_pad |= conv_params.padding.z != 0;
+        needs_pad |= inputLimitX > conv_params.output.X().v;
+        needs_pad |= inputLimitY > conv_params.output.Y().v;
+        needs_pad |= inputLimitZ > conv_params.output.Z().v;
+
+        if (needs_pad)
+            return false;
+    }
+
+    return true;
+}
+
+WeightsLayout ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetPreferredWeightsLayout(const convolution_params& params) const {
+    if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16)
+        return WeightsLayout::gs_oi_yxs_gsv16_yxsv4;
+    else
+        return WeightsLayout::gs_oi_yxs_gsv32_yxsv4;
+}
+
+ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::AutoTuneParams
+ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetAutoTuneParams(const convolution_params& params, int index) const {
+    if (index >= 0 && index < static_cast<int>(all_tune_params.size())) {
+        return all_tune_params[index];
+    }
+    AutoTuneParams tune_params;
+    tune_params.simd = 16;
+    if (params.output.GetLayout() == DataLayout::b_fs_yx_fsv16) {
+        tune_params.tile_x = std::min((size_t)16, params.output.X().v);
+    } else {
+        tune_params.tile_x = std::min((size_t)8, params.output.X().v);
+    }
+
+    if (params.output.X().v < 3 * tune_params.tile_x && params.output.X().v % tune_params.tile_x != 0) {
+        tune_params.tile_x = tune_params.tile_x / 2;
+    }
+
+    return tune_params;
+}
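
As a rough illustration (hypothetical output shapes): for a b_fs_yx_fsv16 output with X = 20, the default above starts from tile_x = min(16, 20) = 16, and because 20 < 3 * 16 while 20 % 16 != 0 it halves to tile_x = 8, giving CeilDiv(20, 8) = 3 work-groups along X; with X = 32 the tile stays at 16 since the width divides evenly.
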
+
+ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::DispatchData
+ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::SetDefault(const convolution_params& params, int autoTuneIndex) const {
+    DispatchData kd;
+    auto& out = params.output;
+
+    auto tune_params = GetAutoTuneParams(params, autoTuneIndex);
+
+    size_t fsv = 1;
+    if (out.GetLayout() == DataLayout::b_fs_yx_fsv16) {
+        fsv = 16;
+    } else if (out.GetLayout() == DataLayout::b_fs_yx_fsv32) {
+        fsv = 32;
+    }
+
+    std::vector<size_t> global = {
+        CeilDiv(out.X().v, tune_params.tile_x),
+        out.Y().v,
+        CeilDiv(out.Feature().v, fsv) * tune_params.simd * out.Batch().v
+    };
+    std::vector<size_t> local = { 1, 1, tune_params.simd };
+
+    kd.gws0 = global[0];
+    kd.gws1 = global[1];
+    kd.gws2 = global[2];
+
+    kd.lws0 = local[0];
+    kd.lws1 = local[1];
+    kd.lws2 = local[2];
+
+    kd.gemmStyle = { 0, 0, 0, 0, 0, 0 };
+
+    kd.cldnnStyle.blockWidth = tune_params.tile_x;
+
+    kd.efficiency = params.stride.x == 1 ? FORCE_PRIORITY_1 : FORCE_PRIORITY_2;
+
+    return kd;
+}
+
+JitConstants ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
+    auto mem_consts = Parent::GetJitConstants(params, kd);
+
+    constexpr size_t imad_width = 4;
+    auto filter_spatial = params.weights.X().v * params.weights.Y().v;
+    auto filter_blocked = filter_spatial / imad_width * imad_width;
+
+    mem_consts.AddConstant(MakeJitConstant("LWS0", kd.lws0));
+    mem_consts.AddConstant(MakeJitConstant("LWS1", kd.lws1));
+    mem_consts.AddConstant(MakeJitConstant("SIMD", kd.lws2));
+
+    mem_consts.AddConstant(MakeJitConstant("TILE_X", kd.cldnnStyle.blockWidth));
+    mem_consts.AddConstant(MakeJitConstant("FILTER_BLOCKED", filter_blocked));
+
+    if (!params.fused_ops.empty()) {
+        auto input_dt = GetActivationType(params);
+        auto conf_1 = FusedOpsConfiguration("_1",
+                                            { "b", "fused_ops_f", "y", "fused_ops_x" },
+                                            "fused_ops_in",
+                                            input_dt,
+                                            1,
+                                            LoadType::LT_ALIGNED_READ,
+                                            BoundaryCheck::ENABLED,
+                                            IndexType::TENSOR_COORD,
+                                            Tensor::DataChannelName::FEATURE);
+        auto conf_2 = conf_1;
+        conf_2.suffix = "_2";
+        conf_2.vec_size = 2;
+        auto conf_4 = conf_1;
+        conf_4.suffix = "_4";
+        conf_4.vec_size = 4;
+        mem_consts.Merge(MakeFusedOpsJitConstants(params, { conf_1, conf_2, conf_4 }));
+    }
+
+    return mem_consts;
+}
+
+KernelsData ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetTunedKernelsDataByIndex(const Params& params,
+                                                                                const optional_params& options,
+                                                                                int autoTuneIndex) const {
+    auto convParams = static_cast<const convolution_params&>(params);
+    auto tuneParams = GetAutoTuneParams(convParams, autoTuneIndex);
+    return GetCommonKernelsData(params, options, tuneParams.exeMode, autoTuneIndex);
+}
+
+KernelsData ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetTunedKernelsDataByIndex(params, options);
+}
+
+KernelsData ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw::GetKernelsDataForAutoTune(const Params& params,
+                                                                               const optional_params& options) const {
+    if (!Validate(params, options)) {
+        return {};
+    }
+    auto& conv_params = static_cast<const convolution_params&>(params);
+
+    KernelsData res = {};
+
+    for (size_t i = 0; i < all_tune_params.size(); i++) {
+        auto tune_params = GetAutoTuneParams(conv_params, static_cast<int>(i));
+        KernelsData kd = GetTunedKernelsDataByIndex(params, options, static_cast<int>(i));
+        if (!kd.empty()) {
+            res.emplace_back(kd[0]);
+        }
+    }
+
+    return res;
+}
+
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp
new file mode 100644 (file)
index 0000000..f3a2777
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+#include <vector>
+#include <string>
+
+namespace kernel_selector {
+class ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw : public ConvolutionKernelBase {
+public:
+    using Parent = ConvolutionKernelBase;
+    ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw();
+    virtual ~ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw() {}
+
+    ParamsKey GetSupportedKey() const override;
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    KernelsData GetKernelsDataForAutoTune(const Params & params, const optional_params & options) const override;
+    KernelsData GetTunedKernelsDataByIndex(const Params & params, const optional_params & options, int autoTuneIndex = -1) const override;
+
+protected:
+    bool Validate(const Params& params, const optional_params& options) const override;
+    WeightsLayout GetPreferredWeightsLayout(const convolution_params& params) const override;
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return { FusedOpType::ELTWISE,
+                 FusedOpType::QUANTIZE,
+                 FusedOpType::SCALE,
+                 FusedOpType::ACTIVATION };
+    }
+
+    bool NeedPaddedInput() const override { return true; }
+    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
+
+    struct AutoTuneParams {
+        size_t simd;
+        size_t tile_x;
+        std::string exeMode;
+    };
+    std::vector<AutoTuneParams> all_tune_params;
+
+    AutoTuneParams GetAutoTuneParams(const convolution_params& params, int index) const;
+};
+}  // namespace kernel_selector
index 32bf719..f23e282 100644 (file)
@@ -216,6 +216,9 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params,
 
     if (NeedPaddedInput()) {
         kd.reorderInput = CovolutionUpdateInputParams(newParams);
+
+        if (kd.reorderInput && !options.allowInputReordering)
+            return {};
     }
     DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.cpp
new file mode 100644 (file)
index 0000000..94cb32e
--- /dev/null
@@ -0,0 +1,91 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "convolution_kernel_bfyx_iyxo.h"
+#include <vector>
+#include <utility>
+#include <algorithm>
+
+namespace kernel_selector {
+// Sub-group size used by "convolution_kernel_bfyx_iyxo" kernel.
+constexpr size_t sub_group_size = 16;
+
+ParamsKey ConvolutionKernel_bfyx_iyxo::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputWeightsType(WeightsType::F16);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableSubGroup();
+    k.EnableBiasPerFeature();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    return k;
+}
+
+ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_iyxo::SetDefault(const convolution_params& cp, int) const {
+    DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+
+    runInfo.efficiency = FORCE_PRIORITY_9;
+
+    runInfo.gws0 = CeilDiv(cp.output.X().v, sub_group_size) / 4;
+    runInfo.gws1 = cp.output.Y().v;
+    runInfo.gws2 = sub_group_size;
+
+    runInfo.lws0 = 1;
+    runInfo.lws1 = 1;
+    runInfo.lws2 = sub_group_size;
+
+    return runInfo;
+}
+
+bool ConvolutionKernel_bfyx_iyxo::Validate(const Params& p, const optional_params& o) const {
+    if (!ConvolutionKernelBase::Validate(p, o) || !CovolutionCheckInput(p, o)) {
+        return false;
+    }
+
+    const auto& params = static_cast<const convolution_params&>(p);
+    if (params.inputs[0].X().v % 64)
+        return false;
+
+    bool bFilterSize = (params.filterSize.x == 5 && params.filterSize.y == 5) ||
+                       (params.filterSize.x == 3 && params.filterSize.y == 3 && (params.inputs[0].Feature().v % 4) == 0) ||
+                       (params.filterSize.x == 1 && params.filterSize.y == 1);
+
+    bool bStride = (params.stride.x == 1 && params.stride.y == 1);
+
+    if (!bFilterSize || !bStride || (params.output.Feature().v % 4) != 0 || (params.output.Batch().v != 1)) {
+        return false;
+    }
+
+    return true;
+}
+
+JitConstants ConvolutionKernel_bfyx_iyxo::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const {
+    auto jit = Parent::GetJitConstants(params, runInfo);
+
+    jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+
+    return jit;
+}
+
+KernelsData ConvolutionKernel_bfyx_iyxo::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetTunedKernelsDataByIndex(params, options);
+}
+
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_iyxo.h
new file mode 100644 (file)
index 0000000..ec82082
--- /dev/null
@@ -0,0 +1,42 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+#include <string>
+
+namespace kernel_selector {
+
+class ConvolutionKernel_bfyx_iyxo : public ConvolutionKernelBase {
+public:
+    using Parent = ConvolutionKernelBase;
+    ConvolutionKernel_bfyx_iyxo() : Parent("convolution_gpu_bfyx_iyxo") {}
+    virtual ~ConvolutionKernel_bfyx_iyxo() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
+        return WeightsLayout::iyxo;
+    }
+
+    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    bool Validate(const Params& p, const optional_params& o) const override;
+    bool NeedPaddedInput() const override { return true; }
+    DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+};
+}  // namespace kernel_selector
index 4d8e7e3..20e2c9d 100644 (file)
 //
 #define SIMD_SIZE 16
 
-static bool getOutBlock_WH(size_t output_size,
+static void getOutBlock_WH(size_t output_size,
                            size_t stride,
                            size_t kernel_size,
+                           size_t dilation,
                            size_t& output_block_w,
                            size_t& output_block_h) {
-    bool verify_output_ranges = false;
-
     output_block_w = output_block_h = 0;
 
     size_t upper_border = output_size < SIMD_SIZE ? output_size : SIMD_SIZE;
 
-    size_t stride_restrictions = (SIMD_SIZE - (kernel_size - 1)) / stride;
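+    // Largest output block w such that its input footprint (w - 1) * stride + (kernel_size - 1) * dilation + 1 still fits into SIMD_SIZE columns.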
+    size_t stride_restrictions = (SIMD_SIZE - (kernel_size - 1) * dilation - 1) / stride + 1;
 
     size_t max_posible_tile_size = upper_border < stride_restrictions ? upper_border : stride_restrictions;
 
@@ -46,7 +45,7 @@ static bool getOutBlock_WH(size_t output_size,
 
         size_t block_size = 0;
 
-        for (size_t i = min_horisontal_block_size; i < max_posible_tile_size; i++) {
+        for (size_t i = min_horisontal_block_size; i <= max_posible_tile_size; i++) {
             if (output_size % i == 0)
                 block_size = i;
         }
@@ -55,7 +54,6 @@ static bool getOutBlock_WH(size_t output_size,
             output_block_w = block_size;
         } else {
             output_block_w = max_posible_tile_size;
-            verify_output_ranges = true;
         }
     }
 
@@ -63,8 +61,6 @@ static bool getOutBlock_WH(size_t output_size,
         output_block_h = output_block_w;
     else
         output_block_h = 1;
-
-    return verify_output_ranges;
 }
 
 namespace kernel_selector {
@@ -73,20 +69,28 @@ ParamsKey ConvolutionKernel_imad::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+
     k.EnableInputWeightsType(WeightsType::INT8);
     k.EnableInputWeightsType(WeightsType::UINT8);
+
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableOutputLayout(DataLayout::byxf_af32);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+
     k.EnableDifferentTypes();
     k.EnableDifferentInputWeightsTypes();
     k.EnableTensorOffset();
     k.EnableTensorPitches();
-//    k.EnableDilation();
+    k.EnableDilation();
     k.EnableBiasPerFeature();
+    k.EnableGroupedConvolution();
     k.EnableNonBiasTerm();
     k.EnableBatching();
     k.EnableQuantization(QuantizationType::SYMMETRIC);
@@ -103,33 +107,24 @@ JitConstants ConvolutionKernel_imad::GetJitConstants(const convolution_params& p
 
     const auto& input = params.inputs[0];
     const auto& output = params.output;
-
-    const auto& iDims = input.GetDims();
-    const auto& oDims = output.GetDims();
     const auto& weights = params.weights;
-    const auto& wDims = weights.GetDims();
-    const int iX = DataTensor::Channelndex(input.GetLayout(), Tensor::DataChannelName::X);
-    const int iY = DataTensor::Channelndex(input.GetLayout(), Tensor::DataChannelName::Y);
-    const int iF = DataTensor::Channelndex(input.GetLayout(), Tensor::DataChannelName::FEATURE);
-    const int wOD = WeightsTensor::Channelndex(weights.GetLayout(), Tensor::WeightsChannelName::OFM);
-    const int oX = DataTensor::Channelndex(output.GetLayout(), Tensor::DataChannelName::X);
-    const int oY = DataTensor::Channelndex(output.GetLayout(), Tensor::DataChannelName::Y);
+
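+    // Feature-slice size of the input layout; the input feature count is rounded up to it for the _ID constant.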
+    size_t in_fsv = 4;
+    if (params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv4)
+        in_fsv = 4;
+    else if (params.inputs[0].GetLayout() == DataLayout::b_fs_yx_fsv16)
+        in_fsv = 16;
+    else if (params.inputs[0].GetLayout() == DataLayout::byxf_af32)
+        in_fsv = 32;
 
     mem_consts.AddConstants({
-        MakeJitConstant("_IW", iDims[iX].v),
-        MakeJitConstant("_IH", iDims[iY].v),
-        MakeJitConstant("_ID", RoundUp(iDims[iF].v, 4)),
-        MakeJitConstant("IWPAD", iDims[iX].pad.before + iDims[iX].pad.after),
-        MakeJitConstant("IHPAD", iDims[iY].pad.before + iDims[iY].pad.after),
-        MakeJitConstant("_OW", oDims[oX].v),
-        MakeJitConstant("_OH", oDims[oY].v),
-        MakeJitConstant("_OD", wDims[wOD].v),
-        MakeJitConstant("OWPAD", oDims[oX].pad.before + oDims[oX].pad.after),
-        MakeJitConstant("OHPAD", oDims[oY].pad.before + oDims[oY].pad.after),
+        MakeJitConstant("_ID", RoundUp(input.Feature().v, in_fsv)),
+        MakeJitConstant("IWPAD", input.X().pad.Total()),
+        MakeJitConstant("IHPAD", input.Y().pad.Total()),
+        MakeJitConstant("_OD", Align(output.Feature().v, SIMD_SIZE)),
+        MakeJitConstant("OWPAD", output.X().pad.Total()),
+        MakeJitConstant("OHPAD", output.Y().pad.Total()),
         MakeJitConstant("SIMD_SIZE", SIMD_SIZE),
-        MakeJitConstant("K_HEIGHT", wDims[iY].v),
-        MakeJitConstant("K_WIDTH", wDims[iX].v),
-        MakeJitConstant("K_STRIDE", params.stride.x),  // X and Y must be equal
     });
 
     if (params.filterSize.x != 3 || params.filterSize.y != 3) {
@@ -139,10 +134,9 @@ JitConstants ConvolutionKernel_imad::GetJitConstants(const convolution_params& p
     mem_consts.Merge(MakeTypeJitConstants(GetPackedInputType(params), "PACKED"));
 
     size_t obw, obh;
-    bool verify_output_ranges = getOutBlock_WH(oDims[oX].v, params.stride.x, wDims[iX].v, obw, obh);
+    getOutBlock_WH(output.X().v, params.stride.x, weights.X().v, params.dilation.x, obw, obh);
     mem_consts.AddConstants({MakeJitConstant("OUT_BLOCK_WIDTH", obw),
-                             MakeJitConstant("OUT_BLOCK_HEIGHT", obh),
-                             MakeJitConstant("NEED_TO_VERIFY_OUTPUT_RANGES", verify_output_ranges)});
+                             MakeJitConstant("OUT_BLOCK_HEIGHT", obh)});
 
     if (!params.fused_ops.empty()) {
         auto input_dt = GetActivationType(params);
@@ -155,39 +149,23 @@ JitConstants ConvolutionKernel_imad::GetJitConstants(const convolution_params& p
 }  // GetJitConstants
 
 ConvolutionKernelBase::DispatchData ConvolutionKernel_imad::SetDefault(const convolution_params& params,
-                                                                           int) const {
+                                                                       int) const {
     DispatchData kd;
 
-    const auto& in = params.inputs[0];
     const auto& output = params.output;
     const auto& weights = params.weights;
-    const auto& iDims = in.GetDims();
-    const auto& oDims = output.GetDims();
-    const auto& wDims = weights.GetDims();
-    const int oX = DataTensor::Channelndex(output.GetLayout(), Tensor::DataChannelName::X);
-    const int oY = DataTensor::Channelndex(output.GetLayout(), Tensor::DataChannelName::Y);
-    const int oB = DataTensor::Channelndex(output.GetLayout(), Tensor::DataChannelName::BATCH);
-    const int wOD = WeightsTensor::Channelndex(weights.GetLayout(), Tensor::WeightsChannelName::OFM);
-    const int wX = WeightsTensor::Channelndex(weights.GetLayout(), Tensor::WeightsChannelName::X);
 
     size_t otw, oth;
-    getOutBlock_WH(oDims[oX].v, params.stride.x, wDims[wX].v, otw, oth);
+    getOutBlock_WH(output.X().v, params.stride.x, weights.X().v, params.dilation.x, otw, oth);
 
-    size_t dim_add = ((wDims[wOD].v * iDims[oB].v) % SIMD_SIZE);
-    if (dim_add != 0)
-        dim_add = SIMD_SIZE - dim_add;
+    std::vector<size_t> global = {// number of tiles needed to cover output width
+                                  CeilDiv(output.X().v, otw),
 
-    std::vector<size_t> global = {// globalRange[0] = ((_IW / K_STRIDE) + (OTW - 1)) / OTW;
-                                  // number of tiles needed to cover output width
-                                  CeilDiv(oDims[oX].v, otw),
-
-                                  // globalRange[1] = ((_IH / K_STRIDE) + (OTH - 1)) / OTH;
                                   // number of tiles needed to cover output height
-                                  CeilDiv(oDims[oY].v, oth),
+                                  CeilDiv(output.Y().v, oth),
 
-                                  // globalRange[2] = (_OD * _B) + ((_B *_OD) % __WORKGROUP_SIZE);
                                   // round depth range up
-                                  ((wDims[wOD].v * iDims[oB].v) + dim_add)};
+                                  Align(weights.OFM().v, SIMD_SIZE) * params.groups * output.Batch().v};
 
     std::vector<size_t> local = {1, 1, SIMD_SIZE};
 
@@ -205,7 +183,7 @@ ConvolutionKernelBase::DispatchData ConvolutionKernel_imad::SetDefault(const con
     // This kernel is quite slow for 1x1 and KHx1 kernels
     // TODO: check if we need any optimized kernels in this layout
     // If yes, we need to implement some customization for these cases.
-    kd.efficiency = FORCE_PRIORITY_2;
+    kd.efficiency = FORCE_PRIORITY_3;
 
     return kd;
 }  // SetDefault
@@ -215,17 +193,13 @@ bool ConvolutionKernel_imad::Validate(const Params& params, const optional_param
         return false;
     }
 
-    KernelData kd = KernelData::Default<convolution_params>(params);
-    convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
-
-    if (newParams.stride.x != newParams.stride.y) {
-        // Strides must be equal
+    const auto& newParams = static_cast<const convolution_params&>(params);
+    if ((newParams.inputs[0].Feature().v / newParams.groups) % 4 != 0)
         return false;
-    }
-    if (newParams.output.X().v != newParams.output.Y().v) {
-        // W and H must be equal
+
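+    // Input span required by a single output element along X; it has to fit within one SIMD_SIZE-wide block.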
+    size_t min_block_size_x = (newParams.weights.X().v - 1) * newParams.dilation.x + 1;
+    if (min_block_size_x > SIMD_SIZE)
         return false;
-    }
 
     return true;
 }
index f3db5dd..37378c7 100644 (file)
@@ -35,8 +35,8 @@ protected:
     JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
     DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
     bool NeedPaddedInput() const override { return true; }
-    WeightsLayout GetPreferredWeightsLayout(const convolution_params &) const override {
-        return WeightsLayout::os_is_yx_osv16_isv4;
+    WeightsLayout GetPreferredWeightsLayout(const convolution_params &p) const override {
+        return p.groups > 1 ? WeightsLayout::g_os_is_yx_osv16_isv4 : WeightsLayout::os_is_yx_osv16_isv4;
     }
 
     std::vector<FusedOpType> GetSupportedFusedOps() const override {
index 99dba36..4e069df 100644 (file)
@@ -21,6 +21,7 @@
 #include "convolution_kernel_bfyx_direct_10_12_16.h"
 #include "convolution_kernel_bfyx_os_iyx_osv16.h"
 #include "convolution_kernel_bfyx_os_iyx_osv16_2_sg.h"
+#include "convolution_kernel_bfyx_iyxo.h"
 #include "convolution_kernel_yxfb_ref.h"
 #include "convolution_kernel_yxfb_yxio_b16.h"
 #include "convolution_kernel_yxfb_yxio_b8.h"
 #include "convolution_kernel_mmad_b_fs_yx_fsv32_dw.h"
 #include "convolution_kernel_mmad_bfyx_b_fs_yx_fsv32.h"
 #include "convolution_kernel_bfyx_to_bs_fs_yx_bsv16_fsv16.h"
+#include "convolution_kernel_b_fs_yx_fsv16_imad_1x1.h"
+#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3.h"
+#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h"
+#include "convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp"
 
 namespace kernel_selector {
 convolution_kernel_selector::convolution_kernel_selector() {
     Attach<ConvolutionKernel_Ref>();
     Attach<DeformableConvolutionKernel_bfyx_Ref>();
 
+    // b_fs_yx_fsv16 int8
+    Attach<Convolution_kernel_b_fs_yx_fsv16_imad_1x1>();
+    Attach<Convolution_kernel_b_fs_yx_fsv16_imad_3x3>();
+    Attach<Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks>();
+
     // b_fs_yx_fsv16 and b_fs_zyx_fsv16
     Attach<ConvolutionKernel_b_fs_yx_fsv16_depthwise>();
     Attach<ConvolutionKernel_b_fs_yx_fsv16_1x1>();
@@ -97,6 +107,7 @@ convolution_kernel_selector::convolution_kernel_selector() {
     Attach<ConvolutionKernel_bfyx_GEMMLike>();
     Attach<ConvolutionKernel_bfyx_Direct_10_10_12>();
     Attach<ConvolutionKernel_bfyx_os_iyx_osv16>();
+    Attach<ConvolutionKernel_bfyx_iyxo>();
     Attach<ConvolutionKernel_bfyx_1x1>();
     Attach<ConvolutionKernel_bfyx_1x1_gemm_buf>();
     Attach<ConvolutionKernel_bfyx_depthwise_weights_lwg>();
@@ -145,6 +156,7 @@ convolution_kernel_selector::convolution_kernel_selector() {
     Attach<ConvolutionKernel_mmad_b_fs_yx_fsv32>();
     Attach<ConvolutionKernel_mmad_b_fs_yx_fsv32_dw>();
     Attach<ConvolutionKernel_mmad_bfyx_b_fs_yx_fsv32>();
+    Attach<ConvolutionKernel_b_fs_yx_fsv_16_32_imad_dw>();
 }
 
 KernelsData convolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.cpp
new file mode 100644 (file)
index 0000000..081ff45
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "depth_to_space_kernel_base.h"
+#include "kernel_selector_utils.h"
+#include <string>
+#include <vector>
+
+namespace kernel_selector {
+
+bool DepthToSpaceKernelBase::Validate(const Params& p, const optional_params& o) const {
+    if (p.GetType() != KernelType::DEPTH_TO_SPACE ||
+        o.GetType() != KernelType::DEPTH_TO_SPACE) {
+        return false;
+    }
+
+    return true;
+}
+
+CommonDispatchData DepthToSpaceKernelBase::SetDefault(const depth_to_space_params& params) const {
+    CommonDispatchData runInfo;
+
+    std::vector<size_t> global = { params.output.Batch().v,
+                                   params.output.Feature().v,
+                                   params.output.Y().v * params.output.X().v };
+
+    auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+
+    runInfo.gws0 = global[0];
+    runInfo.gws1 = global[1];
+    runInfo.gws2 = global[2];
+
+    runInfo.lws0 = local[0];
+    runInfo.lws1 = local[1];
+    runInfo.lws2 = local[2];
+
+    return runInfo;
+}
+
+JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const {
+    JitConstants jit = MakeBaseParamsJitConstants(params);
+
+    jit.AddConstant(MakeJitConstant("BLOCK_SIZE", params.block_size));
+
+    return jit;
+}
+
+KernelsData DepthToSpaceKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const {
+    KernelData kd = KernelData::Default<depth_to_space_params>(params);
+    depth_to_space_params& newParams = *static_cast<depth_to_space_params*>(kd.params.get());
+
+    if (!Validate(params, options)) {
+        return {};
+    }
+
+    auto runInfo = SetDefault(newParams);
+    auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+    auto cldnn_jit = GetJitConstants(newParams);
+    std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+    auto& kernel = kd.kernels[0];
+
+    FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+
+    kd.estimatedTime = estimatedTime;
+
+    return { kd };
+}
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.h
new file mode 100644 (file)
index 0000000..366938c
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+namespace kernel_selector {
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// depth_to_space_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct depth_to_space_params : public base_params {
+    depth_to_space_params() : base_params(KernelType::DEPTH_TO_SPACE), block_size(0) {}
+    size_t block_size;
+
+    virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// depth_to_space_optional_params
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+struct depth_to_space_optional_params : optional_params {
+    depth_to_space_optional_params() : optional_params(KernelType::DEPTH_TO_SPACE) {}
+};
+
+struct depth_to_space_fuse_params : fuse_params {
+    depth_to_space_fuse_params() : fuse_params(KernelType::DEPTH_TO_SPACE) {}
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// DepthToSpaceKernelBase
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+class DepthToSpaceKernelBase : public common_kernel_base {
+public:
+    using common_kernel_base::common_kernel_base;
+    virtual ~DepthToSpaceKernelBase() {}
+
+    struct DispatchData : public CommonDispatchData {
+    };
+
+protected:
+    virtual bool Validate(const Params&, const optional_params&) const;
+    virtual JitConstants GetJitConstants(const depth_to_space_params& params) const;
+    virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const;
+    KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimatedTime) const;
+};
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.cpp
new file mode 100644 (file)
index 0000000..e926af6
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "depth_to_space_kernel_block2_opt.h"
+#include "kernel_selector_utils.h"
+#include <string>
+#include <vector>
+
+namespace kernel_selector {
+ParamsKey DepthToSpaceKernelBlock2Opt::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    return k;
+}
+
+bool DepthToSpaceKernelBlock2Opt::Validate(const Params& p, const optional_params& o) const {
+    if (!DepthToSpaceKernelBase::Validate(p, o))
+        return false;
+
+    const auto& params = static_cast<const depth_to_space_params&>(p);
+
+    if ((params.block_size != 2) || (params.inputs[0].X().v % 2 != 0))
+        return false;
+
+    return true;
+}
+
+CommonDispatchData DepthToSpaceKernelBlock2Opt::SetDefault(const depth_to_space_params& params) const {
+    CommonDispatchData runInfo;
+
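+    // Dispatch covers half of the input width (block_size == 2), aligned up to 16 along X.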
+    std::vector<size_t> global = { Align(params.inputs[0].X().v / 2, 16),
+                                   params.inputs[0].Y().v,
+                                   1};
+
+    auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
+
+    runInfo.gws0 = global[0];
+    runInfo.gws1 = global[1];
+    runInfo.gws2 = global[2];
+
+    runInfo.lws0 = local[0];
+    runInfo.lws1 = local[1];
+    runInfo.lws2 = local[2];
+
+    return runInfo;
+}
+
+JitConstants DepthToSpaceKernelBlock2Opt::GetJitConstants(const depth_to_space_params& params) const {
+    JitConstants jit = Parent::GetJitConstants(params);
+
+    jit.AddConstant(MakeJitConstant("IN_WIDTH", params.inputs[0].X().v / 2));
+
+    return jit;
+}
+
+KernelsData DepthToSpaceKernelBlock2Opt::GetKernelsData(const Params& params, const optional_params& options) const {
+    return GetCommonKernelsData(params, options, FORCE_PRIORITY_5);
+}
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_block2_opt.h
new file mode 100644 (file)
index 0000000..721f49a
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "depth_to_space_kernel_base.h"
+
+namespace kernel_selector {
+class DepthToSpaceKernelBlock2Opt : public DepthToSpaceKernelBase {
+public:
+    using Parent = DepthToSpaceKernelBase;
+
+    DepthToSpaceKernelBlock2Opt() : DepthToSpaceKernelBase("depth_to_space_block2_opt") {}
+    virtual ~DepthToSpaceKernelBlock2Opt() {}
+
+    bool Validate(const Params&, const optional_params&) const override;
+    JitConstants GetJitConstants(const depth_to_space_params& params) const override;
+    CommonDispatchData SetDefault(const depth_to_space_params& params) const override;
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+};
+}  // namespace kernel_selector
index 6b67f9f..827b670 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -34,52 +34,7 @@ ParamsKey DepthToSpaceKernelRef::GetSupportedKey() const {
     return k;
 }
 
-CommonDispatchData DepthToSpaceKernelRef::SetDefault(const depth_to_space_params& params,
-                                                     const optional_params&) const {
-    CommonDispatchData runInfo;
-
-    std::vector<size_t> global = {params.output.Batch().v,
-                                  params.output.Feature().v,
-                                  params.output.Y().v * params.output.X().v};
-
-    auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
-
-    runInfo.gws0 = global[0];
-    runInfo.gws1 = global[1];
-    runInfo.gws2 = global[2];
-
-    runInfo.lws0 = local[0];
-    runInfo.lws1 = local[1];
-    runInfo.lws2 = local[2];
-
-    return runInfo;
-}
-
-JitConstants DepthToSpaceKernelRef::GetJitConstants(const depth_to_space_params& params) const {
-    JitConstants jit = MakeBaseParamsJitConstants(params);
-
-    jit.AddConstant(MakeJitConstant("BLOCK_SIZE", params.block_size));
-
-    return jit;
-}
-
 KernelsData DepthToSpaceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
-    KernelData kd = KernelData::Default<depth_to_space_params>(params);
-    depth_to_space_params& newParams = *static_cast<depth_to_space_params*>(kd.params.get());
-
-    assert(params.GetType() == KernelType::DEPTH_TO_SPACE);
-
-    auto runInfo = SetDefault(newParams, options);
-    auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
-    auto cldnn_jit = GetJitConstants(newParams);
-    std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
-
-    auto& kernel = kd.kernels[0];
-
-    FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
-
-    kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
-
-    return {kd};
+    return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
 }
 }  // namespace kernel_selector
index 50f314a..f74111f 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 
 #pragma once
 
-#include "common_kernel_base.h"
+#include "depth_to_space_kernel_base.h"
 
 namespace kernel_selector {
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// depth_to_space_params
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct depth_to_space_params : public base_params {
-    depth_to_space_params() : base_params(KernelType::DEPTH_TO_SPACE), block_size(0) {}
-
-    size_t block_size;
-
-    virtual ParamsKey GetParamsKey() const { return base_params::GetParamsKey(); }
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// depth_to_space_optional_params
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-struct depth_to_space_optional_params : optional_params {
-    depth_to_space_optional_params() : optional_params(KernelType::DEPTH_TO_SPACE) {}
-};
-
-class DepthToSpaceKernelRef : public common_kernel_base {
+class DepthToSpaceKernelRef : public DepthToSpaceKernelBase {
 public:
-    DepthToSpaceKernelRef() : common_kernel_base("depth_to_space_ref") {}
+    DepthToSpaceKernelRef() : DepthToSpaceKernelBase("depth_to_space_ref") {}
     virtual ~DepthToSpaceKernelRef() {}
-    virtual JitConstants GetJitConstants(const depth_to_space_params& params) const;
-    virtual CommonDispatchData SetDefault(const depth_to_space_params& params, const optional_params&) const;
+
     KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
     ParamsKey GetSupportedKey() const override;
 };
index 67e444d..d6e4264 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 
 #include "depth_to_space_kernel_selector.h"
 #include "depth_to_space_kernel_ref.h"
+#include "depth_to_space_kernel_block2_opt.h"
 
 namespace kernel_selector {
 
-depth_to_space_kernel_selector::depth_to_space_kernel_selector() { Attach<DepthToSpaceKernelRef>(); }
+depth_to_space_kernel_selector::depth_to_space_kernel_selector() {
+    Attach<DepthToSpaceKernelRef>();
+    Attach<DepthToSpaceKernelBlock2Opt>();
+}
 
 KernelsData depth_to_space_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
     return GetNaiveBestKernel(params, options, KernelType::DEPTH_TO_SPACE);
index 0de10dd..c1bbf41 100644 (file)
@@ -156,7 +156,7 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_
     for (size_t i = 0; i < ewParams.inputs.size(); i++) {
         // Allow the same input sizes OR per-channel operation
         if ((ewParams.inputs[i].LogicalSize() != output.LogicalSize()) &&
-            (ewParams.inputs[i].LogicalSize() != output.Feature().v) &&
+            (ewParams.inputs[i].LogicalSize() != output.Feature().v || ewParams.inputs[i].Feature().v != output.Feature().v) &&
             (ewParams.inputs[i].LogicalSize() != 1))
             return false;
     }
index 3885b30..948e832 100644 (file)
@@ -25,14 +25,20 @@ ParamsKey FullyConnectedKernelIMAD::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::INT8);
     k.EnableInputDataType(Datatype::UINT8);
+
     k.EnableOutputDataType(Datatype::INT8);
     k.EnableOutputDataType(Datatype::UINT8);
     k.EnableOutputDataType(Datatype::F32);
+
     k.EnableInputWeightsType(WeightsType::INT8);
-    k.EnableDifferentInputWeightsTypes();
-    k.EnableDifferentTypes();
+
     k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+
     k.EnableOutputLayout(DataLayout::bf);
+
+    k.EnableDifferentInputWeightsTypes();
+    k.EnableDifferentTypes();
     k.EnableBiasPerOutput();
     k.EnableBiasPerFeature();
     k.EnableNonBiasTerm();
@@ -101,11 +107,14 @@ JitConstants FullyConnectedKernelIMAD::GetJitConstants(const fully_connected_par
 }
 
 KernelsData FullyConnectedKernelIMAD::GetKernelsData(const Params& params, const optional_params& options) const {
+    const auto& fc_params = static_cast<const fully_connected_params&>(params);
+    const auto& input = fc_params.inputs[0];
+
     KernelsData res = {};
     for (size_t i = 0; i < autoTuneOptions.size(); i++) {
         KernelsData kd = GetTunedKernelsDataByIndex(params,
                                                     options,
-                                                    DataLayout::b_fs_yx_fsv4,
+                                                    input.GetLayout(),
                                                     WeightsLayout::os_is_yx_osv16_isv4,
                                                     FORCE_PRIORITY_1,
                                                     static_cast<int>(i));
index d02dfbb..ac48606 100644 (file)
@@ -90,6 +90,10 @@ ParamsKey fused_conv_eltwise_params::GetParamsKey() const {
         k.EnableFusedConvEltwiseRWOutOpt();
     }
 
+    if (depth_to_space_already_fused) {
+        k.EnableFusedConvEltwDepthToSpaceFusing();
+    }
+
     return k;
 }
 
@@ -370,7 +374,11 @@ KernelsData fused_conv_eltwise_kernel_base::GetKernelsDataForAutoTune(const Para
 }
 
 static DataTensor GetConvolutionBFYXPaddedTensor(const fused_conv_eltwise_params& cp) {
-    DataTensor t = cp.inputs[0];
+    DataTensor t;
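+    // Base the padded tensor on the eltwise (second) input when it is at least as wide as the convolution input.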
+    if (cp.inputs.size() > 1 && (cp.inputs[0].X().v <= cp.inputs[1].X().v))
+        t = cp.inputs[1];
+    else
+        t = cp.inputs[0];
     std::vector<Tensor::Pad> pad{{0, 0}, {0, 0}, {0, 0}, {0, 0}, { 0, 0 } };
 
     auto& conv = cp.conv;
index c24efee..43d3c81 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -64,6 +64,7 @@ struct fused_conv_eltwise_params : public weight_bias_params {
 
     float non_conv_scale = 1.0f;
     bool second_input_in_output = false;
+    bool depth_to_space_already_fused = false;
 
     std::string to_string() const override;
     std::string to_cache_string_v2() const override;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.cpp
new file mode 100644 (file)
index 0000000..6f91ce2
--- /dev/null
@@ -0,0 +1,96 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "fused_conv_eltwise_kernel_bfyx_iyxo.h"
+#include <vector>
+#include <utility>
+#include <algorithm>
+
+namespace kernel_selector {
+constexpr size_t sub_group_size = 16;
+
+fused_conv_eltwise_kernel_bfyx_iyxo::fused_conv_eltwise_kernel_bfyx_iyxo()
+    : fused_conv_eltwise_kernel_base("fused_conv_eltwise_gpu_bfyx_iyxo") {
+}
+
+ParamsKey fused_conv_eltwise_kernel_bfyx_iyxo::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputWeightsType(WeightsType::F16);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::image_2d_rgba);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableSubGroup();
+    k.EnableSubGroupShort();
+    k.EnableBiasPerFeature();
+    k.EnableBiasPerOutput();
+    k.EnableNonBiasTerm();
+    k.EnableBatching();
+    k.EnableDifferentTypes();
+    k.EnableFusedConvEltwSplitSupport();
+    k.EnableFusedConvEltwDilation();
+    k.EnableFusedConvEltwTranspose();
+    k.EnableFusedConvEltwiseRWOutOpt();
+    k.EnableFusedConvEltwDepthToSpaceFusing();
+    return k;
+}
+
+fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_iyxo::SetDefault(
+    const fused_conv_eltwise_params& cp,
+    int) const {
+    DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(cp);
+
+    runInfo.efficiency = FORCE_PRIORITY_9;
+
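+    // Dispatch layout (descriptive note): one 16-wide sub-group per 128 output columns and 2 output rows,
+    // matching the X % 128 and Y % 2 checks in Validate().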
+    runInfo.gws0 = CeilDiv(cp.output.X().v, sub_group_size) / 4 / 2;
+    runInfo.gws1 = cp.output.Y().v / 2;
+    runInfo.gws2 = sub_group_size;
+
+    runInfo.lws0 = 1;
+    runInfo.lws1 = 1;
+    runInfo.lws2 = sub_group_size;
+
+    return runInfo;
+}
+
+bool fused_conv_eltwise_kernel_bfyx_iyxo::Validate(const Params& p, const optional_params& o) const {
+    if (!fused_conv_eltwise_kernel_base::Validate(p, o) || !FusedConvolutionEltwiseCheckInput(p, o)) {
+        return false;
+    }
+
+    const auto& params = static_cast<const fused_conv_eltwise_params&>(p);
+    if (params.inputs[0].X().v % 128 || params.inputs[0].Y().v % 2)
+        return false;
+
+    return true;
+}
+
+JitConstants fused_conv_eltwise_kernel_bfyx_iyxo::GetJitConstants(const fused_conv_eltwise_params& params,
+                                                                  const DispatchData& runInfo) const {
+    auto jit = Parent::GetJitConstants(params, runInfo);
+    jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+    return jit;
+}
+
+KernelsData fused_conv_eltwise_kernel_bfyx_iyxo::GetKernelsData(const Params& params,
+                                                                        const optional_params& options) const {
+    return GetTunedKernelsDataByIndex(params, options);
+}
+
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_iyxo.h
new file mode 100644 (file)
index 0000000..965a863
--- /dev/null
@@ -0,0 +1,42 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#pragma once
+
+#include "fused_conv_eltwise_kernel_base.h"
+#include <string>
+#include <vector>
+
+namespace kernel_selector {
+
+class fused_conv_eltwise_kernel_bfyx_iyxo : public fused_conv_eltwise_kernel_base {
+public:
+    using Parent = fused_conv_eltwise_kernel_base;
+    fused_conv_eltwise_kernel_bfyx_iyxo();
+    virtual ~fused_conv_eltwise_kernel_bfyx_iyxo() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    WeightsLayout GetPreferreddWeightsLayout(const fused_conv_eltwise_params&) const override {
+        return WeightsLayout::iyxo;
+    }
+    JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+    bool Validate(const Params& p, const optional_params& o) const override;
+    bool NeedPaddedInput() const override { return true; }
+    DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
+};
+}  // namespace kernel_selector
index 80e6057..fe1976b 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
 #include "fused_conv_eltwise_kernel_yxfb_yxio_b16.h"
 #include "fused_conv_eltwise_kernel_imad.h"
 #include "fused_conv_eltwise_kernel_af32_imad_1x1.h"
+#include "fused_conv_eltwise_kernel_bfyx_iyxo.h"
 
 namespace kernel_selector {
 fused_conv_eltwise_kernel_selector::fused_conv_eltwise_kernel_selector() {
@@ -33,6 +34,7 @@ fused_conv_eltwise_kernel_selector::fused_conv_eltwise_kernel_selector() {
     Attach<fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8>();
     Attach<fused_conv_eltwise_kernel_imad>();
     Attach<fused_conv_eltwise_kernel_af32_imad_1x1>();
+    Attach<fused_conv_eltwise_kernel_bfyx_iyxo>();
 }
 
 KernelsData fused_conv_eltwise_kernel_selector::GetBestKernels(const Params& params,
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp
new file mode 100644 (file)
index 0000000..c7d1c6a
--- /dev/null
@@ -0,0 +1,315 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "mvn_kernel_b_fs_yx_fsv16_imad.hpp"
+#include "common/common_tools.h"
+
+#include <string>
+#include <algorithm>
+#include <iostream>
+
+namespace kernel_selector {
+
+static constexpr size_t simd = 16;
+static constexpr size_t fsv = 16;
+static constexpr size_t pref_work_groups = 16;
+
+ParamsKey MVNKernel_b_fs_yx_fsv16_imad::GetSupportedKey() const {
+    ParamsKey k;
+    k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableTensorOffset();
+    k.EnableTensorPitches();
+    k.EnableDifferentTypes();
+    k.EnableBatching();
+    // TODO: Add support for the across-channels mode.
+    // k.EnableMVNMode(MVNMode::ACROSS_CHANNELS);
+    k.EnableMVNMode(MVNMode::WITHIN_CHANNELS);
+    k.EnableMVNNormalizeVariance();
+    return k;
+}
+
+bool MVNKernel_b_fs_yx_fsv16_imad::Validate(const Params& p, const optional_params& options) const {
+    if (!Parent::Validate(p, options))
+        return false;
+
+    auto params = static_cast<const mvn_params&>(p);
+
+    // TODO: Add support for input padding by iterating over y (in parallel or inside the kernel).
+    if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0)
+        return false;
+
+    return true;
+}
+
+MVNKernelBase::DispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefault(const mvn_params& params) const {
+    auto kd = Parent::SetDefault(params);
+
+    auto items_num = params.output.X().v * params.output.Y().v;
+    auto max_wg = params.engineInfo.maxWorkGroupSize;
+    auto slm_per_sg = fsv * 4;
+    auto max_slm = params.engineInfo.maxLocalMemSize;
+    auto max_sgs = max_slm / slm_per_sg;
+
+    auto max_lws = std::min(max_wg, max_sgs * simd);
+
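+    // Pick the largest work-group that is a multiple of the sub-group size and fits the device limit,
+    // the available local memory and the number of spatial items.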
+    auto lws = std::max(std::min(items_num, max_lws) / simd, (size_t)1) * simd;
+
+    kd.gws0 = lws;
+    kd.gws1 = CeilDiv(params.output.Feature().v, fsv);
+    kd.gws2 = params.output.Batch().v;
+
+    kd.lws0 = lws;
+    kd.lws1 = 1;
+    kd.lws2 = 1;
+
+    kd.itemsNum = 1;
+
+    return kd;
+}
+
+JitConstants MVNKernel_b_fs_yx_fsv16_imad::GetJitConstants(const mvn_params& params, DispatchData kd) const {
+    auto jits = Parent::GetJitConstants(params, kd);
+
+    auto activation_dt = GetActivationType(params);
+    jits.Merge(MakeTypeJitConstants(activation_dt, "MEAN"));
+    jits.AddConstant(MakeJitConstant("SIMD", simd));
+    jits.AddConstant(MakeJitConstant("LWS", kd.lws0));
+    jits.AddConstant(MakeJitConstant("GWS", kd.gws0));
+    jits.AddConstant(MakeJitConstant("ITEM_GROUPS", kd.itemsNum));
+
+    if (!params.fused_ops.empty()) {
+        std::vector<std::string> idx_order;
+        idx_order = { "b", "(f + set_idx)", "(output_spatial / OUTPUT_SIZE_X)", "(output_spatial % OUTPUT_SIZE_X)" };
+        auto conf = FusedOpsConfiguration("", idx_order, "normalized", activation_dt);
+        jits.Merge(MakeFusedOpsJitConstants(params, { conf }));
+    }
+    return jits;
+}
+
+MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefaultForMulti(const mvn_params& params) const {
+    MultiDispatchData md;
+
+    auto items_num = params.output.X().v * params.output.Y().v;
+    auto max_wg = params.engineInfo.maxWorkGroupSize;
+    auto slm_per_sg = fsv * 4;
+    auto max_slm = params.engineInfo.maxLocalMemSize;
+    auto max_sgs = max_slm / slm_per_sg;
+
+    auto max_lws = std::min(max_wg, max_sgs * simd);
+    auto lws = std::max(std::min(items_num, max_lws) / simd, (size_t)1) * simd;
+
+    // TODO: Check whether a larger number of work-groups would provide any benefit.
+    size_t item_groups = pref_work_groups;
+    md.item_groups = item_groups;
+
+    size_t stage1_lws = lws;
+
+    md.stage_1.gws0 = stage1_lws * item_groups;
+    md.stage_1.gws1 = CeilDiv(params.output.Feature().v, fsv);
+    md.stage_1.gws2 = params.output.Batch().v;
+
+    md.stage_1.lws0 = stage1_lws;
+    md.stage_1.lws1 = 1;
+    md.stage_1.lws2 = 1;
+
+    md.stage_1.itemsNum = item_groups;
+
+    size_t stage2_lws = std::max(std::min(item_groups, max_lws) / simd, (size_t)1) * simd;
+
+    md.stage_2.gws0 = stage2_lws;
+    md.stage_2.gws1 = CeilDiv(params.output.Feature().v, fsv);
+    md.stage_2.gws2 = params.output.Batch().v;
+
+    md.stage_2.lws0 = stage2_lws;
+    md.stage_2.lws1 = 1;
+    md.stage_2.lws2 = 1;
+
+    md.stage_2.itemsNum = item_groups;
+
+    md.stage_final.gws0 = std::max(items_num / simd, (size_t)1) * simd;
+    md.stage_final.gws1 = CeilDiv(params.output.Feature().v, fsv);
+    md.stage_final.gws2 = params.output.Batch().v;
+
+    md.stage_final.lws0 = simd;
+    md.stage_final.lws1 = 1;
+    md.stage_final.lws2 = 1;
+
+    md.stage_final.itemsNum = 1;
+
+    return md;
+}
+
+KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_params& params, const optional_params& options, float estimated_time) const {
+    if (!Validate(params, options))
+        return {};
+
+    constexpr size_t intermediate_bytes = 4;
+    const mvn_params& orgParams = static_cast<const mvn_params&>(params);
+
+    auto runInfo = SetDefaultForMulti(orgParams);
+
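+    // Multi-stage pipeline: partial mean, mean reduction, optionally partial variance and variance reduction,
+    // and a final normalization kernel; intermediate results are kept in internal buffers.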
+    size_t kernels_num = params.mvnNormalizeVariance ? 5 : 3;
+    KernelData kd = KernelData::Default<mvn_params>(params, kernels_num);
+
+    auto finalKernelName = GetKernelName(orgParams);
+    {
+        // Mean first stage
+        auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_1);
+        cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_1", 1));
+        auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
+        auto& kernel = kd.kernels[0];
+        FillCLKernelData(kernel,
+                         runInfo.stage_1,
+                         params.engineInfo,
+                         finalKernelName,
+                         jit,
+                         entry_point,
+                         "",
+                         false,
+                         false,
+                         0,
+                         0);
+        kernel.arguments.clear();  // Clear original output argument
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
+        kd.internalBufferSizes.push_back(
+            params.output.Batch().v * Align(params.output.Feature().v, fsv) * runInfo.item_groups * intermediate_bytes);
+    }
+    {
+        // Mean second stage
+        auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_2);
+        cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_2", 1));
+        auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
+        auto& kernel = kd.kernels[1];
+        FillCLKernelData(kernel,
+                         runInfo.stage_2,
+                         params.engineInfo,
+                         finalKernelName,
+                         jit,
+                         entry_point,
+                         "",
+                         false,
+                         false,
+                         0,
+                         0);
+        kernel.arguments.clear();  // Clear original output argument
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 });
+        kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermediate_bytes);
+    }
+    if (params.mvnNormalizeVariance) {
+        // Variance first stage
+        auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_1);
+        cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_1", 1));
+        auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
+        auto& kernel = kd.kernels[2];
+        FillCLKernelData(kernel,
+                         runInfo.stage_1,
+                         params.engineInfo,
+                         finalKernelName,
+                         jit,
+                         entry_point,
+                         "",
+                         false,
+                         false,
+                         0,
+                         0);
+        kernel.arguments.clear();  // Clear original output argument
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 });
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 });
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
+    }
+    if (params.mvnNormalizeVariance) {
+        // Variance second stage
+        auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_2);
+        cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_2", 1));
+        auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
+        auto& kernel = kd.kernels[3];
+        FillCLKernelData(kernel,
+                         runInfo.stage_2,
+                         params.engineInfo,
+                         finalKernelName,
+                         jit,
+                         entry_point,
+                         "",
+                         false,
+                         false,
+                         0,
+                         0);
+        kernel.arguments.clear();  // Clear original output argument
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 });
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 2 });
+        kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermediate_bytes);
+    }
+    {  // Final
+        auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_final);
+        cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MAIN", 1));
+        cldnn_jit.AddConstant(MakeJitConstant("PRECALC_MEAN", 1));
+        cldnn_jit.AddConstant(MakeJitConstant("PRECALC_VARIANCE", params.mvnNormalizeVariance));
+        auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
+        auto& kernel = kd.kernels[kernels_num - 1];
+        FillCLKernelData(kernel,
+                         runInfo.stage_final,
+                         params.engineInfo,
+                         finalKernelName,
+                         jit,
+                         entry_point,
+                         "",
+                         false,
+                         false,
+                         1,
+                         GetFusedPrimitiveInputsCount(params));
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 });
+        if (params.mvnNormalizeVariance) {
+            kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 2 });
+        }
+    }
+    kd.intenralBufferDataType = Datatype::F32;
+    kd.estimatedTime = estimated_time;
+
+    return { kd };
+}
+
+KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetKernelsData(const Params& params, const optional_params& optParams) const {
+    const mvn_params& orgParams = static_cast<const mvn_params&>(params);
+
+    auto max_slm = params.engineInfo.maxLocalMemSize;
+    auto slm_per_sg = fsv * 4;
+    auto max_lws = params.engineInfo.maxWorkGroupSize;
+    auto items_num = orgParams.output.X().v * orgParams.output.Y().v;
+
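+    // Use the multi-stage reduction only when there is enough local memory per sub-group, at least one full
+    // sub-group fits the work-group limit, and there is enough spatial work to split across work-groups.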
+    auto enough_slm = max_lws / simd * simd * slm_per_sg <= max_slm;
+    auto enough_lws = max_lws / simd >= 1;
+    auto enough_items = items_num >= max_lws / simd * simd * pref_work_groups;
+
+    if (enough_slm && enough_lws && enough_items)
+        return GetMultiStageKernelsData(orgParams, optParams, FORCE_PRIORITY_4);
+    else
+        return GetCommonKernelsData(params, optParams, FORCE_PRIORITY_4);
+}
+}  // namespace kernel_selector
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.hpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.hpp
new file mode 100644 (file)
index 0000000..38d9e99
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#pragma once
+
+#include "mvn_kernel_base.h"
+#include <string>
+#include <vector>
+
+namespace kernel_selector {
+class MVNKernel_b_fs_yx_fsv16_imad : public MVNKernelBase {
+public:
+    using Parent = MVNKernelBase;
+    MVNKernel_b_fs_yx_fsv16_imad() : MVNKernelBase("mvn_gpu_b_fs_yx_fsv16_imad") {}
+    virtual ~MVNKernel_b_fs_yx_fsv16_imad() {}
+
+    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    ParamsKey GetSupportedKey() const override;
+
+protected:
+    struct MultiDispatchData {
+        DispatchData stage_1;
+        DispatchData stage_2;
+        DispatchData stage_final;
+
+        size_t item_groups;
+    };
+
+    bool Validate(const Params&, const optional_params&) const override;
+    DispatchData SetDefault(const mvn_params& params) const override;
+    JitConstants GetJitConstants(const mvn_params& params, DispatchData kd) const override;
+    std::vector<FusedOpType> GetSupportedFusedOps() const override {
+        return {
+            FusedOpType::ACTIVATION,
+            FusedOpType::QUANTIZE,
+            FusedOpType::ELTWISE,
+            FusedOpType::SCALE
+        };
+    }
+
+    KernelsData GetMultiStageKernelsData(const mvn_params& params, const optional_params&, float estimated_time) const;
+    MultiDispatchData SetDefaultForMulti(const mvn_params& params) const;
+};
+}  // namespace kernel_selector
index 0733c33..7e5942f 100644 (file)
 #include "mvn_kernel_selector.h"
 #include "mvn_kernel_ref.h"
 #include "mvn_kernel_bfyx_opt.h"
+#include "mvn_kernel_b_fs_yx_fsv16_imad.hpp"
 
 namespace kernel_selector {
 mvn_kernel_selector::mvn_kernel_selector() {
     Attach<MVNKernelRef>();
     Attach<MVNKernelBfyxOpt>();
+    Attach<MVNKernel_b_fs_yx_fsv16_imad>();
 }
 
 KernelsData mvn_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const {
     return GetNaiveBestKernel(params, options, KernelType::MVN);
 }
-}  // namespace kernel_selector
\ No newline at end of file
+}  // namespace kernel_selector
index 7666925..205cd16 100644 (file)
@@ -53,6 +53,7 @@ inline uint32_t SubGroupSize(WeightsLayout l) {
         case WeightsLayout::g_os_is_zyx_isv16_osv16:
         case WeightsLayout::giy_xs_os_xsv2_osv16__ao32:
         case WeightsLayout::g_os_is_yx_isv16_osv16:
+        case WeightsLayout::os_is_yx_osv16_isv16:
             return 16;
         case WeightsLayout::os_i_osv8__ai8:
         case WeightsLayout::iy_xs_os_xsv2_osv8__ao32:
@@ -167,7 +168,18 @@ ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_weig
 ReorderKernelBase::DispatchData ReorderKernelBase::SetDefault(const reorder_params& params) const {
     DispatchData kd;
 
-    auto global = GetTensorFriendlyWorkGroups(params.inputs[0]);
+    auto& input = params.inputs[0];
+    DataTensor input_tensor = input;
+    // Image format reorders use the read_image and write_image functions, which operate on 4 channels at once and support only a single batch,
+    // so make sure that the reorder dispatch size covers the spatial sizes only
+    if (params.inputs[0].GetLayout() == DataLayout::image_2d_rgba || params.output.GetLayout() == DataLayout::image_2d_rgba) {
+        std::vector<size_t> input_sizes(4, 1);
+        input_sizes[0] = input.X().v;
+        input_sizes[1] = input.Y().v;
+        input_tensor = DataTensor(input_sizes, input.GetDType(), DataLayout::image_2d_rgba);
+    }
+
+    auto global = GetTensorFriendlyWorkGroups(input_tensor);
     auto local = GetOptimalLocalWorkGroupSizes(global, params.engineInfo);
 
     kd.gws0 = global[0];
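
The comment in the reorder hunk above boils down to: for image_2d_rgba the dispatch tensor keeps only X and Y, because read_image/write_image already cover the 4 RGBA channels of a single-batch image. Below is a small self-contained C++ model of that size selection; the struct is a simplified stand-in, not the real DataTensor API.

#include <cstddef>
#include <iostream>
#include <vector>

struct FakeTensor { std::size_t x, y, f, b; };   // simplified stand-in for DataTensor

std::vector<std::size_t> dispatch_sizes(const FakeTensor& in, bool is_image_rgba) {
    if (is_image_rgba)
        return {in.x, in.y, 1, 1};                // spatial sizes only, single batch/feature
    return {in.x, in.y, in.f, in.b};              // generic tensor-friendly work size
}

int main() {
    FakeTensor input{224, 224, 4, 1};             // 224x224 RGBA image (example)
    auto sizes = dispatch_sizes(input, /*is_image_rgba=*/true);
    std::cout << sizes[0] << " x " << sizes[1] << " x " << sizes[2] * sizes[3] << "\n";  // 224 x 224 x 1
    return 0;
}
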
index f5f7ef2..c85b044 100644 (file)
 #include <core/common/kernel_selector_utils.h>
 #include "resample_kernel_ref.h"
 
+#include <algorithm>
+#include <vector>
+#include <string>
+
 namespace kernel_selector {
 
 ParamsKey ResampleKernelRef::GetSupportedKey() const {
@@ -43,9 +47,66 @@ KernelsData ResampleKernelRef::GetKernelsData(const Params& params, const option
     return GetCommonKernelsData(params, options);
 }
 
+static size_t packing_factor(const resample_params& params) {
+    // TODO: Add support for input-only packing
+    bool in_out_8bit = (params.inputs[0].GetDType() == Datatype::UINT8 || params.inputs[0].GetDType() == Datatype::INT8) &&
+                       (params.output.GetDType() == Datatype::UINT8 || params.output.GetDType() == Datatype::INT8);
+
+    if (!in_out_8bit)
+        return 1;
+
+    auto get_layout_packing_factor = [](const DataLayout& layout) -> size_t {
+        switch (layout) {
+        case DataLayout::b_fs_yx_fsv16:
+            return 16;
+        case DataLayout::b_fs_yx_fsv4:
+            return 4;
+        case DataLayout::byxf_af32:
+            return 16;
+        default:
+            break;
+        }
+        return 1;
+    };
+
+    size_t input_factor = get_layout_packing_factor(params.inputs[0].GetLayout());
+    size_t output_factor = get_layout_packing_factor(params.output.GetLayout());
+
+    return std::min(input_factor, output_factor);
+}
+
+static bool use_packing(const resample_params& params) {
+    if (params.resampleType != ResampleType::NEAREST_NEIGHBOR)
+        return false;
+
+    auto pack = packing_factor(params);
+    if (pack == 1)
+        return false;
+
+    if (params.inputs[0].Feature().v % pack != 0 || params.output.Feature().v % pack != 0 ||
+        params.inputs[0].Feature().pad.before % pack != 0 || params.output.Feature().pad.before % pack != 0)
+        return false;
+
+    auto packed_work_items = params.output.X().v * params.output.Y().v * params.output.Z().v
+        * CeilDiv(params.output.Feature().v, pack) * params.output.Batch().v;
+    // TODO: Loosen this requirement to the minimum number of EUs needed to saturate cache bandwidth
+    constexpr size_t max_work_items_per_eu = 32 * 7;
+    auto minimum_work_items = params.engineInfo.computeUnitsCount * max_work_items_per_eu;
+
+    if (packed_work_items < minimum_work_items)
+        return false;
+
+    return true;
+}
+
 JitConstants ResampleKernelRef::GetJitConstants(const resample_params& params) const {
     JitConstants jit = ResampleKernelBase::GetJitConstants(params);
 
+    if (use_packing(params)) {
+        jit.AddConstant(MakeJitConstant("PACK_SIZE", packing_factor(params)));
+        jit.AddConstant(MakeJitConstant("FEATURE_PACKED_MODE", "1"));
+    }
+
     if (!params.fused_ops.empty()) {
         std::vector<std::string> idx_order;
         if (DataTensor::ChannelsCount(params.output.GetLayout()) == 4) {
@@ -60,4 +121,27 @@ JitConstants ResampleKernelRef::GetJitConstants(const resample_params& params) c
 
     return jit;
 }
+
+ResampleKernelBase::DispatchData ResampleKernelRef::SetDefault(const resample_params& arg) const {
+    auto dispatch = Parent::SetDefault(arg);
+
+    if (use_packing(arg)) {
+        auto pack = packing_factor(arg);
+        std::vector<size_t> global;
+        std::vector<size_t> local;
+
+        global = { arg.output.X().v, arg.output.Y().v * arg.output.Z().v, CeilDiv(arg.output.Feature().v, pack) * arg.output.Batch().v };
+        local = GetOptimalLocalWorkGroupSizes(global, arg.engineInfo);
+
+        dispatch.gws0 = global[0];
+        dispatch.gws1 = global[1];
+        dispatch.gws2 = global[2];
+
+        dispatch.lws0 = local[0];
+        dispatch.lws1 = local[1];
+        dispatch.lws2 = local[2];
+    }
+
+    return dispatch;
+}
 }  // namespace kernel_selector
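
As a worked example of use_packing above: an int8 NEAREST_NEIGHBOR resample in b_fs_yx_fsv16 gets a packing factor of 16, and packing is enabled only when the packed work size still fills the device. The EU count and output shape in this C++ sketch are illustrative assumptions.

#include <cstddef>
#include <iostream>

static std::size_t ceil_div(std::size_t a, std::size_t b) { return (a + b - 1) / b; }

int main() {
    const std::size_t pack = 16;                              // packing factor for int8 b_fs_yx_fsv16
    const std::size_t X = 56, Y = 56, Z = 1, F = 64, B = 1;   // output shape (example)
    const std::size_t compute_units = 24;                     // EU count (assumption)

    std::size_t packed_work_items  = X * Y * Z * ceil_div(F, pack) * B;  // 56*56*1*4*1 = 12544
    std::size_t minimum_work_items = compute_units * 32 * 7;             // 24*224 = 5376

    std::cout << (packed_work_items >= minimum_work_items
                      ? "packing enabled: PACK_SIZE=16, FEATURE_PACKED_MODE=1\n"
                      : "packing skipped: not enough work to saturate the device\n");
    return 0;
}
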
index 5ded77e..0963c29 100644 (file)
@@ -31,5 +31,8 @@ public:
                  FusedOpType::SCALE,
                  FusedOpType::ACTIVATION };
     }
+
+protected:
+    DispatchData SetDefault(const resample_params& arg) const override;
 };
 }  // namespace kernel_selector
index 1a72007..4f2284e 100644 (file)
@@ -56,7 +56,7 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint
 }
 
 
-KERNEL (concatenation_gpu_ref)(__global UNIT_TYPE* input, __global UNIT_TYPE* output, uint output_offset_in_concat_axis)
+KERNEL (concatenation_gpu_ref)(__global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, uint output_offset_in_concat_axis)
 {
     const uint x = (uint)get_global_id(0) % INPUT0_SIZE_X;
     const uint y = (uint)get_global_id(0) / INPUT0_SIZE_X;
@@ -91,5 +91,5 @@ KERNEL (concatenation_gpu_ref)(__global UNIT_TYPE* input, __global UNIT_TYPE* ou
     uint input_offset  = FUNC_CALL(get_input_index)(b, f, w, z, y, x);
     uint output_offset = FUNC_CALL(get_output_index)(out_b, out_f, out_w, out_z, out_y, out_x);
 
-    output[output_offset] = ACTIVATION(input[input_offset], ACTIVATION_PARAMS);
+    output[output_offset] = TO_OUTPUT_TYPE(ACTIVATION(input[input_offset], ACTIVATION_PARAMS));
 }
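
The concatenation change above replaces the single UNIT_TYPE with separate input/output types and an explicit TO_OUTPUT_TYPE conversion, so mixed-precision concatenations store correctly. A host-side C++ analogue of the same idea, with plain types standing in for the jit-defined OpenCL ones:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    std::vector<std::int8_t> input = {-3, 7, 120};     // int8 activations (example)
    std::vector<float> output(input.size());           // wider output precision (fp16 modeled as float)

    for (std::size_t i = 0; i < input.size(); ++i)
        output[i] = static_cast<float>(input[i]);      // the TO_OUTPUT_TYPE(...) step

    std::cout << output[0] << " " << output[1] << " " << output[2] << "\n";
    return 0;
}
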
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_1x1.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_1x1.cl
new file mode 100644 (file)
index 0000000..4cacde1
--- /dev/null
@@ -0,0 +1,232 @@
+// Copyright (c) 2018-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/common.cl"
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/mmad.cl"
+
+#if QUANTIZATION_TERM
+    #define ACCUMULATOR_TYPE int
+    #define TO_ACCUMULATOR_TYPE(x) convert_int(x)
+    #define ACTIVATION_TYPE float
+    #define TO_ACTIVATION_TYPE(x) convert_float(x)
+#else
+    #define ACCUMULATOR_TYPE INPUT0_TYPE
+    #define TO_ACCUMULATOR_TYPE(x) TO_INPUT0_TYPE(x)
+    #define ACTIVATION_TYPE INPUT0_TYPE
+    #define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x)
+#endif
+
+#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
+#define AS_TYPE_N_(type, n, x) as_##type##n(x)
+#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
+#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
+
+#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b))
+#define ALIGN(a, b) (CEIL_DIV(a, b) * (b))
+
+__attribute__((intel_reqd_sub_group_size(16)))
+KERNEL(convolution_gpu_b_fs_yx_fsv16_imad_1x1)(
+    const __global INPUT0_TYPE   *conv_input,
+    __global OUTPUT_TYPE         *output,
+    const __global FILTER_TYPE    *weights,
+#if BIAS_TERM
+    const __global BIAS_TYPE     *biases,
+#endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
+    uint split_idx)
+{
+    #define LUT_VALUE_CLAMP(x) ((x) < (OUT_BLOCK_WIDTH - 1) * STRIDE_SIZE_X + 1 ? (x) : 0)
+    const int tmp[16] = {
+        LUT_VALUE_CLAMP(0),
+        LUT_VALUE_CLAMP(1),
+        LUT_VALUE_CLAMP(2),
+        LUT_VALUE_CLAMP(3),
+        LUT_VALUE_CLAMP(4),
+        LUT_VALUE_CLAMP(5),
+        LUT_VALUE_CLAMP(6),
+        LUT_VALUE_CLAMP(7),
+        LUT_VALUE_CLAMP(8),
+        LUT_VALUE_CLAMP(9),
+        LUT_VALUE_CLAMP(10),
+        LUT_VALUE_CLAMP(11),
+        LUT_VALUE_CLAMP(12),
+        LUT_VALUE_CLAMP(13),
+        LUT_VALUE_CLAMP(14),
+        LUT_VALUE_CLAMP(15)
+    };
+    #undef LUT_VALUE_CLAMP
+
+#if FEATURE_LWS_SPLIT != 1
+    const uint subgroup_id = get_sub_group_id();
+#else
+    const uint subgroup_id = 0;
+#endif
+    const uint subgroup_local_id = get_sub_group_local_id();
+
+    const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH;
+    const uint out_y = get_global_id(1);
+    const uint out_b = (uint)(get_group_id(2) * 32) / ALIGN(OUTPUT_FEATURE_NUM, 32);
+    const uint out_fg = (uint)(get_group_id(2) * 32) % ALIGN(OUTPUT_FEATURE_NUM, 32);
+    const uint out_f = out_fg + subgroup_local_id;
+
+    const uint feature_offset = subgroup_id * INPUT0_FEATURE_NUM / FEATURE_LWS_SPLIT;
+
+    ACCUMULATOR_TYPE dotProd[OUT_BLOCK_WIDTH * 2] = { 0 };
+
+    const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+    
+    uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, feature_offset, 0, 0);
+    uint filter_idx2 = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f + 16, feature_offset, 0, 0);
+
+    __attribute__((opencl_unroll_hint(1)))
+    for(uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16)/FEATURE_LWS_SPLIT; k++ ) {
+        uint4 weights_val = vload4(0, (__global uint*)(weights + filter_idx));
+        uint4 weights_val2 = vload4(0, (__global uint *)(weights + filter_idx2));
+
+        uint input_idx = GET_DATA_B_FS_YX_FSV16_INDEX(INPUT0, out_b, feature_offset + k * 16, input_y, input_x + tmp[get_sub_group_local_id()]);
+        uint4 input_val0 = vload4(0, (__global uint *)(conv_input + input_idx));
+
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for(uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) {
+            const uint ow_offset = ow + OUT_BLOCK_WIDTH;
+            dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X)), as_char4(weights_val.s0)));
+            dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X)), as_char4(weights_val.s1)));
+            dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X)), as_char4(weights_val.s2)));
+            dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X)), as_char4(weights_val.s3)));
+
+            dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X)), as_char4(weights_val2.s0)));
+            dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X)), as_char4(weights_val2.s1)));
+            dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X)), as_char4(weights_val2.s2)));
+            dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X)), as_char4(weights_val2.s3)));
+        }
+
+        filter_idx += 16 * 16;
+        filter_idx2 += 16 * 16;
+    }
+
+#if FEATURE_LWS_SPLIT != 1
+   __local ACCUMULATOR_TYPE partial_acc[16 * OUT_BLOCK_WIDTH * (FEATURE_LWS_SPLIT - 1) * 2];
+    if (subgroup_id == 0) {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
+            partial_acc[16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
+        }
+    } else if (subgroup_id == 1) {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
+            partial_acc[i * 16 + subgroup_local_id] = dotProd[i];
+            dotProd[i] = dotProd[i + OUT_BLOCK_WIDTH];
+        }
+    } else if (subgroup_id == 2) {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
+            partial_acc[2 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i];
+            partial_acc[3 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
+        }
+    } else if (subgroup_id == 3) {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
+            partial_acc[4 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i];
+            partial_acc[5 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (subgroup_id >= 2)
+        return;
+    __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+    for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
+        dotProd[i] += partial_acc[(i + subgroup_id * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
+        dotProd[i] += partial_acc[(i + (subgroup_id + 2) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
+        dotProd[i] += partial_acc[(i + (subgroup_id + 4) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
+    }
+#endif
+
+#if FEATURE_LWS_SPLIT == 1
+#   define OUTPUT_FEATURES_PER_WI 2
+#   if BIAS_TERM
+    BIAS_TYPE bias[OUTPUT_FEATURES_PER_WI] = { biases[out_f], biases[out_f + 16] };
+#   endif
+#else
+#   define OUTPUT_FEATURES_PER_WI 1
+#   if BIAS_TERM
+    BIAS_TYPE bias[OUTPUT_FEATURES_PER_WI] = { biases[out_f + subgroup_id * 16] };
+#   endif
+#endif
+
+    for (uint j = 0; j < OUTPUT_FEATURES_PER_WI; j++) {
+        uint out_f_offset = subgroup_id * 16 + j * 16;
+
+#if OUTPUT_FEATURE_NUM % 32 != 0 && OUTPUT_FEATURE_NUM % 32 <= 16
+        if (out_fg + 32 > OUTPUT_FEATURE_NUM && out_f_offset >= OUTPUT_FEATURE_NUM % 32)
+            break;
+#endif
+
+        const uint dst_index = GET_DATA_B_FS_YX_FSV16_INDEX(OUTPUT, out_b, out_f + out_f_offset, out_y, out_x);
+#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD
+        FUSED_OPS_PRELOAD
+#endif
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
+
+#if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0
+            if (out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && i >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH)
+                break;
+#endif
+            ACTIVATION_TYPE dequantized = (ACTIVATION_TYPE)0;
+#if BIAS_TERM
+            dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i] + bias[j];
+#else
+            dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i];
+#endif
+            OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+            #if FUSED_OPS_CAN_USE_PRELOAD
+                FUSED_OPS_CALC
+            #else
+                FUSED_OPS
+            #endif
+            result = FUSED_OPS_RESULT;
+#else
+            result = TO_OUTPUT_TYPE(dequantized);
+#endif
+
+#if OUTPUT_FEATURE_NUM % 16 != 0
+            if (out_fg + out_f_offset + 16 > OUTPUT_FEATURE_NUM && subgroup_local_id >= OUTPUT_FEATURE_NUM % 16)
+                result = (OUTPUT_TYPE)0;
+#endif
+            output[dst_index + i * 16] = result;
+        }
+    }
+
+#undef OUTPUT_FEATURES_PER_WI
+}
+
+#undef AS_INPUT0_TYPE_4
+#undef AS_TYPE_N
+#undef AS_TYPE_N_
+#undef MAKE_VECTOR_TYPE
+#undef TO_ACTIVATION_TYPE
+#undef ACTIVATION_TYPE
+#undef TO_ACCUMULATOR_TYPE
+#undef ACCUMULATOR_TYPE
+
+#undef CEIL_DIV
+#undef ALIGN
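
For the LUT_VALUE_CLAMP table in the 1x1 kernel above: each of the 16 sub-group lanes loads the input column at offset tmp[lane], and lanes beyond the (OUT_BLOCK_WIDTH - 1) * STRIDE_SIZE_X + 1 window are clamped to offset 0 so their unused loads stay in bounds. A small C++ sketch printing the table for OUT_BLOCK_WIDTH = 7 and STRIDE_SIZE_X = 1 (illustrative parameter choices):

#include <iostream>

int main() {
    const int OUT_BLOCK_WIDTH = 7, STRIDE_SIZE_X = 1;             // example configuration
    const int window = (OUT_BLOCK_WIDTH - 1) * STRIDE_SIZE_X + 1; // 7 input columns are actually needed

    for (int lane = 0; lane < 16; ++lane) {
        int offset = lane < window ? lane : 0;                    // LUT_VALUE_CLAMP(lane)
        std::cout << offset << (lane == 15 ? '\n' : ' ');
    }
    // prints: 0 1 2 3 4 5 6 0 0 0 0 0 0 0 0 0
    return 0;
}
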
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3.cl
new file mode 100644 (file)
index 0000000..5915c84
--- /dev/null
@@ -0,0 +1,187 @@
+// Copyright (c) 2018-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/mmad.cl"
+
+#if QUANTIZATION_TERM
+#define ACCUMULATOR_TYPE int
+#define TO_ACCUMULATOR_TYPE(x) convert_int(x)
+#define ACTIVATION_TYPE float
+#define TO_ACTIVATION_TYPE(x) convert_float(x)
+#else
+#define ACCUMULATOR_TYPE INPUT0_TYPE
+#define TO_ACCUMULATOR_TYPE(x) TO_INPUT0_TYPE(x)
+#define ACTIVATION_TYPE INPUT0_TYPE
+#define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x)
+#endif
+
+#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
+#define AS_TYPE_N_(type, n, x) as_##type##n(x)
+#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
+#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
+
+#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b))
+#define ALIGN(a, b) (CEIL_DIV(a, b) * (b))
+
+// int8 conv_input and weights data are packed into int32 "batches";
+// the loads below reinterpret the pointers as int/uint instead of INPUT0_TYPE/FILTER_TYPE for convenience
+__attribute__((intel_reqd_sub_group_size(16)))
+__attribute__((reqd_work_group_size(1, 1, 16)))
+KERNEL(convolution_gpu_b_fs_yx_fsv16_imad_3x3)(
+    const __global INPUT0_TYPE *conv_input,
+    __global OUTPUT_TYPE *output,
+    const __global FILTER_TYPE *weights,
+#if BIAS_TERM
+    const __global BIAS_TYPE *biases,
+#endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
+    uint split_idx) {
+
+    #define LUT_VALUE_CLAMP(x) ((x) < (OUT_BLOCK_WIDTH - 1) * STRIDE_SIZE_X + FILTER_SIZE_X ? (x) : 0)
+    const int tmp[16] = {
+        LUT_VALUE_CLAMP(0),
+        LUT_VALUE_CLAMP(1),
+        LUT_VALUE_CLAMP(2),
+        LUT_VALUE_CLAMP(3),
+        LUT_VALUE_CLAMP(4),
+        LUT_VALUE_CLAMP(5),
+        LUT_VALUE_CLAMP(6),
+        LUT_VALUE_CLAMP(7),
+        LUT_VALUE_CLAMP(8),
+        LUT_VALUE_CLAMP(9),
+        LUT_VALUE_CLAMP(10),
+        LUT_VALUE_CLAMP(11),
+        LUT_VALUE_CLAMP(12),
+        LUT_VALUE_CLAMP(13),
+        LUT_VALUE_CLAMP(14),
+        LUT_VALUE_CLAMP(15)
+    };
+    #undef LUT_VALUE_CLAMP
+
+    const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH;
+    const uint out_y = get_global_id(1);
+    const uint out_b = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) / ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD);
+    const uint out_fg = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) % ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD);
+    const uint out_f = out_fg + get_sub_group_local_id();
+    ACCUMULATOR_TYPE dotProd[OUT_BLOCK_WIDTH * OFM_BLOCKS_PER_SIMD] = {0};
+    const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
+
+    const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    uint filter_idx  = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, 0, 0, 0);
+#if OFM_BLOCKS_PER_SIMD == 2
+    uint filter_idx2 = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f + 16, 0, 0, 0);
+#endif
+
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16); k++) {
+        __attribute__((opencl_unroll_hint(1)))
+        for (uint j = 0; j < FILTER_SIZE_Y; j++) {
+            uint input_idx = GET_DATA_B_FS_YX_FSV16_INDEX(INPUT0, out_b, k * 16, input_y + j, input_x + tmp[get_sub_group_local_id()]);
+            uint4 input_val0 = vload4(0, (__global uint *)(conv_input + input_idx));
+
+            __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+            for (uint i = 0; i < FILTER_SIZE_X; i++) {
+
+                uint4 weights_val = vload4(0, (__global uint *)(weights + filter_idx));
+#if OFM_BLOCKS_PER_SIMD == 2
+                uint4 weights_val3 = vload4(0, (__global uint *)(weights + filter_idx2));
+#endif
+
+                __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+                for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) {
+                    const uint ow_offset = ow + OUT_BLOCK_WIDTH;
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s0)));
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s1)));
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s2)));
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s3)));
+
+#if OFM_BLOCKS_PER_SIMD == 2
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)),  as_char4(weights_val3.s0)));
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)),  as_char4(weights_val3.s1)));
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)),  as_char4(weights_val3.s2)));
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)),  as_char4(weights_val3.s3)));
+#endif
+                }
+                filter_idx += 16 * 16;
+#if OFM_BLOCKS_PER_SIMD == 2
+                filter_idx2 += 16 * 16;
+#endif
+            }
+        } 
+    }
+
+#if BIAS_TERM
+    BIAS_TYPE bias[OFM_BLOCKS_PER_SIMD] = { biases[out_f]
+#if OFM_BLOCKS_PER_SIMD == 2
+        , biases[out_f + 16]
+#endif
+    };
+#endif
+    __attribute__((opencl_unroll_hint(OFM_BLOCKS_PER_SIMD)))
+    for (uint j = 0; j < OFM_BLOCKS_PER_SIMD; j++) {
+        const uint dst_index = GET_DATA_B_FS_YX_FSV16_INDEX(OUTPUT, out_b, out_f + j * 16, out_y, out_x);
+#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD
+        FUSED_OPS_PRELOAD;
+#endif
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) {
+
+#if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0
+            if (out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && i >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH)
+                break;
+#endif
+            ACTIVATION_TYPE dequantized = (ACTIVATION_TYPE)0;
+#if BIAS_TERM
+            dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i] + bias[j];
+#else
+            dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i];
+#endif
+            OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+    #if FUSED_OPS_CAN_USE_PRELOAD
+            FUSED_OPS_CALC;
+    #else
+            FUSED_OPS;
+    #endif
+            result = FUSED_OPS_RESULT;
+#else
+            result = TO_OUTPUT_TYPE(dequantized);
+#endif
+
+#if OUTPUT_FEATURE_NUM % 16 != 0
+            if (out_fg + j * 16 + 16 > OUTPUT_FEATURE_NUM && get_sub_group_local_id() >= OUTPUT_FEATURE_NUM % 16)
+                result = (OUTPUT_TYPE)0;
+#endif
+            output[dst_index + i * 16] = result;
+        }
+    }
+}
+
+#undef AS_INPUT0_TYPE_4
+#undef AS_TYPE_N
+#undef AS_TYPE_N_
+#undef MAKE_VECTOR_TYPE
+#undef TO_ACTIVATION_TYPE
+#undef ACTIVATION_TYPE
+#undef TO_ACCUMULATOR_TYPE
+#undef ACCUMULATOR_TYPE
+
+#undef CEIL_DIV
+#undef ALIGN
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks.cl
new file mode 100644 (file)
index 0000000..df87ae0
--- /dev/null
@@ -0,0 +1,197 @@
+// Copyright (c) 2018-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/common.cl"
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/mmad.cl"
+
+#if QUANTIZATION_TERM
+    #define ACCUMULATOR_TYPE int
+    #define TO_ACCUMULATOR_TYPE(x) convert_int(x)
+    #define ACTIVATION_TYPE float
+    #define TO_ACTIVATION_TYPE(x) convert_float(x)
+#else
+    #define ACCUMULATOR_TYPE INPUT0_TYPE
+    #define TO_ACCUMULATOR_TYPE(x) TO_INPUT0_TYPE(x)
+    #define ACTIVATION_TYPE INPUT0_TYPE
+    #define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x)
+#endif
+
+#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
+#define AS_TYPE_N_(type, n, x) as_##type##n(x)
+#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
+#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
+
+#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b))
+
+__attribute__((intel_reqd_sub_group_size(16)))
+KERNEL(convolution_gpu_b_fs_yx_fsv16_3x3_ks)(
+    const __global INPUT0_TYPE   *conv_input,
+    __global OUTPUT_TYPE         *output,
+    const __global FILTER_TYPE    *weights,
+#if BIAS_TERM
+    const __global BIAS_TYPE     *biases,
+#endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
+    uint split_idx)
+{
+#if OUT_BLOCK_WIDTH == 7 && STRIDE_SIZE_X == 1
+    const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0};
+#elif OUT_BLOCK_WIDTH == 7 && STRIDE_SIZE_X == 2
+    const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0};
+#elif OUT_BLOCK_WIDTH == 8 && STRIDE_SIZE_X == 1
+    const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0};
+#else  // OUT_BLOCK_WIDTH == 8 && STRIDE_SIZE_X == 2
+    const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+#endif
+
+    const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH;
+    const uint out_y = get_global_id(1);
+    const uint out_f = (uint)(get_group_id(2) * 32 + get_sub_group_local_id());
+    const uint subgroup_id = get_sub_group_id();
+    const uint subgroup_local_id = get_sub_group_local_id();
+    const uint feature_offset = subgroup_id * INPUT0_FEATURE_NUM / 4;
+    const uint out_b = (uint)(get_group_id(2) * 32) / OUTPUT_FEATURE_NUM;
+
+    ACCUMULATOR_TYPE dotProd[OUT_BLOCK_WIDTH * 2] = { 0 };
+    const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, feature_offset, 0, 0);
+    uint diff_filter_idx = 16*3*3*FILTER_IFM_NUM;
+
+    __attribute__((opencl_unroll_hint(1)))
+    for(uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16)/4; k++ ) {
+        __attribute__((opencl_unroll_hint(1)))
+        for(uint j = 0; j < FILTER_SIZE_Y; j++) {
+            uint input_idx = GET_DATA_B_FS_YX_FSV16_INDEX(INPUT0, out_b, feature_offset + k * 16, input_y + j, input_x + tmp[subgroup_local_id]);
+            uint4 input_val0 = vload4(0, (__global uint *)(conv_input + input_idx));
+            
+             __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+            for(uint i = 0; i < FILTER_SIZE_X; i++) {
+
+                uint4 weights_val = vload4(0, (__global uint*)(weights + filter_idx));                
+                uint4 weights_val3 = vload4(0, (__global uint *)(weights + filter_idx + diff_filter_idx));
+
+                __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+                for(uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) {
+                    const uint ow_offset = ow + OUT_BLOCK_WIDTH;
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s0)));
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s1)));
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s2)));
+                    dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s3)));
+
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s0)));
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s1)));
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s2)));
+                    dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s3)));
+                }
+                filter_idx += 16 * 16;
+            }
+        }
+    }
+
+    // k-slicing: sum up the partial accumulators through SLM
+    __local ACCUMULATOR_TYPE partial_acc[16 * OUT_BLOCK_WIDTH * 6];
+    if(subgroup_id == 0)
+    {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for(uint i = 0; i < OUT_BLOCK_WIDTH; i++)
+        {
+            partial_acc[16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
+        }       
+    }
+    else if(subgroup_id == 1)
+    {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for(uint i = 0; i < OUT_BLOCK_WIDTH; i++)
+        {
+            partial_acc[i * 16 + subgroup_local_id] = dotProd[i];
+            dotProd[i] = dotProd[i + OUT_BLOCK_WIDTH];
+        }
+    }
+    else if (subgroup_id == 2)
+    {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
+        {
+            partial_acc[2 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i];
+            partial_acc[3 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
+
+        }
+    }
+    else if (subgroup_id == 3)
+    {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
+        {
+            partial_acc[4 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i];
+            partial_acc[5 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH];
+        }
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (subgroup_id < 2) {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
+        {
+            dotProd[i] += partial_acc[(i + subgroup_id * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
+            dotProd[i] += partial_acc[(i + (subgroup_id + 2) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
+            dotProd[i] += partial_acc[(i + (subgroup_id + 4) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id];
+        }
+#if BIAS_TERM
+    BIAS_TYPE bias = biases[out_f + get_sub_group_id() * 16];
+#endif
+
+#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD
+    FUSED_OPS_PRELOAD;
+#endif
+    const uint dst_index = GET_DATA_B_FS_YX_FSV16_INDEX(OUTPUT, out_b, out_f + subgroup_id * 16, out_y, out_x);
+     __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+    for (uint i = 0; i < OUT_BLOCK_WIDTH; i++)
+    {
+        ACTIVATION_TYPE dequantized = (ACTIVATION_TYPE)0;
+#if BIAS_TERM
+        dequantized = (ACTIVATION_TYPE)dotProd[i] + bias;
+#else
+        dequantized = (ACTIVATION_TYPE)dotProd[i];
+#endif
+#if HAS_FUSED_OPS
+    #if FUSED_OPS_CAN_USE_PRELOAD
+        FUSED_OPS_CALC;
+    #else
+        FUSED_OPS;
+    #endif
+        output[dst_index + i * 16] = FUSED_OPS_RESULT;
+#else
+        output[dst_index + i * 16] = TO_OUTPUT_TYPE(dequantized);
+#endif
+    }
+    }
+}
+
+#undef AS_INPUT0_TYPE_4
+#undef AS_TYPE_N
+#undef AS_TYPE_N_
+#undef MAKE_VECTOR_TYPE
+#undef TO_ACTIVATION_TYPE
+#undef ACTIVATION_TYPE
+#undef TO_ACCUMULATOR_TYPE
+#undef ACCUMULATOR_TYPE
+
+#undef CEIL_DIV
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv_16_32_imad_dw.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv_16_32_imad_dw.cl
new file mode 100644 (file)
index 0000000..742e544
--- /dev/null
@@ -0,0 +1,700 @@
+/*
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "include/imad.cl"
+#include "include/data_types.cl"
+#include "include/fetch.cl"
+#include "include/mmad.cl"
+
+// ======================================================================================
+// Host side jit-constants:
+// ======================================================================================
+// SIMD   [{8, 16}] - Sub-group/simd size for the kernel. Used as third dimension of
+//                    local work size.
+// TILE_X [uint] - Number of output values along the x dimension calculated by a single
+//                 work-item/sub-group.
+// LWS0 [uint] - Local work size 0th dimension.
+// LWS1 [uint] - Local work size 1st dimension.
+// FILTER_BLOCKED - Number of filter spatial elements to process using IMAD. Must be less
+//                  than or equal to the total filter spatial size.
+//                  Currently only supported as a multiple of 4.
+// ======================================================================================
+// Supported operations:
+// input/output format: any b_fs_yx_fsv<k> - where <k> >= SIMD,
+//                      input and output formats must be the same
+// weights format:      os_i_yxs_oxv<k>_yxsv4 - where <k> same as in input format
+// input data types:   uchar8, char8
+// weights data types: uchar8, char8
+// output data types:  uchar8, char8, half, float
+// asymmetric quantization: weights zero points, compensation term
+// ======================================================================================
+
+#if OUTPUT_LAYOUT_B_FS_YX_FSV16
+#   define FSV 16
+#elif OUTPUT_LAYOUT_B_FS_YX_FSV32
+#   define FSV 32
+#else
+#   error convolution_gpu_b_fs_yx_fsv_16_32_imad_dw.cl - unsupported output layout.
+#endif
+
+#define F_PER_WI ((FSV) / (SIMD))
+
+#define DEQUANTIZED_TYPE float
+#define DEQUANTIZED_TYPE2 MAKE_VECTOR_TYPE(DEQUANTIZED_TYPE, 2)
+#define DEQUANTIZED_TYPE4 MAKE_VECTOR_TYPE(DEQUANTIZED_TYPE, 4)
+
+#define INPUT_TYPE        INPUT0_TYPE
+#define INPUT_TYPE2       MAKE_VECTOR_TYPE(INPUT0_TYPE, 2)
+#define INPUT_TYPE4       MAKE_VECTOR_TYPE(INPUT0_TYPE, 4)
+#define INPUT_TYPE8       MAKE_VECTOR_TYPE(INPUT0_TYPE, 8)
+#define INPUT_TYPE16      MAKE_VECTOR_TYPE(INPUT0_TYPE, 16)
+
+#define FILTER_TYPE4      MAKE_VECTOR_TYPE(FILTER_TYPE, 4)
+
+#define OUTPUT_TYPE2      MAKE_VECTOR_TYPE(OUTPUT_TYPE, 2)
+#define OUTPUT_TYPE4      MAKE_VECTOR_TYPE(OUTPUT_TYPE, 4)
+#define OUTPUT_TYPE8      MAKE_VECTOR_TYPE(OUTPUT_TYPE, 8)
+#define OUTPUT_TYPE16     MAKE_VECTOR_TYPE(OUTPUT_TYPE, 16)
+
+#define AS_INPUT_TYPE(val)        CAT(as_, INPUT_TYPE)(val)
+#define AS_INPUT_TYPE2(val)       CAT(as_, INPUT_TYPE2)(val)
+#define AS_INPUT_TYPE4(val)       CAT(as_, INPUT_TYPE4)(val)
+#define AS_INPUT_TYPE8(val)       CAT(as_, INPUT_TYPE8)(val)
+#define AS_INPUT_TYPE16(val)      CAT(as_, INPUT_TYPE16)(val)
+
+#define AS_FILTER_TYPE4(val)      CAT(as_, FILTER_TYPE4)(val)
+
+#define TO_DEQUANTIZED_TYPE(val)  CAT(convert_, DEQUANTIZED_TYPE)(val)
+
+#define GET_INPUT_INDEX(b, f, y, x)    INPUT0_GET_INDEX(b, f, y, x)
+#if FSV == 16
+#   define GET_WEIGHTS_INDEX(g, o, i, y, x)  GET_FILTER_GS_OI_YXS_GSV16_YXSV4_INDEX(FILTER, g, 0, 0, y, x)
+#else
+#   define GET_WEIGHTS_INDEX(g, o, i, y, x)  GET_FILTER_GS_OI_YXS_GSV16_YXSV4_INDEX(FILTER, g, 0, 0, y, x)
+#endif
+#define GET_OUTPUT_INDEX(b, f, y, x)   OUTPUT_GET_INDEX(b, f, y, x)
+#define GET_BIAS_INDEX(b, f, y, x)     BIAS_GET_INDEX(b, f, y, x)
+
+#define INPUT_X_PITCH FSV
+#define INPUT_Y_PITCH (FSV * (INPUT0_SIZE_X + INPUT0_PAD_BEFORE_SIZE_X + INPUT0_PAD_AFTER_SIZE_X))
+
+#define WEIGHTS_YXS_PITCH (4 * FSV)
+
+#define FILTER_SPATIAL_SIZE (FILTER_SIZE_X * FILTER_SIZE_Y)
+
+#if OUTPUT_TYPE_SIZE == 1
+#   define OUTPUT_BLOCK_WRITE(ptr, val)    BLOCK_WRITE_UC_1((__global uchar*)(ptr), as_uchar(val));
+#   define OUTPUT_BLOCK_WRITE2(ptr, val)   BLOCK_WRITE_UC_2((__global uchar*)(ptr), as_uchar2(val));
+#   define OUTPUT_BLOCK_WRITE4(ptr, val)   BLOCK_WRITE_UC_4((__global uchar*)(ptr), as_uchar4(val));
+#   define OUTPUT_BLOCK_WRITE8(ptr, val)   BLOCK_WRITE_UC_8((__global uchar*)(ptr), as_uchar8(val));
+#   define OUTPUT_BLOCK_WRITE16(ptr, val)  BLOCK_WRITE_UC_16((__global uchar*)(ptr), as_uchar16(val));
+#elif OUTPUT_TYPE_SIZE == 2
+#   define OUTPUT_BLOCK_WRITE(ptr, val)    intel_sub_group_block_write_us((__global ushort*)(ptr), as_ushort(val));
+#   define OUTPUT_BLOCK_WRITE2(ptr, val)   intel_sub_group_block_write_us2((__global ushort*)(ptr), as_ushort2(val));
+#   define OUTPUT_BLOCK_WRITE4(ptr, val)   intel_sub_group_block_write_us4((__global ushort*)(ptr), as_ushort4(val));
+#   define OUTPUT_BLOCK_WRITE8(ptr, val)   intel_sub_group_block_write_us8((__global ushort*)(ptr), as_ushort8(val));
+#   define OUTPUT_BLOCK_WRITE16(ptr, val)                                               \
+    OUTPUT_BLOCK_WRITE8(ptr, (val).lo)                                                  \
+    OUTPUT_BLOCK_WRITE8((__global ushort*)(ptr) + 8 * get_max_sub_group_size(), (val).hi)
+#elif OUTPUT_TYPE_SIZE == 4
+#   define OUTPUT_BLOCK_WRITE(ptr, val)    intel_sub_group_block_write((__global uint*)(ptr), as_uint(val));
+#   define OUTPUT_BLOCK_WRITE2(ptr, val)   intel_sub_group_block_write2((__global uint*)(ptr), as_uint2(val));
+#   define OUTPUT_BLOCK_WRITE4(ptr, val)   intel_sub_group_block_write4((__global uint*)(ptr), as_uint4(val));
+#   define OUTPUT_BLOCK_WRITE8(ptr, val)   intel_sub_group_block_write8((__global uint*)(ptr), as_uint8(val));
+#   define OUTPUT_BLOCK_WRITE16(ptr, val)                                               \
+    OUTPUT_BLOCK_WRITE8(ptr, (val).lo)                                                  \
+    OUTPUT_BLOCK_WRITE8((__global uint*)(ptr) + 8 * get_max_sub_group_size(), (val).hi)
+#else
+#   error convolution_gpu_b_fs_yx_fsv_16_32_imad_dw.cl - unsupported output type.
+#endif
+
+#define VEC_TO_ARRAY_2(arr, vec, offset)                \
+    (arr)[(offset) + 0] = (vec).s0;                     \
+    (arr)[(offset) + 1] = (vec).s1
+#define VEC_TO_ARRAY_4(arr, vec, offset)                \
+    VEC_TO_ARRAY_2(arr, (vec).s01, offset);             \
+    VEC_TO_ARRAY_2(arr, (vec).s23, (offset) + 2)
+#define VEC_TO_ARRAY_8(arr, vec, offset)                \
+    VEC_TO_ARRAY_4(arr, (vec).s0123, offset);           \
+    VEC_TO_ARRAY_4(arr, (vec).s4567, (offset) + 4)
+#define VEC_TO_ARRAY_16(arr, vec, offset)               \
+    VEC_TO_ARRAY_8(arr, (vec).s01234567, offset);       \
+    VEC_TO_ARRAY_8(arr, (vec).s89abcdef, (offset) + 8)
+
+#define ARRAY_TO_VEC_2(vec, arr, offset)                \
+    (vec).s0 = (arr)[(offset)];                         \
+    (vec).s1 = (arr)[(offset) + 1]
+
+#define ARRAY_TO_VEC_4(vec, arr, offset)                \
+    ARRAY_TO_VEC_2((vec).s01, arr, offset);             \
+    ARRAY_TO_VEC_2((vec).s23, arr, (offset) + 2)
+
+#define ARRAY_TO_VEC_8(vec, arr, offset)                \
+    ARRAY_TO_VEC_4((vec).s0123, arr, offset);           \
+    ARRAY_TO_VEC_4((vec).s4567, arr, (offset) + 4)
+
+#define ARRAY_TO_VEC_16(vec, arr, offset)               \
+    ARRAY_TO_VEC_8((vec).s01234567, arr, offset);       \
+    ARRAY_TO_VEC_8((vec).s89abcdef, arr, (offset) + 8)
+
+#if FILTER_BLOCKED % 4 != 0
+#   error convolution_gpu_b_fs_yx_fsv_16_32_imad_dw.cl - FILTER_BLOCKED must be multiple of 4.
+#endif
+
+#ifndef OUTPUT_PAD_VALUE
+#   define OUTPUT_PAD_VALUE (OUTPUT_TYPE)(0)
+#   define OUTPUT_PAD_VALUE_undef
+#endif
+
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+__attribute__((reqd_work_group_size(LWS0, LWS1, SIMD)))
+KERNEL(convolution)(
+    const __global  INPUT0_TYPE  *input,
+    __global        OUTPUT_TYPE  *output,
+    const __global  FILTER_TYPE  *weights,
+#if BIAS_TERM
+    const __global BIAS_TYPE     *biases,
+#endif
+#if ASYMMETRIC_WEIGHTS_QUANTIZATION
+    const __global WEIGHTS_ZERO_POINTS_TYPE *weights_zp,
+#endif
+#if ASYMMETRIC_DATA_QUANTIZATION
+    const __global ACTIVATIONS_ZERO_POINTS_TYPE *activations_zp,
+#endif
+#if COMPENSATION_TERM
+    const __global COMPENSATION_TYPE *compensation,
+#endif
+#if HAS_FUSED_OPS_DECLS
+    FUSED_OPS_DECLS,
+#endif
+    uint split_idx
+) {
+    uint x = get_global_id(0) * TILE_X;
+    uint y = get_global_id(1);
+    uint bf = get_group_id(2);
+    uint b = bf % OUTPUT_BATCH_NUM;
+    uint f = bf / OUTPUT_BATCH_NUM * FSV;
+
+    uint input_offset = GET_INPUT_INDEX(b, f, (int)y * STRIDE_SIZE_Y - PADDING_SIZE_Y, (int)x * STRIDE_SIZE_X - PADDING_SIZE_X);
+    uint weights_offset = GET_WEIGHTS_INDEX(f, 0, 0, 0, 0);
+
+    int acc[TILE_X * F_PER_WI] = { };
+#if ASYMMETRIC_WEIGHTS_QUANTIZATION
+    int src_sum[TILE_X * F_PER_WI] = { };
+#endif
+
+    __attribute__((opencl_unroll_hint))
+    for (uint fi = 0; fi < FILTER_BLOCKED / 4 * 4; fi += 4) {
+        // Loop over 4 filter spatials that match imad case
+        uint4 fis = (uint4)(fi, fi + 1, fi + 2, fi + 3);
+
+        uint4 fx = fis % FILTER_SIZE_X;
+        uint4 fy = fis / FILTER_SIZE_X;
+
+        // Input loading:
+        INPUT_TYPE in_trans0[TILE_X * F_PER_WI];
+        INPUT_TYPE in_trans1[TILE_X * F_PER_WI];
+        INPUT_TYPE in_trans2[TILE_X * F_PER_WI];
+        INPUT_TYPE in_trans3[TILE_X * F_PER_WI];
+#if STRIDE_SIZE_X == 1
+        // With stride 1, block reads can be used to load all TILE_X inputs at once.
+        // Block-read ladder selecting the optimal combination of block reads for TILE_X
+        uint4 input_x_offset = fx * (DILATION_SIZE_X * INPUT_X_PITCH);
+        uint4 input_y_offset = fy * (DILATION_SIZE_Y * INPUT_Y_PITCH);
+        uint4 input_spatial_offset = input_x_offset + input_y_offset;
+        uint4 input_idx = input_spatial_offset + input_offset;
+
+        uint tx = 0;
+        __attribute__((opencl_unroll_hint))
+        for (; tx + 16 <= TILE_X * F_PER_WI; tx += 16) {
+            INPUT_TYPE16 tmp_in0 = AS_INPUT_TYPE16(BLOCK_READ_UC_16((const __global uchar*)(input + input_idx.s0)));
+            INPUT_TYPE16 tmp_in1 = AS_INPUT_TYPE16(BLOCK_READ_UC_16((const __global uchar*)(input + input_idx.s1)));
+            INPUT_TYPE16 tmp_in2 = AS_INPUT_TYPE16(BLOCK_READ_UC_16((const __global uchar*)(input + input_idx.s2)));
+            INPUT_TYPE16 tmp_in3 = AS_INPUT_TYPE16(BLOCK_READ_UC_16((const __global uchar*)(input + input_idx.s3)));
+
+            VEC_TO_ARRAY_16(in_trans0, tmp_in0, tx);
+            VEC_TO_ARRAY_16(in_trans1, tmp_in1, tx);
+            VEC_TO_ARRAY_16(in_trans2, tmp_in2, tx);
+            VEC_TO_ARRAY_16(in_trans3, tmp_in3, tx);
+
+            input_idx += 16 * SIMD;
+        }
+        if (TILE_X * F_PER_WI % 16 >= 8) {
+            INPUT_TYPE8 tmp_in0 = AS_INPUT_TYPE8(BLOCK_READ_UC_8((const __global uchar*)(input + input_idx.s0)));
+            INPUT_TYPE8 tmp_in1 = AS_INPUT_TYPE8(BLOCK_READ_UC_8((const __global uchar*)(input + input_idx.s1)));
+            INPUT_TYPE8 tmp_in2 = AS_INPUT_TYPE8(BLOCK_READ_UC_8((const __global uchar*)(input + input_idx.s2)));
+            INPUT_TYPE8 tmp_in3 = AS_INPUT_TYPE8(BLOCK_READ_UC_8((const __global uchar*)(input + input_idx.s3)));
+
+            VEC_TO_ARRAY_8(in_trans0, tmp_in0, tx);
+            VEC_TO_ARRAY_8(in_trans1, tmp_in1, tx);
+            VEC_TO_ARRAY_8(in_trans2, tmp_in2, tx);
+            VEC_TO_ARRAY_8(in_trans3, tmp_in3, tx);
+
+            input_idx += 8 * SIMD;
+            tx += 8;
+        }
+        if (TILE_X * F_PER_WI % 8 >= 4) {
+            INPUT_TYPE4 tmp_in0 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s0)));
+            INPUT_TYPE4 tmp_in1 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s1)));
+            INPUT_TYPE4 tmp_in2 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s2)));
+            INPUT_TYPE4 tmp_in3 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s3)));
+
+            VEC_TO_ARRAY_4(in_trans0, tmp_in0, tx);
+            VEC_TO_ARRAY_4(in_trans1, tmp_in1, tx);
+            VEC_TO_ARRAY_4(in_trans2, tmp_in2, tx);
+            VEC_TO_ARRAY_4(in_trans3, tmp_in3, tx);
+
+            input_idx += 4 * SIMD;
+            tx += 4;
+        }
+        if (TILE_X * F_PER_WI % 4 >= 2) {
+            INPUT_TYPE2 tmp_in0 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s0)));
+            INPUT_TYPE2 tmp_in1 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s1)));
+            INPUT_TYPE2 tmp_in2 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s2)));
+            INPUT_TYPE2 tmp_in3 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s3)));
+
+            VEC_TO_ARRAY_2(in_trans0, tmp_in0, tx);
+            VEC_TO_ARRAY_2(in_trans1, tmp_in1, tx);
+            VEC_TO_ARRAY_2(in_trans2, tmp_in2, tx);
+            VEC_TO_ARRAY_2(in_trans3, tmp_in3, tx);
+
+            input_idx += 2 * SIMD;
+            tx += 2;
+        }
+        if (TILE_X * F_PER_WI % 2 == 1) {
+            in_trans0[tx] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s0)));
+            in_trans1[tx] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s1)));
+            in_trans2[tx] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s2)));
+            in_trans3[tx] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s3)));
+        }
+#else
+        uint4 input_x_offset = fx * DILATION_SIZE_X * INPUT_X_PITCH;
+        uint4 input_y_offset = fy * DILATION_SIZE_Y * INPUT_Y_PITCH;
+        uint4 input_spatial_offset = input_x_offset + input_y_offset;
+        uint4 input_start_offset = input_spatial_offset + input_offset;
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            uint4 input_idx = input_start_offset + tx * STRIDE_SIZE_X * INPUT_X_PITCH;
+            // Block reads along feature slice
+            uint fw = 0;
+            __attribute__((opencl_unroll_hint))
+            for (; fw + 4 <= F_PER_WI; fw += 4) {
+                INPUT_TYPE4 tmp_in0 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s0)));
+                INPUT_TYPE4 tmp_in1 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s1)));
+                INPUT_TYPE4 tmp_in2 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s2)));
+                INPUT_TYPE4 tmp_in3 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx.s3)));
+
+                VEC_TO_ARRAY_4(in_trans0, tmp_in0, tx * F_PER_WI + fw);
+                VEC_TO_ARRAY_4(in_trans1, tmp_in1, tx * F_PER_WI + fw);
+                VEC_TO_ARRAY_4(in_trans2, tmp_in2, tx * F_PER_WI + fw);
+                VEC_TO_ARRAY_4(in_trans3, tmp_in3, tx * F_PER_WI + fw);
+
+                input_idx += 4 * SIMD;
+            }
+            if (F_PER_WI % 4 >= 2) {
+                INPUT_TYPE2 tmp_in0 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s0)));
+                INPUT_TYPE2 tmp_in1 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s1)));
+                INPUT_TYPE2 tmp_in2 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s2)));
+                INPUT_TYPE2 tmp_in3 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx.s3)));
+
+                VEC_TO_ARRAY_2(in_trans0, tmp_in0, tx * F_PER_WI + fw);
+                VEC_TO_ARRAY_2(in_trans1, tmp_in1, tx * F_PER_WI + fw);
+                VEC_TO_ARRAY_2(in_trans2, tmp_in2, tx * F_PER_WI + fw);
+                VEC_TO_ARRAY_2(in_trans3, tmp_in3, tx * F_PER_WI + fw);
+
+                input_idx += 2 * SIMD;
+                fw += 2;
+            }
+            if (F_PER_WI % 2 == 1) {
+                in_trans0[tx * F_PER_WI + fw] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s0)));
+                in_trans1[tx * F_PER_WI + fw] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s1)));
+                in_trans2[tx * F_PER_WI + fw] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s2)));
+                in_trans3[tx * F_PER_WI + fw] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx.s3)));
+            }
+        }
+#endif
+        // Weights loading:
+        FILTER_TYPE4 wei[F_PER_WI];
+        __attribute__((opencl_unroll_hint))
+        for (uint fw = 0; fw < F_PER_WI; ++fw) {
+            wei[fw] = AS_FILTER_TYPE4(intel_sub_group_block_read((const __global uint*)(weights + weights_offset) + fw * SIMD));
+        }
+
+        // Transpose input:
+        INPUT_TYPE4 in[TILE_X * F_PER_WI];
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            __attribute__((opencl_unroll_hint))
+            for (uint fw = 0; fw < F_PER_WI; ++fw) {
+                uint in_offset = tx * F_PER_WI + fw;
+                in[in_offset] = (INPUT_TYPE4)(in_trans0[in_offset], in_trans1[in_offset], in_trans2[in_offset], in_trans3[in_offset]);
+            }
+        }
+
+        // IMAD:
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            __attribute__((opencl_unroll_hint))
+            for (uint fw = 0; fw < F_PER_WI; ++fw) {
+                acc[tx * F_PER_WI + fw] = IMAD(acc[tx * F_PER_WI + fw], in[tx * F_PER_WI + fw], wei[fw]);
+            }
+        }
+
+#if ASYMMETRIC_WEIGHTS_QUANTIZATION
+        // Accumulate input values for asymmetric weights:
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            __attribute__((opencl_unroll_hint))
+            for (uint fw = 0; fw < F_PER_WI; ++fw) {
+                src_sum[tx * F_PER_WI + fw] = IMAD(src_sum[tx * F_PER_WI + fw], in[tx * F_PER_WI + fw], (char4)(1, 1, 1, 1));
+            }
+        }
+#endif
+
+        weights_offset += WEIGHTS_YXS_PITCH;
+    }
+
+
+#if FILTER_BLOCKED < FILTER_SPATIAL_SIZE
+    // Leftovers in the filter spatial size - use raw multiplication instead of IMAD
+    // Load the weights before the loop to avoid byte-scattered reads; there are at most 3 leftovers
+    FILTER_TYPE4 wei[F_PER_WI];
+    __attribute__((opencl_unroll_hint))
+    for (uint fw = 0; fw < F_PER_WI; ++fw) {
+        wei[fw] = AS_FILTER_TYPE4(intel_sub_group_block_read((const __global uint*)(weights + weights_offset) + fw * SIMD));
+    }
+
+    __attribute__((opencl_unroll_hint))
+    for (uint fi = 0; fi < FILTER_SPATIAL_SIZE - FILTER_BLOCKED; ++fi) {
+        // Input loading:
+        uint fx = (fi + FILTER_BLOCKED) % FILTER_SIZE_X;
+        uint fy = (fi + FILTER_BLOCKED) / FILTER_SIZE_X;
+
+        INPUT_TYPE in_trans0[TILE_X * F_PER_WI];
+#   if STRIDE_SIZE_X == 1
+        uint input_x_offset = fx * (DILATION_SIZE_X * INPUT_X_PITCH);
+        uint input_y_offset = fy * (DILATION_SIZE_Y * INPUT_Y_PITCH);
+        uint input_spatial_offset = input_x_offset + input_y_offset;
+        uint input_idx = input_spatial_offset + input_offset;
+
+        uint tx = 0;
+        __attribute__((opencl_unroll_hint))
+        for (; tx + 16 <= TILE_X * F_PER_WI; tx += 16) {
+            INPUT_TYPE16 tmp_in0 = AS_INPUT_TYPE16(BLOCK_READ_UC_16((const __global uchar*)(input + input_idx)));
+            VEC_TO_ARRAY_16(in_trans0, tmp_in0, tx);
+            input_idx += 16 * SIMD;
+        }
+        if (TILE_X * F_PER_WI % 16 >= 8) {
+            INPUT_TYPE8 tmp_in0 = AS_INPUT_TYPE8(BLOCK_READ_UC_8((const __global uchar*)(input + input_idx)));
+            VEC_TO_ARRAY_8(in_trans0, tmp_in0, tx);
+            input_idx += 8 * SIMD;
+            tx += 8;
+        }
+        if (TILE_X * F_PER_WI % 8 >= 4) {
+            INPUT_TYPE4 tmp_in0 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx)));
+            VEC_TO_ARRAY_4(in_trans0, tmp_in0, tx);
+            input_idx += 4 * SIMD;
+            tx += 4;
+        }
+        if (TILE_X * F_PER_WI % 4 >= 2) {
+            INPUT_TYPE2 tmp_in0 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx)));
+            VEC_TO_ARRAY_2(in_trans0, tmp_in0, tx);
+            input_idx += 2 * SIMD;
+            tx += 2;
+        }
+        if (TILE_X * F_PER_WI % 2 == 1) {
+            in_trans0[tx] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx)));
+        }
+#   else
+        uint input_x_offset = fx * DILATION_SIZE_X * INPUT_X_PITCH;
+        uint input_y_offset = fy * DILATION_SIZE_Y * INPUT_Y_PITCH;
+        uint input_spatial_offset = input_x_offset + input_y_offset;
+        uint input_start_offset = input_spatial_offset + input_offset;
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            uint input_idx = input_start_offset + tx * STRIDE_SIZE_X * INPUT_X_PITCH;
+            uint fw = 0;
+            __attribute__((opencl_unroll_hint))
+            for (; fw + 4 <= F_PER_WI; fw += 4) {
+                INPUT_TYPE4 tmp_in0 = AS_INPUT_TYPE4(BLOCK_READ_UC_4((const __global uchar*)(input + input_idx)));
+                VEC_TO_ARRAY_4(in_trans0, tmp_in0, tx * F_PER_WI + fw);
+                input_idx += 4 * SIMD;
+            }
+            if (F_PER_WI % 4 >= 2) {
+                INPUT_TYPE2 tmp_in0 = AS_INPUT_TYPE2(BLOCK_READ_UC_2((const __global uchar*)(input + input_idx)));
+                VEC_TO_ARRAY_2(in_trans0, tmp_in0, tx * F_PER_WI + fw);
+                input_idx += 2 * SIMD;
+                fw += 2;
+            }
+            if (F_PER_WI % 2 == 1) {
+                in_trans0[tx * F_PER_WI + fw] = AS_INPUT_TYPE(BLOCK_READ_UC_1((const __global uchar*)(input + input_idx)));
+            }
+        }
+#   endif
+        // Raw multiply accumulate:
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            __attribute__((opencl_unroll_hint))
+            for (uint fw = 0; fw < F_PER_WI; ++fw) {
+                acc[tx * F_PER_WI + fw] += (int)in_trans0[tx * F_PER_WI + fw] * (int)wei[fw][fi];
+            }
+        }
+
+#if ASYMMETRIC_WEIGHTS_QUANTIZATION
+        // Accumulate input values for asymmetric weights:
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            __attribute__((opencl_unroll_hint))
+            for (uint fw = 0; fw < F_PER_WI; ++fw) {
+                src_sum[tx * F_PER_WI + fw] += (int)in_trans0[tx * F_PER_WI + fw];
+            }
+        }
+#endif
+    }
+#endif
+
+    DEQUANTIZED_TYPE dequantized[TILE_X * F_PER_WI];
+    for (uint tx = 0; tx < TILE_X * F_PER_WI; ++tx) {
+        dequantized[tx] = TO_DEQUANTIZED_TYPE(acc[tx]);
+    }
+
+#if BIAS_TERM
+#   if BIAS_PER_OFM
+    __attribute__((opencl_unroll_hint))
+    for (uint fw = 0; fw < F_PER_WI; ++fw) {
+        uint bias_offset = f + fw * SIMD + get_sub_group_local_id();
+        BIAS_TYPE bias = biases[bias_offset];
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            dequantized[tx * F_PER_WI + fw] += TO_DEQUANTIZED_TYPE(bias);
+        }
+    }
+#   elif BIAS_PER_OUTPUT
+    __attribute__((opencl_unroll_hint))
+    for (uint tx = 0; tx < TILE_X; ++tx) {
+        __attribute__((opencl_unroll_hint))
+        for (uint fw = 0; fw < F_PER_WI; ++fw) {
+            uint bias_offset = GET_BIAS_INDEX(b, f + fw * SIMD + get_sub_group_local_id(), y, x + tx);
+            BIAS_TYPE bias = biases[bias_offset];
+            dequantized[tx * F_PER_WI + fw] += TO_DEQUANTIZED_TYPE(bias);
+        }
+    }
+#   else
+#       error convolution_gpu_b_fs_yx_fsv_16_32_imad_dw.cl - unsupported bias mode.
+#   endif
+#endif
+
+#if ASYMMETRIC_WEIGHTS_QUANTIZATION
+    {
+        __attribute__((opencl_unroll_hint))
+        for (uint fw = 0; fw < F_PER_WI; ++fw) {
+            WEIGHTS_ZERO_POINTS_TYPE wzp = weights_zp[f + fw * SIMD + get_sub_group_local_id()];
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                dequantized[tx * F_PER_WI + fw] -= TO_DEQUANTIZED_TYPE(src_sum[tx * F_PER_WI + fw]) * TO_DEQUANTIZED_TYPE(wzp);
+            }
+        }
+    }
+#endif
+
+#if COMPENSATION_TERM
+    {
+        __attribute__((opencl_unroll_hint))
+        for (uint fw = 0; fw < F_PER_WI; ++fw) {
+            COMPENSATION_TYPE comp = compensation[f + fw * SIMD + get_sub_group_local_id()];
+            __attribute__((opencl_unroll_hint))
+            for (uint tx = 0; tx < TILE_X; ++tx) {
+                dequantized[tx * F_PER_WI + fw] += TO_DEQUANTIZED_TYPE(comp);
+            }
+        }
+    }
+#endif
+
+    OUTPUT_TYPE out[TILE_X * F_PER_WI];
+    // Fused ops and conversion to output type
+    __attribute__((opencl_unroll_hint))
+    for (uint tx = 0; tx < TILE_X; ++tx) {
+#if HAS_FUSED_OPS
+        uint fused_ops_x = x + tx;
+        uint fused_ops_f = f;
+        uint fw = 0;
+        __attribute__((opencl_unroll_hint))
+        for (; fw + 4 <= F_PER_WI; fw += 4) {
+            DEQUANTIZED_TYPE4 fused_ops_in;
+            ARRAY_TO_VEC_4(fused_ops_in, dequantized, tx * F_PER_WI + fw);
+            FUSED_OPS_4;
+            VEC_TO_ARRAY_4(out, FUSED_OPS_RESULT_4, tx * F_PER_WI + fw);
+            fused_ops_f += 4 * SIMD;
+        }
+        if (F_PER_WI % 4 >= 2) {
+            DEQUANTIZED_TYPE2 fused_ops_in;
+            ARRAY_TO_VEC_2(fused_ops_in, dequantized, tx * F_PER_WI + fw);
+            FUSED_OPS_2;
+            VEC_TO_ARRAY_2(out, FUSED_OPS_RESULT_2, tx * F_PER_WI + fw);
+            fw += 2;
+            fused_ops_f += 2 * SIMD;
+        }
+        if (F_PER_WI % 2 == 1) {
+            DEQUANTIZED_TYPE fused_ops_in;
+            fused_ops_in = dequantized[tx * F_PER_WI + fw];
+            FUSED_OPS_1;
+            out[tx * F_PER_WI + fw] = FUSED_OPS_RESULT_1;
+        }
+#else
+        __attribute__((opencl_unroll_hint))
+        for (uint fw = 0; fw < F_PER_WI; ++fw) {
+            out[tx * F_PER_WI + fw] = TO_OUTPUT_TYPE(dequantized[tx * F_PER_WI + fw]);
+        }
+#endif
+    }
+
+    // Fill results outside output in features with OUTPUT_PAD_VALUE.
+    if (OUTPUT_FEATURE_NUM % FSV != 0 && f + FSV > OUTPUT_FEATURE_NUM) {
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            __attribute__((opencl_unroll_hint))
+            for (uint fw = 0; fw < F_PER_WI; ++fw) {
+                bool outside = fw * SIMD + get_sub_group_local_id() >= OUTPUT_FEATURE_NUM % FSV;
+                out[tx * F_PER_WI + fw] = outside ? OUTPUT_PAD_VALUE : out[tx * F_PER_WI + fw];
+            }
+        }
+    }
+
+    uint output_offset = GET_OUTPUT_INDEX(b, f, y, x);
+
+    if (OUTPUT_SIZE_X % TILE_X == 0 || x + TILE_X <= OUTPUT_SIZE_X) {
+        // Full output tile x write using block write ladder
+        uint tx = 0;
+        __attribute__((opencl_unroll_hint))
+        for (; tx + 16 <= TILE_X * F_PER_WI; tx += 16) {
+            OUTPUT_TYPE16 tmp_write;
+            ARRAY_TO_VEC_16(tmp_write, out, tx);
+            OUTPUT_BLOCK_WRITE16(output + output_offset, tmp_write);
+            output_offset += 16 * SIMD;
+        }
+        if (TILE_X * F_PER_WI % 16 >= 8) {
+            OUTPUT_TYPE8 tmp_write;
+            ARRAY_TO_VEC_8(tmp_write, out, tx);
+            OUTPUT_BLOCK_WRITE8(output + output_offset, tmp_write);
+            tx += 8;
+            output_offset += 8 * SIMD;
+        }
+        if (TILE_X * F_PER_WI % 8 >= 4) {
+            OUTPUT_TYPE4 tmp_write;
+            ARRAY_TO_VEC_4(tmp_write, out, tx);
+            OUTPUT_BLOCK_WRITE4(output + output_offset, tmp_write);
+            tx += 4;
+            output_offset += 4 * SIMD;
+        }
+        if (TILE_X * F_PER_WI % 4 >= 2) {
+            OUTPUT_TYPE2 tmp_write;
+            ARRAY_TO_VEC_2(tmp_write, out, tx);
+            OUTPUT_BLOCK_WRITE2(output + output_offset, tmp_write);
+            tx += 2;
+            output_offset += 2 * SIMD;
+        }
+        if (TILE_X * F_PER_WI % 2 == 1) {
+            OUTPUT_BLOCK_WRITE(output + output_offset, out[tx]);
+        }
+    } else {
+        // Leftovers write, block writes in f dimension only
+        __attribute__((opencl_unroll_hint))
+        for (uint tx = 0; tx < TILE_X; ++tx) {
+            if (tx < OUTPUT_SIZE_X % TILE_X) {
+                uint fw = 0;
+                __attribute__((opencl_unroll_hint))
+                for (; fw + 4 <= F_PER_WI; fw += 4) {
+                    OUTPUT_TYPE4 tmp_write;
+                    ARRAY_TO_VEC_4(tmp_write, out, tx * F_PER_WI + fw);
+                    OUTPUT_BLOCK_WRITE4(output + output_offset + fw * SIMD, tmp_write);
+                }
+                if (F_PER_WI % 4 >= 2) {
+                    OUTPUT_TYPE2 tmp_write;
+                    ARRAY_TO_VEC_2(tmp_write, out, tx * F_PER_WI + fw);
+                    OUTPUT_BLOCK_WRITE2(output + output_offset + fw * SIMD, tmp_write);
+                    fw += 2;
+                }
+                if (F_PER_WI % 2 == 1) {
+                    OUTPUT_BLOCK_WRITE(output + output_offset + fw * SIMD, out[tx * F_PER_WI + fw]);
+                }
+            }
+            output_offset += FSV;
+        }
+    }
+}
+
+#undef FSV
+
+#undef F_PER_WI
+
+#undef DEQUANTIZED_TYPE
+#undef DEQUANTIZED_TYPE2
+#undef DEQUANTIZED_TYPE4
+
+#undef INPUT_TYPE
+#undef INPUT_TYPE2
+#undef INPUT_TYPE4
+#undef INPUT_TYPE8
+#undef INPUT_TYPE16
+
+#undef FILTER_TYPE4
+
+#undef OUTPUT_TYPE2
+#undef OUTPUT_TYPE4
+#undef OUTPUT_TYPE8
+#undef OUTPUT_TYPE16
+
+#undef AS_INPUT_TYPE
+#undef AS_INPUT_TYPE2
+#undef AS_INPUT_TYPE4
+#undef AS_INPUT_TYPE8
+#undef AS_INPUT_TYPE16
+
+#undef AS_FILTER_TYPE
+
+#undef TO_DEQUANTIZED_TYPE
+
+#undef GET_INPUT_INDEX
+#undef GET_WEIGHTS_INDEX
+#undef GET_OUTPUT_INDEX
+
+#undef INPUT_X_PITCH
+#undef INPUT_Y_PITCH
+
+#undef WEIGHTS_YXS_PITCH
+
+#undef FILTER_SPATIAL_SIZE
+
+#undef OUTPUT_BLOCK_WRITE
+#undef OUTPUT_BLOCK_WRITE2
+#undef OUTPUT_BLOCK_WRITE4
+#undef OUTPUT_BLOCK_WRITE8
+#undef OUTPUT_BLOCK_WRITE16
+
+#undef VEC_TO_ARRAY_2
+#undef VEC_TO_ARRAY_4
+#undef VEC_TO_ARRAY_8
+#undef VEC_TO_ARRAY_16
+
+#undef ARRAY_TO_VEC_2
+#undef ARRAY_TO_VEC_4
+#undef ARRAY_TO_VEC_8
+#undef ARRAY_TO_VEC_16
+
+#ifdef OUTPUT_PAD_VALUE_undef
+#   undef OUTPUT_PAD_VALUE
+#   undef OUTPUT_PAD_VALUE_undef
+#endif
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_iyxo.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_iyxo.cl
new file mode 100644 (file)
index 0000000..a29ee09
--- /dev/null
@@ -0,0 +1,119 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/data_types.cl"
+#include "include/fetch.cl"
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+__attribute__((reqd_work_group_size(1, 1, SUB_GROUP_SIZE)))
+KERNEL(convolution_gpu_bfyx_iyxo_5x5)(
+    const __global UNIT_TYPE* input,
+    __global UNIT_TYPE* output,
+    const __global UNIT_TYPE* weights,
+#if BIAS_TERM
+    const __global UNIT_TYPE* bias,
+#endif
+    uint split_idx)
+{
+    const uint idx = 4 * ((uint)get_global_id(0) * 16 + (uint)get_global_id(2));
+    const uint idy = (uint)get_global_id(1);
+    uint filter_idx = 0;
+    uint output_idx = 0;
+    uint input_idx = 0;
+    UNIT_TYPE inp[8] = { 0 };
+
+#if FILTER_OFM_NUM > 16
+#define FILTER_OFM_MAX 16
+#else
+#define FILTER_OFM_MAX FILTER_OFM_NUM
+#endif
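+// FILTER_OFM_MAX limits how many output feature maps are accumulated per outer iteration
+// (at most 16), keeping the out1..out4 accumulator arrays small.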
+    __attribute__((opencl_unroll_hint(1)))
+        for (int iter = 0; iter < FILTER_OFM_NUM / FILTER_OFM_MAX + (FILTER_OFM_NUM % FILTER_OFM_MAX != 0); iter++) {
+            UNIT_TYPE out1[FILTER_OFM_MAX] = { 0 };
+            UNIT_TYPE out2[FILTER_OFM_MAX] = { 0 };
+            UNIT_TYPE out3[FILTER_OFM_MAX] = { 0 };
+            UNIT_TYPE out4[FILTER_OFM_MAX] = { 0 };
+
+            filter_idx = FILTER_OFM_MAX * iter;
+
+            __attribute__((opencl_unroll_hint(FILTER_IFM_NUM)))
+                for (int ifm = 0; ifm < FILTER_IFM_NUM; ifm++) {
+                    __attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
+                        for (int yy = 0; yy < FILTER_SIZE_Y; yy++) {
+                            uint inp_idx = ifm * INPUT0_FEATURE_PITCH + (idy + yy) * INPUT0_Y_PITCH + idx;
+                            half8 tmp = as_half8(vload4(0, (__global uint*)(input + inp_idx)));
+
+                            inp[0] = tmp.s0;
+                            inp[1] = tmp.s1;
+                            inp[2] = tmp.s2;
+                            inp[3] = tmp.s3;
+                            inp[4] = tmp.s4;
+                            inp[5] = tmp.s5;
+                            inp[6] = tmp.s6;
+                            inp[7] = tmp.s7;
+
+                            __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+                                for (int xx = 0; xx < FILTER_SIZE_X; xx++) {
+#if FILTER_OFM_NUM == 4
+                                    half4 w = as_half4(vload2(0, (__global uint*)(weights + filter_idx)));
+#elif FILTER_OFM_NUM == 8
+                                    half8 w = as_half8(vload4(0, (__global uint*)(weights + filter_idx)));
+#else
+                                    half16 w = as_half16(vload8(0, (__global uint*)(weights + filter_idx)));
+#endif
+                                    __attribute__((opencl_unroll_hint(FILTER_OFM_MAX)))
+                                        for (int ofm = 0; ofm < FILTER_OFM_MAX; ofm++) {
+                                            out1[ofm] = mad(inp[0 + xx], w[ofm], out1[ofm]);
+                                            out2[ofm] = mad(inp[1 + xx], w[ofm], out2[ofm]);
+                                            out3[ofm] = mad(inp[2 + xx], w[ofm], out3[ofm]);
+                                            out4[ofm] = mad(inp[3 + xx], w[ofm], out4[ofm]);
+                                        }
+                                    filter_idx += FILTER_OFM_NUM;
+                                }
+                        }
+                }
+
+            __attribute__((opencl_unroll_hint(FILTER_OFM_MAX)))
+                for (int ofm = 0; ofm < FILTER_OFM_MAX; ofm++) {
+#if BIAS_TERM
+                    out1[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+                    out2[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+                    out3[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+                    out4[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+#endif
+                    out1[ofm] = ACTIVATION(out1[ofm], ACTIVATION_PARAMS);
+                    out2[ofm] = ACTIVATION(out2[ofm], ACTIVATION_PARAMS);
+                    out3[ofm] = ACTIVATION(out3[ofm], ACTIVATION_PARAMS);
+                    out4[ofm] = ACTIVATION(out4[ofm], ACTIVATION_PARAMS);
+                    output_idx = (iter * FILTER_OFM_MAX * OUTPUT_FEATURE_PITCH) + ofm * OUTPUT_FEATURE_PITCH +
+                        idy * OUTPUT_Y_PITCH + idx;
+#if OUTPUT_OFFSET > 0
+#if (OUTPUT_OFFSET % 2) > 0
+                    output[output_idx + OUTPUT_OFFSET + 0] = out1[ofm];
+                    output[output_idx + OUTPUT_OFFSET + 1] = out2[ofm];
+                    output[output_idx + OUTPUT_OFFSET + 2] = out3[ofm];
+                    output[output_idx + OUTPUT_OFFSET + 3] = out4[ofm];
+#else
+                    __global float* out_fl = output + output_idx + OUTPUT_OFFSET;
+                    out_fl[0] = as_float((half2)(out1[ofm], out2[ofm]));
+                    out_fl[1] = as_float((half2)(out3[ofm], out4[ofm]));
+#endif
+#else
+                    vstore2((float2)(as_float((half2)(out1[ofm], out2[ofm])), as_float((half2)(out3[ofm], out4[ofm]))),
+                        0, (__global float*)(output + output_idx));
+#endif
+                }
+        }
+}
index b48d8cd..db9b893 100644 (file)
@@ -99,8 +99,7 @@ KERNEL(convolution_gpu_bfyx_to_fs_byx_fsv32)(
         out[out_i] = UNIT_VAL_ZERO;
     }
 
-    uint input_offset = INPUT0_OFFSET_WITH_PADDING;
-    input_offset += oc * STRIDE_SIZE_X + INPUT0_PADDING_OFFSET_SIZE_X;
+    uint input_offset = oc * STRIDE_SIZE_X + INPUT0_PADDING_OFFSET_SIZE_X;
     input_offset += (or * STRIDE_SIZE_Y + INPUT0_PADDING_OFFSET_SIZE_Y) * INPUT0_SIZE_X_WITH_PADDING;
     input_offset += b * INPUT0_BATCH_PITCH;
 
index f912ad1..b679854 100644 (file)
@@ -73,7 +73,7 @@ KERNEL(convolution_mmad_b_fs_yx_fsv32_dw)(
                     in = input[input_idx];
 #if ASYMMETRIC_DATA_QUANTIZATION
                 else
-                    in = activations_zp[k];
+                    in = activations_zp[g*FILTER_IFM_NUM + k];
 #endif
 
                 uint filter_idx = filter_offset + k*FILTER_IFM_PITCH + j*FILTER_Y_PITCH + i*FILTER_X_PITCH;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/depth_to_space_block2_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/depth_to_space_block2_opt.cl
new file mode 100644 (file)
index 0000000..9ce9302
--- /dev/null
@@ -0,0 +1,44 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
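+// Depth-to-space with BLOCK_SIZE == 2: each output feature map gathers values from four
+// input feature maps and scatters them into a 2x2 spatial block, storing pairs of halfs
+// as packed floats (see the vstore2 calls below).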
+KERNEL(depth_to_space_block2_opt)(const __global half* input, __global half* output)
+{
+    const int in_height  = get_global_size(1);
+    const int2 pos = { get_global_id(0), get_global_id(1) };
+
+    if (pos.x >= (IN_WIDTH) || pos.y >= in_height) return;
+
+    const int offset = IN_WIDTH * in_height;
+
+    __attribute__((opencl_unroll_hint(OUTPUT_FEATURE_NUM)))
+    for (uint ofm_id=0; ofm_id < OUTPUT_FEATURE_NUM; ofm_id++){
+        int add_off = offset * 2 * ofm_id * BLOCK_SIZE * BLOCK_SIZE;
+        int ofm_x_offset = offset * ofm_id;
+        const int inIdx = IN_WIDTH * pos.y + pos.x + ofm_x_offset;
+
+        half2 conv_out_0 = ACTIVATION(vload2(inIdx + (offset * 0 * OUTPUT_FEATURE_NUM), input), ACTIVATION_PARAMS);
+        half2 conv_out_1 = ACTIVATION(vload2(inIdx + (offset * 1 * OUTPUT_FEATURE_NUM), input), ACTIVATION_PARAMS);
+        half2 conv_out_2 = ACTIVATION(vload2(inIdx + (offset * 2 * OUTPUT_FEATURE_NUM), input), ACTIVATION_PARAMS);
+        half2 conv_out_3 = ACTIVATION(vload2(inIdx + (offset * 3 * OUTPUT_FEATURE_NUM), input), ACTIVATION_PARAMS);
+
+        int outIdx1 = IN_WIDTH * BLOCK_SIZE * pos.y + pos.x;
+        int outIdx2 = outIdx1 + IN_WIDTH;
+
+        vstore2((float2)(as_float((half2)(conv_out_0.s0, conv_out_1.s0)), as_float((half2)(conv_out_0.s1, conv_out_1.s1))), outIdx1, (__global float*) (output + add_off));
+        vstore2((float2)(as_float((half2)(conv_out_2.s0, conv_out_3.s0)), as_float((half2)(conv_out_2.s1, conv_out_3.s1))), outIdx2, (__global float*) (output + add_off));
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_iyxo.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_iyxo.cl
new file mode 100644 (file)
index 0000000..f45cc3f
--- /dev/null
@@ -0,0 +1,199 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/data_types.cl"
+#include "include/fetch.cl"
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+__attribute__((reqd_work_group_size(1, 1, SUB_GROUP_SIZE)))
+KERNEL(fused_conv_eltwise_gpu_bfyx_iyxo)(
+    const __global UNIT_TYPE* input,
+#if OUTPUT_LAYOUT_IMAGE_2D_RGBA
+    write_only image2d_t output,
+#else
+    __global UNIT_TYPE* output,
+#endif
+    const __global UNIT_TYPE* weights,
+#if BIAS_TERM
+    const __global UNIT_TYPE* bias,
+#endif
+    uint split_idx,
+    const __global UNIT_TYPE* eltw_input)
+{
+    const uint idx = 4 * ((uint)get_global_id(0) * 16 + (uint)get_global_id(2));
+    const uint idy = (uint)get_global_id(1);
+    uint filter_idx = 0;
+    uint output_idx = 0;
+    uint output_idx_eltwise = 0;
+    uint input_idx = 0;
+    UNIT_TYPE inp[8] = { 0 };
+    const uint input0_pitch_Y = INPUT0_SIZE_X + 2 * (INPUT0_PAD_BEFORE_SIZE_X);
+    const uint input0_pitch_feature = input0_pitch_Y * (INPUT0_SIZE_Y + 2 * (INPUT0_PAD_BEFORE_SIZE_Y));
+
+#if FILTER_OFM_NUM > 16
+#define FILTER_OFM_MAX 16
+#else
+#define FILTER_OFM_MAX FILTER_OFM_NUM
+#endif
+    __attribute__((opencl_unroll_hint(1)))
+        for (int iter = 0; iter < FILTER_OFM_NUM / FILTER_OFM_MAX + (FILTER_OFM_NUM % FILTER_OFM_MAX != 0); iter++) {
+            UNIT_TYPE out1[FILTER_OFM_MAX] = { 0 };
+            UNIT_TYPE out2[FILTER_OFM_MAX] = { 0 };
+            UNIT_TYPE out3[FILTER_OFM_MAX] = { 0 };
+            UNIT_TYPE out4[FILTER_OFM_MAX] = { 0 };
+
+            filter_idx = FILTER_OFM_MAX * iter;
+
+            __attribute__((opencl_unroll_hint(FILTER_IFM_NUM)))
+                for (int ifm = 0; ifm < FILTER_IFM_NUM; ifm++) {
+                    __attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
+                        for (int yy = 0; yy < FILTER_SIZE_Y; yy++) {
+                            uint inp_idx = ifm * input0_pitch_feature + (idy + yy) * input0_pitch_Y + idx;
+                            half8 tmp = as_half8(vload4(0, (__global uint*)(input + inp_idx)));
+
+                            inp[0] = tmp.s0;
+                            inp[1] = tmp.s1;
+                            inp[2] = tmp.s2;
+                            inp[3] = tmp.s3;
+                            inp[4] = tmp.s4;
+                            inp[5] = tmp.s5;
+                            inp[6] = tmp.s6;
+                            inp[7] = tmp.s7;
+
+                            __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+                                for (int xx = 0; xx < FILTER_SIZE_X; xx++) {
+#if FILTER_OFM_NUM == 4
+                                    half4 w = as_half4(vload2(0, (__global uint*)(weights + filter_idx)));
+#elif FILTER_OFM_NUM == 8
+                                    half8 w = as_half8(vload4(0, (__global uint*)(weights + filter_idx)));
+#else
+                                    half16 w = as_half16(vload8(0, (__global uint*)(weights + filter_idx)));
+#endif
+                                    __attribute__((opencl_unroll_hint(FILTER_OFM_MAX)))
+                                        for (int ofm = 0; ofm < FILTER_OFM_MAX; ofm++) {
+                                            out1[ofm] = mad(inp[0 + xx], w[ofm], out1[ofm]);
+                                            out2[ofm] = mad(inp[1 + xx], w[ofm], out2[ofm]);
+                                            out3[ofm] = mad(inp[2 + xx], w[ofm], out3[ofm]);
+                                            out4[ofm] = mad(inp[3 + xx], w[ofm], out4[ofm]);
+                                        }
+                                    filter_idx += FILTER_OFM_NUM;
+                                }
+                        }
+                }
+
+            __attribute__((opencl_unroll_hint(FILTER_OFM_MAX)))
+#if OUTPUT_LAYOUT_IMAGE_2D_RGBA
+                for (int ofm = 0; ofm < FILTER_OFM_MAX; ofm+=3) {
+#else
+                for (int ofm = 0; ofm < FILTER_OFM_MAX; ofm++) {
+#endif
+#if BIAS_TERM
+                    out1[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+                    out2[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+                    out3[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+                    out4[ofm] += bias[(iter * FILTER_OFM_MAX) + ofm];
+#if OUTPUT_LAYOUT_IMAGE_2D_RGBA
+                    out1[ofm + 1] += bias[(iter * FILTER_OFM_MAX) + ofm + 1];
+                    out2[ofm + 1] += bias[(iter * FILTER_OFM_MAX) + ofm + 1];
+                    out3[ofm + 1] += bias[(iter * FILTER_OFM_MAX) + ofm + 1];
+                    out4[ofm + 1] += bias[(iter * FILTER_OFM_MAX) + ofm + 1];
+
+                    out1[ofm + 2] += bias[(iter * FILTER_OFM_MAX) + ofm + 2];
+                    out2[ofm + 2] += bias[(iter * FILTER_OFM_MAX) + ofm + 2];
+                    out3[ofm + 2] += bias[(iter * FILTER_OFM_MAX) + ofm + 2];
+                    out4[ofm + 2] += bias[(iter * FILTER_OFM_MAX) + ofm + 2];
+#endif
+#endif
+                    out1[ofm] = ACTIVATION(out1[ofm], ACTIVATION_PARAMS);
+                    out2[ofm] = ACTIVATION(out2[ofm], ACTIVATION_PARAMS);
+                    out3[ofm] = ACTIVATION(out3[ofm], ACTIVATION_PARAMS);
+                    out4[ofm] = ACTIVATION(out4[ofm], ACTIVATION_PARAMS);
+#if OUTPUT_LAYOUT_IMAGE_2D_RGBA
+                    out1[ofm + 1] = ACTIVATION(out1[ofm + 1], ACTIVATION_PARAMS);
+                    out2[ofm + 1] = ACTIVATION(out2[ofm + 1], ACTIVATION_PARAMS);
+                    out3[ofm + 1] = ACTIVATION(out3[ofm + 1], ACTIVATION_PARAMS);
+                    out4[ofm + 1] = ACTIVATION(out4[ofm + 1], ACTIVATION_PARAMS);
+
+                    out1[ofm + 2] = ACTIVATION(out1[ofm + 2], ACTIVATION_PARAMS);
+                    out2[ofm + 2] = ACTIVATION(out2[ofm + 2], ACTIVATION_PARAMS);
+                    out3[ofm + 2] = ACTIVATION(out3[ofm + 2], ACTIVATION_PARAMS);
+                    out4[ofm + 2] = ACTIVATION(out4[ofm + 2], ACTIVATION_PARAMS);
+#endif
+                    uint ofm_alignment = 4;
+                    int idx_for_image = 0;
+                    int idy_for_image = 0;
+
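+                    // Each block of OUTPUT_FEATURE_NUM output channels maps to one corner of a
+                    // 2x2 spatial block: block 0 -> (2*idx, 2*idy), 1 -> (2*idx+1, 2*idy),
+                    // 2 -> (2*idx, 2*idy+1), 3 -> (2*idx+1, 2*idy+1).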
+                    if (ofm / OUTPUT_FEATURE_NUM == 0) {
+                        output_idx_eltwise = (iter * FILTER_OFM_MAX * OUTPUT_FEATURE_PITCH) + (ofm % OUTPUT_FEATURE_NUM) * OUTPUT_FEATURE_PITCH +
+                            2 * idy * OUTPUT_Y_PITCH + 2 * idx;
+                        output_idx = (ofm % OUTPUT_FEATURE_NUM) + 2 * idy * OUTPUT_SIZE_X * ofm_alignment + 2 * idx * ofm_alignment;
+                        idx_for_image = 2 * idx;
+                        idy_for_image = 2 * idy;
+                    }
+                    else if (ofm / OUTPUT_FEATURE_NUM == 1) {
+                        output_idx_eltwise = (iter * FILTER_OFM_MAX * OUTPUT_FEATURE_PITCH) + (ofm % OUTPUT_FEATURE_NUM) * OUTPUT_FEATURE_PITCH +
+                            2 * idy * OUTPUT_Y_PITCH + 2 * idx + 1;
+                        output_idx = (ofm % OUTPUT_FEATURE_NUM) + 2 * idy * OUTPUT_SIZE_X * ofm_alignment + (2 * idx + 1) * ofm_alignment;
+                        idx_for_image = 2 * idx + 1;
+                        idy_for_image = 2 * idy;
+                    }
+                    else if (ofm / OUTPUT_FEATURE_NUM == 2) {
+                        output_idx_eltwise = (iter * FILTER_OFM_MAX * OUTPUT_FEATURE_PITCH) + (ofm % OUTPUT_FEATURE_NUM) * OUTPUT_FEATURE_PITCH +
+                            (2 * idy + 1) * OUTPUT_Y_PITCH + 2 * idx;
+                        output_idx = (ofm % OUTPUT_FEATURE_NUM) + (2 * idy + 1) * OUTPUT_SIZE_X * ofm_alignment + 2 * idx * ofm_alignment;
+                        idx_for_image = 2 * idx;
+                        idy_for_image = 2 * idy + 1;
+                    }
+                    else if (ofm / OUTPUT_FEATURE_NUM == 3) {
+                        output_idx_eltwise = (iter * FILTER_OFM_MAX * OUTPUT_FEATURE_PITCH) + (ofm % OUTPUT_FEATURE_NUM) * OUTPUT_FEATURE_PITCH +
+                            (2 * idy + 1) * OUTPUT_Y_PITCH + 2 * idx + 1;
+                        output_idx = (ofm % OUTPUT_FEATURE_NUM) + (2 * idy + 1) * OUTPUT_SIZE_X * ofm_alignment + (2 * idx + 1) * ofm_alignment;
+                        idx_for_image = 2 * idx + 1;
+                        idy_for_image = 2 * idy + 1;
+                    }
+#if OUTPUT_LAYOUT_IMAGE_2D_RGBA
+                    half4 output_half1 = {
+                        out1[ofm + 0] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 0 + OUTPUT_FEATURE_PITCH * 0],
+                        out1[ofm + 1] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 0 + OUTPUT_FEATURE_PITCH * 1],
+                        out1[ofm + 2] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 0 + OUTPUT_FEATURE_PITCH * 2],
+                        0 };
+                    IMAGE_WRITE(output, (int2)(idx_for_image, idy_for_image), output_half1);
+                    half4 output_half2 = {
+                        out2[ofm + 0] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 2 + OUTPUT_FEATURE_PITCH * 0],
+                        out2[ofm + 1] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 2 + OUTPUT_FEATURE_PITCH * 1],
+                        out2[ofm + 2] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 2 + OUTPUT_FEATURE_PITCH * 2],
+                        0 };
+                    IMAGE_WRITE(output, (int2)(idx_for_image +2, idy_for_image), output_half2);
+                    half4 output_half3 = {
+                        out3[ofm + 0] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 4 + OUTPUT_FEATURE_PITCH * 0],
+                        out3[ofm + 1] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 4 + OUTPUT_FEATURE_PITCH * 1],
+                        out3[ofm + 2] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 4 + OUTPUT_FEATURE_PITCH * 2],
+                        0 };
+                    IMAGE_WRITE(output, (int2)(idx_for_image+4, idy_for_image), output_half3);
+                    half4 output_half4 = {
+                        out4[ofm + 0] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 6 + OUTPUT_FEATURE_PITCH * 0],
+                        out4[ofm + 1] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 6 + OUTPUT_FEATURE_PITCH * 1],
+                        out4[ofm + 2] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 6 + OUTPUT_FEATURE_PITCH * 2],
+                        0 };
+                    IMAGE_WRITE(output, (int2)(idx_for_image+6, idy_for_image), output_half4);
+#else
+                    output[output_idx_eltwise + OUTPUT_OFFSET + 0] = out1[ofm] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 0];
+                    output[output_idx_eltwise + OUTPUT_OFFSET + 2] = out2[ofm] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 2];
+                    output[output_idx_eltwise + OUTPUT_OFFSET + 4] = out3[ofm] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 4];
+                    output[output_idx_eltwise + OUTPUT_OFFSET + 6] = out4[ofm] + eltw_input[output_idx_eltwise + OUTPUT_OFFSET + 6];
+#endif
+                }
+        }
+}
index 2911701..2e622e9 100644 (file)
 // Input reading operation is always blocked.
 #define BLOCK_LOAD_INPUTS
 
-// for now kernel stride is square
-#define K_WSTRIDE K_STRIDE
-#define K_HSTRIDE K_STRIDE
-
 // need (dilated) KERNEL width for the first output + STRIDE more for each additional output.
-#define IN_BLOCK_WIDTH  (K_WIDTH  + K_WSTRIDE * (OUT_BLOCK_WIDTH  - 1))
-#define IN_BLOCK_HEIGHT (K_HEIGHT + K_HSTRIDE * (OUT_BLOCK_HEIGHT - 1))
+#define IN_BLOCK_WIDTH  ((FILTER_SIZE_X - 1) * DILATION_SIZE_X + STRIDE_SIZE_X * (OUT_BLOCK_WIDTH  - 1) + 1)
+#define IN_BLOCK_HEIGHT ((FILTER_SIZE_Y - 1) * DILATION_SIZE_Y + STRIDE_SIZE_Y * (OUT_BLOCK_HEIGHT - 1) + 1)
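+// e.g. FILTER_SIZE_X = 3, DILATION_SIZE_X = 1, STRIDE_SIZE_X = 1, OUT_BLOCK_WIDTH = 4
+//      gives IN_BLOCK_WIDTH = (3 - 1) * 1 + 1 * (4 - 1) + 1 = 6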
 
 // for imad we are packing 4 8bit activations per 32 bit SIMD lane
 // if we later add 4bit, then PACK would be 8.
 #define AS_TYPE_N_(type, n, x) as_##type##n(x)
 #define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
 #define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
+#define AS_FILTER_TYPE_4(x) AS_TYPE_N(FILTER_TYPE, 4, x)
+
+#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
+#define ALIGN(a, b) (((a) % (b) == 0) ? (a) : ((a) - (a) % (b) + (b)))
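+// e.g. CEIL_DIV(10, 4) == 3 and ALIGN(10, 4) == 12 (rounds up to a multiple of b)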
 
 // int8 conv_input and weights data is packed to int32 "batches",
 // int/uint pointers here instead of INPUT0_TYPE/FILTER_TYPE for convenience
 __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
+__attribute__((reqd_work_group_size(1, 1, SIMD_SIZE)))
 KERNEL (fused_convolution_eltwise_gpu_imad)(
+#if INPUT0_LAYOUT_B_FS_YX_FSV16
+    const __global INPUT0_TYPE* conv_input,
+#else
     const __global PACKED_TYPE   *conv_input,
-    __global OUTPUT_TYPE         *output,
+#endif
+    __global OUTPUT_TYPE         *restrict output,
     const __global int           *weights,
 #if BIAS_TERM
     const __global BIAS_TYPE     *biases,
@@ -69,48 +74,64 @@ KERNEL (fused_convolution_eltwise_gpu_imad)(
 {
     const uint oc = (uint)get_global_id(0) * OUT_BLOCK_WIDTH;  // oc = Output Column
     const uint or = (uint)get_global_id(1) * OUT_BLOCK_HEIGHT; // or = Output Row
-    const uint fm = get_global_id(2);                    // fm = Feature Map = od = Output Depth, SIMD is across this dimension, WG is 1x1x16
+    const uint fm = get_global_id(2);                          // fm = Feature Map = od = Output Depth, SIMD is across this dimension, WG is 1x1x16
     const uint fmg = get_group_id(2);
     const uint lid = get_local_id(2);
-    const uint batch = fm / _OD;
-    const uint f = fm % _OD;
+    const uint batch = fm / (ALIGN(FILTER_OFM_NUM, SIMD_SIZE) * FILTER_GROUPS_NUM);
+#if GROUPED
+    const uint g = (fm / ALIGN(FILTER_OFM_NUM, SIMD_SIZE) % FILTER_GROUPS_NUM);
+    const uint ofmg = fmg % CEIL_DIV(FILTER_OFM_NUM, SIMD_SIZE);
+#else
+    const uint g = 0;
+    const uint ofmg = (fmg % (_OD  / SIMD_SIZE));
+#endif
+    const uint f = fm % ALIGN(FILTER_OFM_NUM, SIMD_SIZE) + g * FILTER_OFM_NUM;
+    const uint sglid = get_sub_group_local_id();
+
+    const int input_x = oc * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = or * STRIDE_SIZE_Y - PADDING_SIZE_Y;
 
     PACKED_TYPE in[IN_BLOCK_HEIGHT];
     ACCUMULATOR_TYPE out[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT] = { 0 };  // this is the 32 bit signed accumulator that must be converted to 8 bits before final write.
 
-    #define NUM_FILTERS (K_HEIGHT * K_WIDTH)
+    #define NUM_FILTERS (FILTER_SIZE_Y * FILTER_SIZE_X)
     int w[NUM_FILTERS];
-
     int in_addr;
 
 #ifdef BLOCK_LOAD_WEIGHTS
-    int weight_addr = (fmg % ((_OD + SIMD_SIZE - 1) / SIMD_SIZE)) * ((_ID * K_HEIGHT * K_WIDTH * SIMD_SIZE) / PACK);
+    int weight_addr = (ofmg * CEIL_DIV(FILTER_IFM_NUM, PACK) * FILTER_SIZE_Y * FILTER_SIZE_X * SIMD_SIZE) + (g * FILTER_GROUPS_PITCH / 4);
 #else
-    int weight_addr = (fmg % ((_OD + SIMD_SIZE - 1) / SIMD_SIZE)) * ((_ID * K_HEIGHT * K_WIDTH * SIMD_SIZE) / PACK) + lid;
+    int weight_addr = (ofmg * CEIL_DIV(FILTER_IFM_NUM, PACK) * FILTER_SIZE_Y * FILTER_SIZE_X * SIMD_SIZE) + (g * FILTER_GROUPS_PITCH / 4) + sglid;
 #endif
-
-    uint input_size = (_ID * (_IH + IHPAD) * (_IW + IWPAD)) / PACK; // dividing by PACK to get right number of 32bit entities.
+    uint input_size = (_ID * (INPUT0_SIZE_Y + IHPAD) * (INPUT0_SIZE_X + IWPAD)) / PACK; // dividing by PACK to get the right number of 32-bit entities.
 
     // For imad we do 4X less input feature map iterations since we are packing 4 of them in each uchar4.
-    // _ID provided by host is multiple of packing factor.
     __attribute__((opencl_unroll_hint(1)))
-    for(int kd = 0; kd < (_ID / PACK); kd++)
+    for(int kd = 0; kd < CEIL_DIV(FILTER_IFM_NUM, PACK); kd++)
     {
-
-#ifdef BLOCK_LOAD_INPUTS
-        in_addr = INPUT0_OFFSET + kd*INPUT0_FEATURE_PITCH + (or * K_STRIDE - PADDING_SIZE_Y)*INPUT0_Y_PITCH + (oc * K_STRIDE - PADDING_SIZE_X);
+#if INPUT0_LAYOUT_B_FS_YX_FSV16
+        in_addr = INPUT0_GET_INDEX(batch, (kd + g * CEIL_DIV(FILTER_IFM_NUM, PACK)) * PACK, input_y, input_x + sglid);
 #else
-        in_addr = INPUT0_OFFSET + kd*INPUT0_FEATURE_PITCH + (or * K_STRIDE - PADDING_SIZE_Y)*INPUT0_Y_PITCH + (oc * K_STRIDE - PADDING_SIZE_X) + lid;
-#endif
+    #ifdef BLOCK_LOAD_INPUTS
+        in_addr = INPUT0_OFFSET + (kd + g * CEIL_DIV(FILTER_IFM_NUM, PACK)) * INPUT0_FEATURE_PITCH + input_y * INPUT0_Y_PITCH + input_x;
+    #else
+        in_addr = INPUT0_OFFSET + (kd + g * CEIL_DIV(FILTER_IFM_NUM, PACK)) * INPUT0_FEATURE_PITCH + input_y * INPUT0_Y_PITCH + input_x + sglid;
+    #endif
         in_addr += batch * input_size;  // adjust for batching
-
+#endif
         for(uint reg = 0; reg < IN_BLOCK_HEIGHT; reg++) {
-#ifdef BLOCK_LOAD_INPUTS
-            in[reg] = AS_PACKED_TYPE(intel_sub_group_block_read(&conv_input[in_addr]));
+#if INPUT0_LAYOUT_B_FS_YX_FSV16
+            in[reg] = *(__global PACKED_TYPE*)(conv_input + in_addr);
+            in_addr += (INPUT0_SIZE_X + IWPAD) * 16;
 #else
+    #ifdef BLOCK_LOAD_INPUTS
+            in[reg] = AS_PACKED_TYPE(intel_sub_group_block_read(&conv_input[in_addr]));
+    #else
             in[reg] = AS_PACKED_TYPE(conv_input[in_addr]);// read SIMD_SIZE elements wide
+    #endif
+            // TODO: This will cause errors for the byxf_af32 input format
+            in_addr += (INPUT0_SIZE_X + IWPAD);  // move to next row down
 #endif
-            in_addr += (_IW + IWPAD);  // move to next row down
         }
 
 #ifdef BLOCK_LOAD_WEIGHTS
@@ -126,17 +147,19 @@ KERNEL (fused_convolution_eltwise_gpu_imad)(
 
         int wi = 0;
         // This loop is temporarily not unrolled because the unroll causes TeamCity hangs.
-        //__attribute__((opencl_unroll_hint(K_HEIGHT)))
-        for (int kr = 0; kr < K_HEIGHT; ++kr) // kr = Kernel Row
+        //__attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
+        for (int kr = 0; kr < FILTER_SIZE_Y; ++kr) // kr = Kernel Row
         {
-            __attribute__((opencl_unroll_hint(K_WIDTH)))
-            for (int kc = 0; kc < K_WIDTH; ++kc) // kc = Kernel Column
+            __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+            for (int kc = 0; kc < FILTER_SIZE_X; ++kc) // kc = Kernel Column
             {
+                __attribute__((opencl_unroll_hint))
                 for (int br = 0; br < OUT_BLOCK_HEIGHT; br++) {
+                    __attribute__((opencl_unroll_hint))
                     for (int bc = 0; bc < OUT_BLOCK_WIDTH; bc++) {
-                        PACKED_TYPE input = sub_group_broadcast(in[br * K_HSTRIDE + kr], bc * K_WSTRIDE + kc);
+                        PACKED_TYPE input = sub_group_broadcast(in[br * STRIDE_SIZE_Y + kr * DILATION_SIZE_Y], bc * STRIDE_SIZE_X + kc * DILATION_SIZE_X);
 
-                        out[br * OUT_BLOCK_WIDTH + bc] = TO_ACCUMULATOR_TYPE(IMAD(out[br * OUT_BLOCK_WIDTH + bc], AS_INPUT0_TYPE_4(input), as_char4(w[wi])));
+                        out[br * OUT_BLOCK_WIDTH + bc] = TO_ACCUMULATOR_TYPE(IMAD(out[br * OUT_BLOCK_WIDTH + bc], AS_INPUT0_TYPE_4(input), AS_FILTER_TYPE_4(w[wi])));
                     }
                 }
                 wi++;
@@ -148,7 +171,7 @@ KERNEL (fused_convolution_eltwise_gpu_imad)(
     // to calculate out_idx and eltw_idx. Calculate offsets with GET_DATA_B_FS_YX_FSV4_INDEX before
     // entering the loop, and have simple expressions for indexes inside the loop.
     const uint output_idx_offset = GET_DATA_B_FS_YX_FSV4_INDEX(OUTPUT, batch, f, or, oc);
-    const uint output_row_size_bytes = (_OW + OWPAD) * PACK;
+    const uint output_row_size_bytes = (OUTPUT_SIZE_X + OWPAD) * PACK;
 
 #if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD
     FUSED_OPS_PRELOAD;
@@ -156,14 +179,14 @@ KERNEL (fused_convolution_eltwise_gpu_imad)(
 
     for (int r = 0; r < OUT_BLOCK_HEIGHT; r++)
     {
-        #if NEED_TO_VERIFY_OUTPUT_RANGES == 1
+        #if OUTPUT_SIZE_Y % OUT_BLOCK_HEIGHT != 0
         const bool zero_r = or + r >= OUTPUT_SIZE_Y;
         if(!zero_r)
         #endif
         {
         for (int c = 0; c < OUT_BLOCK_WIDTH; c++)
         {
-            #if NEED_TO_VERIFY_OUTPUT_RANGES == 1
+            #if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0
             const bool zero_c = oc + c >= OUTPUT_SIZE_X;
             if(!zero_c)
             #endif
@@ -172,6 +195,8 @@ KERNEL (fused_convolution_eltwise_gpu_imad)(
                 uint out_idx = OUTPUT_GET_INDEX(batch, f, or + r, oc + c);
             #elif OUTPUT_LAYOUT_B_FS_YX_FSV4 == 1
                 uint out_idx = output_idx_offset + r * output_row_size_bytes + (c*PACK);
+            #elif OUTPUT_LAYOUT_B_FS_YX_FSV16 == 1
+                uint out_idx = OUTPUT_GET_INDEX(batch, f, or + r, oc + c);
             #else
                 #error "Incorrect output layout"
             #endif
@@ -188,16 +213,21 @@ KERNEL (fused_convolution_eltwise_gpu_imad)(
                 ACTIVATION_TYPE res = TO_ACTIVATION_TYPE(dotProd);
 #endif
 
+                OUTPUT_TYPE final_result;
 #if HAS_FUSED_OPS
     #if FUSED_OPS_CAN_USE_PRELOAD
                 FUSED_OPS_CALC;
     #else
                 FUSED_OPS;
     #endif
-                output[out_idx] = FUSED_OPS_RESULT;
+                final_result = FUSED_OPS_RESULT;
 #else
-                output[out_idx] = TO_OUTPUT_TYPE(res);
+                final_result = TO_OUTPUT_TYPE(res);
+#endif
+#if FILTER_OFM_NUM % SIMD_SIZE != 0
+                if (fmg % CEIL_DIV(FILTER_OFM_NUM, SIMD_SIZE) != CEIL_DIV(FILTER_OFM_NUM, SIMD_SIZE) - 1 || sglid < FILTER_OFM_NUM % SIMD_SIZE)
 #endif
+                    output[out_idx] = final_result;
             }// if(!zero_c)
         } // for (int c = 0; c < OUT_BLOCK_WIDTH; c++)
         }// if(!zero_r)
@@ -209,12 +239,13 @@ KERNEL (fused_convolution_eltwise_gpu_imad)(
 #endif
 
 #undef BLOCK_LOAD_INPUTS
-#undef K_WSTRIDE
-#undef K_HSTRIDE
 #undef IN_BLOCK_WIDTH
 #undef IN_BLOCK_HEIGHT
 #undef PACK
 #undef AS_TYPE_N_
 #undef AS_TYPE_N
 #undef AS_INPUT0_TYPE_4
+#undef AS_FILTER_TYPE_4
 #undef NUM_FILTERS
+#undef CEIL_DIV
+#undef ALIGN
index 8d35591..eb8032f 100644 (file)
@@ -14,6 +14,8 @@
 // limitations under the License.
 */
 
+#include "mmad.cl"
+
 // TODO: currently we accumulate in float32 because there are a lot of "add" operations and the accumulator gets stuck at the value "8192.0f"
 #if !defined(ACCUMULATOR_TYPE)
     #define ACCUMULATOR_TYPE float
 #endif
 
 // Creates vector type.
-#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
\ No newline at end of file
+#define MAKE_VECTOR_TYPE_IMPL_1(elem_type)  elem_type
+#define MAKE_VECTOR_TYPE_IMPL_2(elem_type)  CAT(elem_type, 2)
+#define MAKE_VECTOR_TYPE_IMPL_3(elem_type)  CAT(elem_type, 3)
+#define MAKE_VECTOR_TYPE_IMPL_4(elem_type)  CAT(elem_type, 4)
+#define MAKE_VECTOR_TYPE_IMPL_8(elem_type)  CAT(elem_type, 8)
+#define MAKE_VECTOR_TYPE_IMPL_16(elem_type) CAT(elem_type, 16)
+#define MAKE_VECTOR_TYPE(elem_type, size)   CAT(MAKE_VECTOR_TYPE_IMPL_, size)(elem_type)
+
+#define AS_TYPE(type, val) CAT(as_, type)(val)
+
+// ====================================================================================================================
+// TYPE_SIZE(type) - evaluates to size of "type" in bytes
+// type [PP] - Must evaluate to non-vectorized type.
+// ====================================================================================================================
+#define TYPE_SIZE_uchar  1
+#define TYPE_SIZE_char   1
+#define TYPE_SIZE_ushort 2
+#define TYPE_SIZE_short  2
+#define TYPE_SIZE_half   2
+#define TYPE_SIZE_int    4
+#define TYPE_SIZE_uint   4
+#define TYPE_SIZE_float  4
+#define TYPE_SIZE(type) CAT(TYPE_SIZE_, type)
+
+// ====================================================================================================================
+// BLOCK_READN(type, vector_size, ptr, offset)
+//    - evaluates to intel_sub_group_block_read operation for specified "type" and "vector size", reading
+//      "vector_size" elements from memory starting at "ptr" + "offset"
+// BLOCK_WRITEN(type, vector_size, ptr, offset, val)
+//    - evaluates to intel_sub_group_block_write operation for specified "type" and "vector size", writing
+//      "vector_size"-element vector "val" to memory starting at "ptr" + "offset"
+//  For more details and description of intel_sub_group_block_read/write functions please,
+//  refer to cl_intel_subgroups extension documentation.
+//
+// type        [PP] - Must evaluate to non-vectorized type, ex. float, half, char, etc..
+// vector_size [PP] - Number of elements to read/write, ex 2 for intel_sub_group_block_read2.
+// ptr              - Pointer to global memory where to read from/write to.
+// offset           - Additional offset added to ptr in "type" elements, equivalent to passing ((ptr) + (offset)) as "ptr".
+// val              - For write function vector of "vector_size" of "type" elements (or scalar) to write.
+//
+// ====================================================================================================================
+// Pre-defined commonly used definitions:
+//   DT_<tensor>_BLOCK_READ<n>(ptr, offset)
+//   DT_<tensor>_BLOCK_WRITE<n>(ptr, offset, offset)
+// Where:
+//    <tensor> is one of: INPUT - referencing type jitted as INPUT0,
+//                        OUTPUT,
+//                        BIAS,
+//                        FILTER
+//    <n> is a vector size, one of {2,4,8,16} or none, meaning the output will be a scalar
+// 
+// ====================================================================================================================
+#define BLOCK_RW_TYPE_size1 uchar
+#define BLOCK_RW_TYPE_size2 ushort
+#define BLOCK_RW_TYPE_size4 uint
+#define BLOCK_RW_TYPE(type_size) CAT(BLOCK_RW_TYPE_size, type_size)
+
+#define BLOCK_READ_FUNC_size2       intel_sub_group_block_read_us
+#define BLOCK_READ_FUNC_size4       intel_sub_group_block_read
+#define BLOCK_READ_FUNC(type_size)  CAT(BLOCK_READ_FUNC_size, type_size)
+
+#define BLOCK_WRITE_FUNC_size2       intel_sub_group_block_write_us
+#define BLOCK_WRITE_FUNC_size4       intel_sub_group_block_write
+#define BLOCK_WRITE_FUNC(type_size)  CAT(BLOCK_WRITE_FUNC_size, type_size)
+
+#define BLOCK_READN_FUNC_size1(vector_size)                 CAT(BLOCK_READ_UC_, vector_size)
+#define BLOCK_READN_FUNC_SIZE_DEF(type_size, vector_size)   MAKE_VECTOR_TYPE(BLOCK_READ_FUNC(type_size), vector_size)
+#define BLOCK_READN_FUNC_size2(vector_size)                 BLOCK_READN_FUNC_SIZE_DEF(2, vector_size)
+#define BLOCK_READN_FUNC_size4(vector_size)                 BLOCK_READN_FUNC_SIZE_DEF(4, vector_size)
+#define BLOCK_READN_FUNC(type_size, vector_size)            CAT(BLOCK_READN_FUNC_size, type_size)(vector_size)
+
+#define BLOCK_WRITEN_FUNC_size1(vector_size)                CAT(BLOCK_WRITE_UC_, vector_size)
+#define BLOCK_WRITEN_FUNC_SIZE_DEF(type_size, vector_size)  MAKE_VECTOR_TYPE(BLOCK_WRITE_FUNC(type_size), vector_size)
+#define BLOCK_WRITEN_FUNC_size2(vector_size)                BLOCK_WRITEN_FUNC_SIZE_DEF(2, vector_size)
+#define BLOCK_WRITEN_FUNC_size4(vector_size)                BLOCK_WRITEN_FUNC_SIZE_DEF(4, vector_size)
+#define BLOCK_WRITEN_FUNC(type_size, vector_size)           CAT(BLOCK_WRITEN_FUNC_size, type_size)(vector_size)
+
+#define BLOCK_READN_RAW(type_size, vector_size, ptr, offset)                                                    \
+    BLOCK_READN_FUNC(type_size, vector_size)((const __global BLOCK_RW_TYPE(type_size)*)(ptr) + (offset))
+#define BLOCK_WRITEN_RAW(type_size, vector_size, ptr, offset, val)                                              \
+    BLOCK_WRITEN_FUNC(type_size, vector_size)(                                                                  \
+        (__global BLOCK_RW_TYPE(type_size)*)(ptr) + (offset),                                                   \
+        AS_TYPE(MAKE_VECTOR_TYPE(BLOCK_RW_TYPE(type_size), vector_size), val))
+
+#define BLOCK_READN(type, vector_size, ptr, offset)                                                             \
+    AS_TYPE(MAKE_VECTOR_TYPE(type, vector_size), BLOCK_READN_RAW(TYPE_SIZE(type), vector_size, ptr, offset))
+#define BLOCK_WRITEN(type, vector_size, ptr, offset, val)                                                       \
+    BLOCK_WRITEN_RAW(TYPE_SIZE(type), vector_size, ptr, offset, val)
+
+#define DT_INPUT_BLOCK_READ(ptr, offset)            BLOCK_READN(INPUT0_TYPE, 1, ptr, offset)
+#define DT_INPUT_BLOCK_READ2(ptr, offset)           BLOCK_READN(INPUT0_TYPE, 2, ptr, offset)
+#define DT_INPUT_BLOCK_READ4(ptr, offset)           BLOCK_READN(INPUT0_TYPE, 4, ptr, offset)
+#define DT_INPUT_BLOCK_READ8(ptr, offset)           BLOCK_READN(INPUT0_TYPE, 8, ptr, offset)
+#define DT_INPUT_BLOCK_READ16(ptr, offset)          BLOCK_READN(INPUT0_TYPE, 16, ptr, offset)
+
+#define DT_INPUT_BLOCK_WRITE(ptr, offset, val)      BLOCK_WRITEN(INPUT0_TYPE, 1, ptr, offset, val)
+#define DT_INPUT_BLOCK_WRITE2(ptr, offset, val)     BLOCK_WRITEN(INPUT0_TYPE, 2, ptr, offset, val)
+#define DT_INPUT_BLOCK_WRITE4(ptr, offset, val)     BLOCK_WRITEN(INPUT0_TYPE, 4, ptr, offset, val)
+#define DT_INPUT_BLOCK_WRITE8(ptr, offset, val)     BLOCK_WRITEN(INPUT0_TYPE, 8, ptr, offset, val)
+#define DT_INPUT_BLOCK_WRITE16(ptr, offset, val)    BLOCK_WRITEN(INPUT0_TYPE, 16, ptr, offset, val)
+
+#define DT_OUTPUT_BLOCK_READ(ptr, offset)           BLOCK_READN(OUTPUT_TYPE, 1, ptr, offset)
+#define DT_OUTPUT_BLOCK_READ2(ptr, offset)          BLOCK_READN(OUTPUT_TYPE, 2, ptr, offset)
+#define DT_OUTPUT_BLOCK_READ4(ptr, offset)          BLOCK_READN(OUTPUT_TYPE, 4, ptr, offset)
+#define DT_OUTPUT_BLOCK_READ8(ptr, offset)          BLOCK_READN(OUTPUT_TYPE, 8, ptr, offset)
+#define DT_OUTPUT_BLOCK_READ16(ptr, offset)         BLOCK_READN(OUTPUT_TYPE, 16, ptr, offset)
+
+#define DT_OUTPUT_BLOCK_WRITE(ptr, offset, val)     BLOCK_WRITEN(OUTPUT_TYPE, 1, ptr, offset, val)
+#define DT_OUTPUT_BLOCK_WRITE2(ptr, offset, val)    BLOCK_WRITEN(OUTPUT_TYPE, 2, ptr, offset, val)
+#define DT_OUTPUT_BLOCK_WRITE4(ptr, offset, val)    BLOCK_WRITEN(OUTPUT_TYPE, 4, ptr, offset, val)
+#define DT_OUTPUT_BLOCK_WRITE8(ptr, offset, val)    BLOCK_WRITEN(OUTPUT_TYPE, 8, ptr, offset, val)
+#define DT_OUTPUT_BLOCK_WRITE16(ptr, offset, val)   BLOCK_WRITEN(OUTPUT_TYPE, 16, ptr, offset, val)
+
+#define DT_BIAS_BLOCK_READ(ptr, offset)             BLOCK_READN(BIAS_TYPE, 1, ptr, offset)
+#define DT_BIAS_BLOCK_READ2(ptr, offset)            BLOCK_READN(BIAS_TYPE, 2, ptr, offset)
+#define DT_BIAS_BLOCK_READ4(ptr, offset)            BLOCK_READN(BIAS_TYPE, 4, ptr, offset)
+#define DT_BIAS_BLOCK_READ8(ptr, offset)            BLOCK_READN(BIAS_TYPE, 8, ptr, offset)
+#define DT_BIAS_BLOCK_READ16(ptr, offset)           BLOCK_READN(BIAS_TYPE, 16, ptr, offset)
+
+#define DT_BIAS_BLOCK_WRITE(ptr, offset, val)       BLOCK_WRITEN(BIAS_TYPE, 1, ptr, offset, val)
+#define DT_BIAS_BLOCK_WRITE2(ptr, offset, val)      BLOCK_WRITEN(BIAS_TYPE, 2, ptr, offset, val)
+#define DT_BIAS_BLOCK_WRITE4(ptr, offset, val)      BLOCK_WRITEN(BIAS_TYPE, 4, ptr, offset, val)
+#define DT_BIAS_BLOCK_WRITE8(ptr, offset, val)      BLOCK_WRITEN(BIAS_TYPE, 8, ptr, offset, val)
+#define DT_BIAS_BLOCK_WRITE16(ptr, offset, val)     BLOCK_WRITEN(BIAS_TYPE, 16, ptr, offset, val)
+
+#define DT_FILTER_BLOCK_READ(ptr, offset)           BLOCK_READN(FILTER_TYPE, 1, ptr, offset)
+#define DT_FILTER_BLOCK_READ2(ptr, offset)          BLOCK_READN(FILTER_TYPE, 2, ptr, offset)
+#define DT_FILTER_BLOCK_READ4(ptr, offset)          BLOCK_READN(FILTER_TYPE, 4, ptr, offset)
+#define DT_FILTER_BLOCK_READ8(ptr, offset)          BLOCK_READN(FILTER_TYPE, 8, ptr, offset)
+#define DT_FILTER_BLOCK_READ16(ptr, offset)         BLOCK_READN(FILTER_TYPE, 16, ptr, offset)
+
+#define DT_FILTER_BLOCK_WRITE(ptr, offset, val)     BLOCK_WRITEN(FILTER_TYPE, 1, ptr, offset, val)
+#define DT_FILTER_BLOCK_WRITE2(ptr, offset, val)    BLOCK_WRITEN(FILTER_TYPE, 2, ptr, offset, val)
+#define DT_FILTER_BLOCK_WRITE4(ptr, offset, val)    BLOCK_WRITEN(FILTER_TYPE, 4, ptr, offset, val)
+#define DT_FILTER_BLOCK_WRITE8(ptr, offset, val)    BLOCK_WRITEN(FILTER_TYPE, 8, ptr, offset, val)
+#define DT_FILTER_BLOCK_WRITE16(ptr, offset, val)   BLOCK_WRITEN(FILTER_TYPE, 16, ptr, offset, val)
+// ====================================================================================================================
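+
+// Illustrative usage (a sketch, not part of this header; assumes INPUT0_TYPE and
+// OUTPUT_TYPE are jitted as half, so TYPE_SIZE resolves to 2):
+//
+//     half4 v = DT_INPUT_BLOCK_READ4(src, offset);
+//     // expands to as_half4(intel_sub_group_block_read_us4((const __global ushort*)(src) + (offset)))
+//     DT_OUTPUT_BLOCK_WRITE4(dst, offset, v);
+//     // expands to intel_sub_group_block_write_us4((__global ushort*)(dst) + (offset), as_ushort4(v))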
index 41a8853..99ac419 100644 (file)
@@ -284,6 +284,32 @@ inline uint FUNC(get_b_fs_yx_fsv_index_safe)(uint b, uint f, uint y, uint x,
         CAT(prefix, _OFFSET)                                                              \
     )
 
+#define GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(prefix, o, i, y, x) \
+    FUNC_CALL(get_os_is_yx_osv16_isv16_index)(                    \
+        o, i, y, x,                                               \
+        CAT(prefix, _SIZE_X),                                     \
+        CAT(prefix, _SIZE_Y),                                     \
+        CAT(prefix, _IFM_NUM),                                    \
+        CAT(prefix, _OFM_NUM))
+
+inline uint FUNC(get_os_is_yx_osv16_isv16_index)(uint o, uint i, uint y, uint x,
+    uint x_size, uint y_size, uint i_size, uint o_size)
+{
+    const uint isv = i % 16;
+    const uint osv = o % 16;
+    const uint is = i / 16;
+    const uint os = o / 16;
+
+    const uint x_pitch = 16 * 16;
+    const uint y_pitch = x_pitch * x_size;
+    const uint is_pitch = y_pitch * y_size;
+    const uint os_pitch = is_pitch * ((i_size + 16 - 1) / 16);
+
+    const uint output_offset = isv + osv * 16 + x * x_pitch + y * y_pitch + is * is_pitch + os * os_pitch;
+
+    return output_offset;
+}
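+// Worked example (illustrative): for a 1x1 filter with IFM_NUM = 16, element
+// (o = 17, i = 3, y = 0, x = 0) has osv = 1, os = 1, isv = 3, is = 0, so
+// offset = 3 + 1 * 16 + 1 * 256 = 275.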
+
 #define GET_FILTER_G_OS_IS_YX_ISV8_OSV16_ISV2_INDEX(prefix, g, o, i, y, x, sub_group_size) \
     FUNC_CALL(get_os_is_zyx_isv8_osv16_isv2_index)(                                        \
         g, o, i, 0, y, x,                                                                  \
@@ -847,6 +873,45 @@ inline uint FUNC(get_b_fs_yx_fsv4)(uint o, uint i, uint y, uint x,
     return idx;
 }
 
+#define GET_FILTER_G_OS_IS_YX_OSV16_ISV4_INDEX(prefix, g, o, i, y, x) \
+    FUNC_CALL(get_g_os_is_yx_osv16_isv4)(                         \
+        g, o, i, y, x,                                            \
+        CAT(prefix, _IFM_PITCH),                                 \
+        CAT(prefix, _OFM_PITCH),                                 \
+        CAT(prefix, _SIZE_X),                                    \
+        CAT(prefix, _SIZE_Y),                                    \
+        CAT(prefix, _OFM_NUM),                                   \
+        CAT(prefix, _IFM_NUM))
+
+inline uint FUNC(get_g_os_is_yx_osv16_isv4)(uint g, uint o, uint i, uint y, uint x,
+                                          uint i_size,
+                                          uint o_size,
+                                          uint x_size,
+                                          uint y_size,
+                                          uint o_num,
+                                          uint i_num)
+{
+    const uint otd = 16;
+    uint out_depth_tile = o / otd;
+    uint od             = o - out_depth_tile * otd;
+    uint output_slice_size = (o_num + otd - 1) / otd;
+
+    const uint tile = 4;
+    uint id_tile = i / tile;
+    uint id      = i - id_tile * tile;
+    uint input_slice_size = (i_num + tile - 1) / tile;
+
+    uint idx = g * output_slice_size * input_slice_size * y_size * x_size * otd * tile
+                                       + out_depth_tile * (o_size / tile) * otd * tile
+                                       + id_tile                 * i_size * otd * tile
+                                       + y                       * x_size * otd * tile
+                                       + x                                * otd * tile
+                                       + od                                     * tile
+                                       + id;
+
+    return idx;
+}
+
 #define GET_FILTER_OS_IS_YX_OSV16_ISV4_INDEX(prefix, o, i, y, x) \
     FUNC_CALL(get_os_is_yx_osv16_isv4)(                          \
         o, i, y, x,                                              \
@@ -1239,9 +1304,8 @@ inline uint FUNC(get_os_is_osv32_isv32_swizzled_by_4_index)(uint o, uint i, uint
         CAT(prefix, _OFM_NUM),\
         CAT(prefix, _OFFSET))
 
-inline uint FUNC(get_os_i_yxs_osv4_yxsv4_index)(uint o, uint i, uint y, uint x, uint i_size, uint size_x, uint size_y) {
+inline uint FUNC(get_os_i_yxs_osv_yxsv4_index)(uint o, uint i, uint y, uint x, uint i_size, uint size_x, uint size_y, uint osv) {
     const uint yxsv = 4;
-    const uint osv = 4;
     uint yx = y * size_x + x;
     uint yx_size_aligned = (size_x * size_y + yxsv - 1) / yxsv * yxsv;
     uint os_index = o / osv;
@@ -1259,11 +1323,12 @@ inline uint FUNC(get_os_i_yxs_osv4_yxsv4_index)(uint o, uint i, uint y, uint x,
 }
 
 #define GET_FILTER_OS_I_YXS_OSV4_YXSV4_INDEX(prefix, o, i, y, x)    \
-    FUNC_CALL(get_os_i_yxs_osv4_yxsv4_index)(                       \
+    FUNC_CALL(get_os_i_yxs_osv_yxsv4_index)(                        \
         o, i, y, x,                                                 \
         CAT(prefix, _IFM_NUM),                                      \
         CAT(prefix, _SIZE_X),                                       \
-        CAT(prefix, _SIZE_Y))
+        CAT(prefix, _SIZE_Y),                                       \
+        4)
 
 #define GET_FILTER_OS_IYX_OSV32__AI32_INDEX(prefix, o, i, y, x, sub_group_size) \
     CAT(prefix, _OFFSET) +                                                      \
@@ -1380,9 +1445,8 @@ inline uint FUNC(get_os_i_yxs_osv4_yxsv4_index)(uint o, uint i, uint y, uint x,
         CAT(prefix, _OFFSET),                                                       \
         sub_group_size)
 
-inline uint FUNC(get_gs_oi_yxs_gsv4_yxsv4_index)(uint g, uint o, uint i, uint y, uint x, uint o_size, uint i_size, uint size_x, uint size_y) {
+inline uint FUNC(get_gs_oi_yxs_gsv_yxsv4_index)(uint g, uint o, uint i, uint y, uint x, uint o_size, uint i_size, uint size_x, uint size_y, const uint gsv) {
     const uint yxsv = 4;
-    const uint gsv = 4;
     uint yx = y * size_x + x;
     uint yx_size_aligned = (size_x * size_y + yxsv - 1) / yxsv * yxsv;
     uint gs_index = g / gsv;
@@ -1400,12 +1464,31 @@ inline uint FUNC(get_gs_oi_yxs_gsv4_yxsv4_index)(uint g, uint o, uint i, uint y,
 }
 
 #define GET_FILTER_GS_OI_YXS_GSV4_YXSV4_INDEX(prefix, g, o, i, y, x) \
-    FUNC_CALL(get_gs_oi_yxs_gsv4_yxsv4_index)(                       \
+    FUNC_CALL(get_gs_oi_yxs_gsv_yxsv4_index)(                        \
         g, o, i, y, x,                                               \
         CAT(prefix, _OFM_NUM),                                       \
         CAT(prefix, _IFM_NUM),                                       \
         CAT(prefix, _SIZE_X),                                        \
-        CAT(prefix, _SIZE_Y))
+        CAT(prefix, _SIZE_Y),                                        \
+        4)
+
+#define GET_FILTER_GS_OI_YXS_GSV16_YXSV4_INDEX(prefix, g, o, i, y, x) \
+    FUNC_CALL(get_gs_oi_yxs_gsv_yxsv4_index)(                         \
+        g, o, i, y, x,                                                \
+        CAT(prefix, _OFM_NUM),                                        \
+        CAT(prefix, _IFM_NUM),                                        \
+        CAT(prefix, _SIZE_X),                                         \
+        CAT(prefix, _SIZE_Y),                                         \
+        16)
+
+#define GET_FILTER_GS_OI_YXS_GSV32_YXSV4_INDEX(prefix, g, o, i, y, x) \
+    FUNC_CALL(get_gs_oi_yxs_gsv_yxsv4_index)(                         \
+        g, o, i, y, x,                                                \
+        CAT(prefix, _OFM_NUM),                                        \
+        CAT(prefix, _IFM_NUM),                                        \
+        CAT(prefix, _SIZE_X),                                         \
+        CAT(prefix, _SIZE_Y),                                         \
+        32)
 
 #define GET_FILTER_G_OS_IS_YX_ISV16_OSV16_INDEX(prefix, g, o, i, y, x, sub_group_size) \
     CAT(prefix, _OFFSET) +                                                             \
index 2b1f501..80fab34 100644 (file)
@@ -177,6 +177,64 @@ inline int8 FUNC(mmad8x8)(int8 A_vectors, int8 B_vectors, int8 acc) __attribute_
 }
 
 // TODO: remove it when cl_intel_subgroups_char extension will work
+inline void FUNC(sub_group_block_write_uchar16)(__global uchar* outPtr, uchar16 v)
+{
+#ifdef cl_intel_subgroups_char
+    intel_sub_group_block_write_uc16(outPtr, v);
+#else
+    uint idx = get_sub_group_local_id();
+
+    outPtr[idx] = v.s0; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s1; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s2; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s3; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s4; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s5; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s6; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s7; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s8; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s9; idx += get_max_sub_group_size();
+    outPtr[idx] = v.sa; idx += get_max_sub_group_size();
+    outPtr[idx] = v.sb; idx += get_max_sub_group_size();
+    outPtr[idx] = v.sc; idx += get_max_sub_group_size();
+    outPtr[idx] = v.sd; idx += get_max_sub_group_size();
+    outPtr[idx] = v.se; idx += get_max_sub_group_size();
+    outPtr[idx] = v.sf; idx += get_max_sub_group_size();
+#endif
+}
+
+inline uchar16 FUNC(sub_group_block_read_uchar16)(const __global uchar* ptr)
+{
+#ifdef cl_intel_subgroups_char
+    // Workaround for compiler support:
+    // return intel_sub_group_block_read_uc16(ptr);
+    return (uchar16)(intel_sub_group_block_read_uc8(ptr), intel_sub_group_block_read_uc8(ptr + 8 * get_max_sub_group_size()));
+#else
+    uint idx = get_sub_group_local_id();
+
+    uchar16 ret;
+
+    ret.s0 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s1 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s2 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s3 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s4 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s5 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s6 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s7 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s8 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s9 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.sa = ptr[idx]; idx += get_max_sub_group_size();
+    ret.sb = ptr[idx]; idx += get_max_sub_group_size();
+    ret.sc = ptr[idx]; idx += get_max_sub_group_size();
+    ret.sd = ptr[idx]; idx += get_max_sub_group_size();
+    ret.se = ptr[idx]; idx += get_max_sub_group_size();
+    ret.sf = ptr[idx]; idx += get_max_sub_group_size();
+
+    return ret;
+#endif
+}
+
 inline void FUNC(sub_group_block_write_uchar8)(__global uchar* outPtr, uchar8 v)
 {
 #ifdef cl_intel_subgroups_char
@@ -184,7 +242,7 @@ inline void FUNC(sub_group_block_write_uchar8)(__global uchar* outPtr, uchar8 v)
 #else
     uint idx = get_sub_group_local_id();
 
-       outPtr[idx] = v.s0; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s0; idx += get_max_sub_group_size();
     outPtr[idx] = v.s1; idx += get_max_sub_group_size();
     outPtr[idx] = v.s2; idx += get_max_sub_group_size();
     outPtr[idx] = v.s3; idx += get_max_sub_group_size();
@@ -214,7 +272,92 @@ inline uchar8 FUNC(sub_group_block_read_uchar8)(const __global uchar* ptr)
     ret.s7 = ptr[idx]; idx += get_max_sub_group_size();
 
     return ret;
+#endif
+}
+
+inline void FUNC(sub_group_block_write_uchar4)(__global uchar* outPtr, uchar4 v)
+{
+#ifdef cl_intel_subgroups_char
+    intel_sub_group_block_write_uc4(outPtr, v);
+#else
+    uint idx = get_sub_group_local_id();
+
+    outPtr[idx] = v.s0; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s1; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s2; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s3; idx += get_max_sub_group_size();
+#endif
+}
+
+inline uchar4 FUNC(sub_group_block_read_uchar4)(const __global uchar* ptr)
+{
+#ifdef cl_intel_subgroups_char
+    return intel_sub_group_block_read_uc4(ptr);
+#else
+    uint idx = get_sub_group_local_id();
+
+    uchar4 ret;
+
+    ret.s0 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s1 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s2 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s3 = ptr[idx]; idx += get_max_sub_group_size();
+
+    return ret;
+#endif
+}
+
+inline void FUNC(sub_group_block_write_uchar2)(__global uchar* outPtr, uchar2 v)
+{
+#ifdef cl_intel_subgroups_char
+    intel_sub_group_block_write_uc2(outPtr, v);
+#else
+    uint idx = get_sub_group_local_id();
 
+    outPtr[idx] = v.s0; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s1; idx += get_max_sub_group_size();
+#endif
+}
+
+inline uchar2 FUNC(sub_group_block_read_uchar2)(const __global uchar* ptr)
+{
+#ifdef cl_intel_subgroups_char
+    return intel_sub_group_block_read_uc2(ptr);
+#else
+    uint idx = get_sub_group_local_id();
+
+    uchar2 ret;
+
+    ret.s0 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s1 = ptr[idx]; idx += get_max_sub_group_size();
+
+    return ret;
+#endif
+}
+
+inline void FUNC(sub_group_block_write_uchar)(__global uchar* outPtr, uchar v)
+{
+#ifdef cl_intel_subgroups_char
+    intel_sub_group_block_write_uc(outPtr, v);
+#else
+    uint idx = get_sub_group_local_id();
+
+    outPtr[idx] = v;
+#endif
+}
+
+inline uchar FUNC(sub_group_block_read_uchar)(const __global uchar* ptr)
+{
+#ifdef cl_intel_subgroups_char
+    return intel_sub_group_block_read_uc(ptr);
+#else
+    uint idx = get_sub_group_local_id();
+
+    uchar ret;
+
+    ret = ptr[idx];
+
+    return ret;
 #endif
 }
 
@@ -227,3 +370,15 @@ inline uchar8 FUNC(sub_group_block_read_uchar8)(const __global uchar* ptr)
 #define SLM_BLOCK_WRITE_4(A, B) (FUNC_CALL(intel_sub_group_block_write_4)(A, B))
 #define SLM_BLOCK_READ_4(A) (FUNC_CALL(intel_sub_group_block_read_uint4)(A))
 #define SLM_BLOCK_READ_8(A) (FUNC_CALL(intel_sub_group_block_read_uint8)(A))
+
+#define BLOCK_READ_UC_1(ptr)  FUNC_CALL(sub_group_block_read_uchar)(ptr)
+#define BLOCK_READ_UC_2(ptr)  FUNC_CALL(sub_group_block_read_uchar2)(ptr)
+#define BLOCK_READ_UC_4(ptr)  FUNC_CALL(sub_group_block_read_uchar4)(ptr)
+#define BLOCK_READ_UC_8(ptr)  FUNC_CALL(sub_group_block_read_uchar8)(ptr)
+#define BLOCK_READ_UC_16(ptr) FUNC_CALL(sub_group_block_read_uchar16)(ptr)
+
+#define BLOCK_WRITE_UC_1(ptr, val)  FUNC_CALL(sub_group_block_write_uchar)(ptr, val)
+#define BLOCK_WRITE_UC_2(ptr, val)  FUNC_CALL(sub_group_block_write_uchar2)(ptr, val)
+#define BLOCK_WRITE_UC_4(ptr, val)  FUNC_CALL(sub_group_block_write_uchar4)(ptr, val)
+#define BLOCK_WRITE_UC_8(ptr, val)  FUNC_CALL(sub_group_block_write_uchar8)(ptr, val)
+#define BLOCK_WRITE_UC_16(ptr, val) FUNC_CALL(sub_group_block_write_uchar16)(ptr, val)
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad.cl
new file mode 100644 (file)
index 0000000..59e1f5f
--- /dev/null
@@ -0,0 +1,537 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/fetch.cl"
+#include "include/imad.cl"
+#include "include/data_types.cl"
+#include "include/common.cl"
+#include "include/mmad.cl"
+
+#include "mvn_gpu_b_fs_yx_fsv16_imad_accumulate.cl"
+#include "mvn_gpu_b_fs_yx_fsv16_imad_reduce.cl"
+
+// MVN - performs mean-variance normalization, i.e. normalizes the input data to have
+//       0 mean and, if NORMALIZE_VARIANCE is set, variance 1.
+//
+// Below is a set of 5 kernels:
+//   mvn_mean_1, mvn_mean_2, mvn_var_1, mvn_var_2, mvn_final
+// that can perform mvn operation in two modes.
+//
+// Basic mode:
+//   In this mode only the mvn_final kernel is used. It performs the required reductions for mean
+//   and variance in this single kernel, using a single work-group per slice of data-sets
+//   and reducing intermediate values through local memory.
+//   It does not require any additional jit constants.
+//   lws:          LWS x 1 x 1
+//   gws:          LWS x feature x batch
+//
+// Parallel mode:
+//   In this mode all kernels are used to provide extra parallelism, using global memory
+//   and host-side synchronization with events/an in-order queue.
+//   To calculate mean:
+//   The mvn_mean_1 kernel should be enqueued first, with extra global memory provided on the second input
+//     to store intermediate results from all work-groups.
+//     To activate this kernel MVN_KERNEL_MEAN_1 must be defined and evaluate to true/1.
+//     lws:           LWS x 1 x 1
+//     gws:           LWS * ITEM_GROUPS x feature x batch
+//     This kernel calculates a partial result for each of the ITEM_GROUPS work-groups and stores it into global memory.
+//
+//   The mvn_mean_2 kernel must be enqueued next to further reduce the previous results using a single work-group.
+//     This kernel expects the result of mvn_mean_1 on its first input; on the second input global memory of size
+//     batch * align(feature, FSV) must be provided to store the final mean values.
+//     It must be ensured that the mvn_mean_1 kernel has finished and stored its partial results before this kernel runs.
+//     To activate this kernel MVN_KERNEL_MEAN_2 must be defined and evaluate to true/1.
+//     lws:          LWS x 1 x 1
+//     gws:          LWS x feature x batch
+//
+//  If variance normalization is required, the mvn_var_1 and mvn_var_2 kernels should be enqueued analogously,
+//  additionally providing the results from the mvn_mean_2 kernel.
+//
+//  Finally the mvn_final kernel should be enqueued with the buffers holding the outputs of the previous kernels (mvn_mean_2, mvn_var_2).
+//  To enable parallel mode the PRECALC_MEAN and optionally PRECALC_VARIANCE definitions should be used.
+//  At this stage there is no further need to synchronize, as this kernel only performs a simple normalization given the known mean and inverse of variance.
+//  Because of this the kernel can be enqueued with full parallelization, without limiting it to a single work-group.
+//     lws:          SIMD x 1 x 1
+//     gws:          (x * y) / SIMD * SIMD x feature x batch
+//
+// Required jit constants:
+// SIMD         - Sub-group/simd size.
+// LWS          - Local work-size along 0th dimension, must be a multiple of SIMD.
+// GWS          - Global work-size along 0th dimension.
+//                In basic mode this must be equal to LWS.
+//                In parallel mode this must be equal to LWS * ITEM_GROUPS, except in mvn_final kernel where it has no restrictions.
+// ITEM_GROUPS  - Number of work-groups performing accumulation in parallel mode. Should be the same in both stages of parallel kernels.
+
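+// Illustrative host-side enqueue order for parallel mode (a sketch only; buffer names such as
+// "sums", "means", "var_sums" and "inv_vars" are placeholders, not symbols defined in this file,
+// and "f_slices" stands for the number of FSV-sized feature slices):
+//
+//   enqueue(mvn_mean_1, gws = {LWS * ITEM_GROUPS, f_slices, batch}, args = {input, sums});
+//   enqueue(mvn_mean_2, gws = {LWS,               f_slices, batch}, args = {sums, means});
+//   // only when NORMALIZE_VARIANCE:
+//   enqueue(mvn_var_1,  gws = {LWS * ITEM_GROUPS, f_slices, batch}, args = {input, means, var_sums});
+//   enqueue(mvn_var_2,  gws = {LWS,               f_slices, batch}, args = {var_sums, inv_vars});
+//   // PRECALC_MEAN (and PRECALC_VARIANCE) enabled for the final kernel:
+//   enqueue(mvn_final,  gws = {(x * y) / SIMD * SIMD, f_slices, batch}, args = {input, output, means, inv_vars});
+//   // each enqueue must observe completion of the previous one (events or an in-order queue)
+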
+
+#define FSV                   16
+#define INPUT_SLICE_PITCH     16
+#define SG_NUM                (LWS / SIMD)
+
+#define INPUT_TYPE2           MAKE_VECTOR_TYPE(INPUT0_TYPE, 2)
+#define INPUT_TYPE4           MAKE_VECTOR_TYPE(INPUT0_TYPE, 4)
+#define INPUT_TYPE8           MAKE_VECTOR_TYPE(INPUT0_TYPE, 8)
+#define INPUT_PACKED_TYPE     MAKE_VECTOR_TYPE(INPUT0_TYPE, FSV)
+#define OUTPUT_PACKED_TYPE    MAKE_VECTOR_TYPE(OUTPUT_TYPE, FSV)
+#define MEAN_PACKED_TYPE      MAKE_VECTOR_TYPE(MEAN_TYPE, FSV)
+#define INT_PACKED_TYPE       MAKE_VECTOR_TYPE(int, FSV)
+
+#define TO_MEAN_PACKED_TYPE   CAT(convert_, MEAN_PACKED_TYPE)
+
+#define ITEMS_NUM             (OUTPUT_SIZE_X * OUTPUT_SIZE_Y)
+
+#define CEIL_DIV(a, b)        (((a) + (b) - 1) / (b))
+
+// ================================================================================================
+#if MVN_KERNEL_MEAN_1
+
+DECLARE_PACKED_ACCUMULATE(accumulate_sum_input, int, INPUT0_TYPE, FSV, INPUT_SLICE_PITCH, ITEMS_NUM, GWS, ACCUMULATE_SUM)
+
+DECLARE_WG_PACKED_REDUCE_ADD(reduce_sum_across_sg, int, FSV, SG_NUM, REDUCE_NO_POST_OP)
+
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+__attribute__((reqd_work_group_size(LWS, 1, 1)))
+KERNEL(mvn_mean_1)(const __global INPUT0_TYPE* input,
+                   __global int* intermidiate_sum) {
+    uint b = get_global_id(2);
+    uint f = get_global_id(1) * FSV;
+    uint flat_data_set_group = b * CEIL_DIV(OUTPUT_FEATURE_NUM, FSV) + get_global_id(1);
+
+    uint items_group = get_group_id(0);
+    const uint sgid = get_sub_group_id();
+    const uint sglid = get_sub_group_local_id();
+
+    const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0);
+
+    __local int slm_acc[(SG_NUM - 1) * FSV];
+
+    INT_PACKED_TYPE partial_sum = FUNC_CALL(accumulate_sum_input)(input, data_sets_offset, get_global_id(0));
+    int full_sum = FUNC_CALL(reduce_sum_across_sg)(partial_sum, slm_acc);
+
+    if (sgid == 0 && (sglid < FSV || SIMD == FSV)) {
+        intermidiate_sum[flat_data_set_group * ITEM_GROUPS * FSV + items_group * FSV + sglid] = full_sum;
+    }
+}
+// ================================================================================================
+#elif MVN_KERNEL_MEAN_2
+
+DECLARE_PACKED_ACCUMULATE(accumulate_sum_input, int, int, FSV, INPUT_SLICE_PITCH, ITEM_GROUPS, LWS, ACCUMULATE_SUM)
+
+#define CALC_MEAN(sum) ((sum) / ITEMS_NUM)
+DECLARE_WG_PACKED_REDUCE_ADD(reduce_mean_across_sg, MEAN_TYPE, FSV, SG_NUM, CALC_MEAN)
+
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+__attribute__((reqd_work_group_size(LWS, 1, 1)))
+KERNEL(mvn_mean_2)(const __global int* intermidiate_sum,
+                   __global MEAN_TYPE* intermidiate_mean) {
+    uint b = get_global_id(2);
+    uint f = get_global_id(1) * FSV;
+    uint flat_data_set_group = b * CEIL_DIV(OUTPUT_FEATURE_NUM, FSV) + get_global_id(1);
+
+    const uint sgid = get_sub_group_id();
+    const uint sglid = get_sub_group_local_id();
+
+    const uint data_sets_offset = flat_data_set_group * ITEM_GROUPS * FSV;
+
+    INT_PACKED_TYPE complete_sum = FUNC_CALL(accumulate_sum_input)(intermidiate_sum, data_sets_offset, get_local_id(0));
+    __local MEAN_TYPE slm_acc[(SG_NUM - 1) * FSV];
+    MEAN_TYPE mean = FUNC_CALL(reduce_mean_across_sg)(TO_MEAN_PACKED_TYPE(complete_sum), slm_acc);
+
+    if (sgid == 0 && (sglid < FSV || SIMD == FSV)) {
+        intermidiate_mean[flat_data_set_group * FSV + sglid] = mean;
+    }
+}
+// ================================================================================================
+#elif MVN_KERNEL_VAR_1
+
+#define EXTRA_ARGS_DECL_IMPL    , MEAN_TYPE mean
+#define EXTRA_ARGS_IMPL         , mean
+#define EXTRA_ARGS_DECL         EXTRA_ARGS_DECL_IMPL
+#define EXTRA_ARGS              EXTRA_ARGS_IMPL
+#define ACCUMULATE_SUM_SQ_DEV(curr, next, idx, mean)   ACCUMULATE_SUM_SQ(curr, TO_MEAN_TYPE(next) - intel_sub_group_shuffle(mean, idx), idx)
+DECLARE_PACKED_ACCUMULATE_EARGS(accumulate_sum_sq_dev, MEAN_TYPE, INPUT0_TYPE, FSV, INPUT_SLICE_PITCH, ITEMS_NUM, GWS, ACCUMULATE_SUM_SQ_DEV, EXTRA_ARGS_DECL, EXTRA_ARGS)
+
+DECLARE_WG_PACKED_REDUCE_ADD(reduce_sum_across_sg, MEAN_TYPE, FSV, SG_NUM, REDUCE_NO_POST_OP)
+
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+__attribute__((reqd_work_group_size(LWS, 1, 1)))
+KERNEL(mvn_var_1)(const __global INPUT0_TYPE* input,
+                  const __global MEAN_TYPE* means,
+                  __global MEAN_TYPE* intermidiate_sum) {
+    uint b = get_global_id(2);
+    uint f = get_global_id(1) * FSV;
+    uint flat_data_set_group = b * CEIL_DIV(OUTPUT_FEATURE_NUM, FSV) + get_global_id(1);
+
+    uint items_group = get_group_id(0);
+    const uint sgid = get_sub_group_id();
+    const uint sglid = get_sub_group_local_id();
+
+    const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0);
+
+    __local MEAN_TYPE slm_acc[(SG_NUM - 1) * FSV];
+
+    MEAN_TYPE mean = means[flat_data_set_group * FSV + sglid];
+    MEAN_PACKED_TYPE partial_sum = FUNC_CALL(accumulate_sum_sq_dev)(input, data_sets_offset, get_global_id(0), mean);
+    MEAN_TYPE full_sum = FUNC_CALL(reduce_sum_across_sg)(partial_sum, slm_acc);
+
+    if (sgid == 0 && (sglid < FSV || SIMD == FSV)) {
+        intermidiate_sum[flat_data_set_group * ITEM_GROUPS * FSV + items_group * FSV + sglid] = full_sum;
+    }
+}
+// ================================================================================================
+#elif MVN_KERNEL_VAR_2
+
+DECLARE_PACKED_ACCUMULATE(accumulate_sum, MEAN_TYPE, MEAN_TYPE, FSV, INPUT_SLICE_PITCH, ITEM_GROUPS, LWS, ACCUMULATE_SUM)
+
+#define CALC_INVERSE_VARIANCE(sum_diff_sq)   native_powr((sum_diff_sq) / ITEMS_NUM + (MEAN_TYPE)EPSILON, -0.5f)
+DECLARE_WG_PACKED_REDUCE_ADD(reduce_var_across_sg, MEAN_TYPE, FSV, SG_NUM, CALC_INVERSE_VARIANCE)
+
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+__attribute__((reqd_work_group_size(LWS, 1, 1)))
+KERNEL(mvn_var_2)(const __global MEAN_TYPE* intermidiate_sum,
+                   __global MEAN_TYPE* intermidiate_ivar) {
+    uint b = get_global_id(2);
+    uint f = get_global_id(1) * FSV;
+    uint flat_data_set_group = b * CEIL_DIV(OUTPUT_FEATURE_NUM, FSV) + get_global_id(1);
+
+    uint items_group = get_group_id(0);
+    const uint sgid = get_sub_group_id();
+    const uint sglid = get_sub_group_local_id();
+
+    const uint data_sets_offset = flat_data_set_group * ITEM_GROUPS * FSV;
+
+    MEAN_PACKED_TYPE complete_sum = FUNC_CALL(accumulate_sum)(intermidiate_sum, data_sets_offset, get_local_id(0));
+
+    __local MEAN_TYPE slm_acc[(SG_NUM - 1) * FSV];
+    MEAN_TYPE inv_variance = FUNC_CALL(reduce_var_across_sg)(complete_sum, slm_acc);
+
+    if (sgid == 0 && (sglid < FSV || SIMD == FSV)) {
+        intermidiate_ivar[flat_data_set_group * FSV + sglid] = inv_variance;
+    }
+}
+// ================================================================================================
+#else // MVN_KERNEL_MAIN
+
+// Mean:
+DECLARE_PACKED_ACCUMULATE(accumulate_sum_input, int, INPUT0_TYPE, FSV, INPUT_SLICE_PITCH, ITEMS_NUM, LWS, ACCUMULATE_SUM)
+
+#define CALC_MEAN(sum) ((sum) / ITEMS_NUM)
+DECLARE_WG_PACKED_REDUCE_ADD(reduce_mean, MEAN_TYPE, FSV, SG_NUM, CALC_MEAN)
+
+// Variance:
+#define EXTRA_ARGS_DECL_IMPL    , MEAN_TYPE mean
+#define EXTRA_ARGS_IMPL         , mean
+#define EXTRA_ARGS_DECL         EXTRA_ARGS_DECL_IMPL
+#define EXTRA_ARGS              EXTRA_ARGS_IMPL
+#define ACCUMULATE_SUM_SQ_DEV(curr, next, idx, mean)   ACCUMULATE_SUM_SQ(curr, next - intel_sub_group_shuffle(mean, idx), idx)
+DECLARE_PACKED_ACCUMULATE_EARGS(accumulate_sum_sq_dev, MEAN_TYPE, INPUT0_TYPE, FSV, INPUT_SLICE_PITCH, ITEMS_NUM, LWS, ACCUMULATE_SUM_SQ_DEV, EXTRA_ARGS_DECL, EXTRA_ARGS)
+
+#define CALC_INVERSE_VARIANCE(sum_diff_sq)   native_powr((sum_diff_sq) / ITEMS_NUM + (MEAN_TYPE)EPSILON, -0.5f)
+DECLARE_WG_PACKED_REDUCE_ADD(reduce_inverse_variance, MEAN_TYPE, FSV, SG_NUM, CALC_INVERSE_VARIANCE)
+
+#define INPUT_PACKED_BLOCK_READ(ptr)   CAT(as_, INPUT_PACKED_TYPE)(CAT(BLOCK_READ_UC_, FSV)((const __global uchar*)ptr))
+
+#define OUTPUT_PAD_IN_ITEMS (OUTPUT_PAD_BEFORE_SIZE_X != 0 || OUTPUT_PAD_AFTER_SIZE_X != 0 || OUTPUT_PAD_BEFORE_SIZE_Y != 0)
+
+__attribute__((intel_reqd_sub_group_size(SIMD)))
+__attribute__((reqd_work_group_size(LWS, 1, 1)))
+KERNEL(mvn_final)(
+    const __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* restrict output
+#if HAS_FUSED_OPS_DECLS
+    , FUSED_OPS_DECLS
+#endif
+#if PRECALC_MEAN
+    , const __global MEAN_TYPE* means
+#endif
+#if PRECALC_VARIANCE
+    , const __global MEAN_TYPE* variances
+#endif
+) {
+    uint b = get_global_id(2);
+    uint f = get_global_id(1) * FSV;
+    uint flat_data_set_group = b * CEIL_DIV(OUTPUT_FEATURE_NUM, FSV) + get_global_id(1);
+#if GWS != LWS
+    uint items_group = get_group_id(0);
+#else
+    uint items_group = 0;
+#endif
+    const uint sgid = get_sub_group_id() + items_group * SG_NUM;
+    const uint sglid = get_sub_group_local_id();
+
+    const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0);
+    uint input_offset;
+
+#if !PRECALC_MEAN || (NORMALIZE_VARIANCE && !PRECALC_VARIANCE)
+    __local MEAN_TYPE slm_acc[(SG_NUM - 1) * FSV];
+#endif
+
+#if PRECALC_MEAN
+    MEAN_TYPE mean = means[flat_data_set_group * FSV + sglid];
+#else
+    INT_PACKED_TYPE partial_sum = FUNC_CALL(accumulate_sum_input)(input, data_sets_offset, get_local_id(0));
+    MEAN_TYPE mean = FUNC_CALL(reduce_mean)(TO_MEAN_PACKED_TYPE(partial_sum), slm_acc);
+#endif
+
+#if NORMALIZE_VARIANCE
+#   if PRECALC_VARIANCE
+    MEAN_TYPE inv_variance = variances[flat_data_set_group * FSV + sglid];
+#   else
+    MEAN_PACKED_TYPE partial_dev = FUNC_CALL(accumulate_sum_sq_dev)(input, data_sets_offset, get_local_id(0), mean);
+    MEAN_TYPE inv_variance = FUNC_CALL(reduce_inverse_variance)(partial_dev, slm_acc);
+#   endif
+#else
+    MEAN_TYPE inv_variance = 1;
+#endif
+
+#if OUTPUT_IS_FP
+    input_offset = data_sets_offset + sgid * SIMD * FSV;
+    uint output_spatial_base = sgid * SIMD;
+    uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0) + sgid * SIMD * FSV;
+    // For fused ops to align with non-fp path
+    const uint set_idx = sglid;
+
+    for (uint spatial_idx = 0; spatial_idx < ITEMS_NUM / GWS; ++spatial_idx) {
+        INPUT_PACKED_TYPE in_pack = INPUT_PACKED_BLOCK_READ(input + input_offset);
+
+        __attribute__((opencl_unroll_hint))
+        for (uint si = 0; si < SIMD; ++si) {
+            uint output_spatial = output_spatial_base + si;
+            MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance;
+            OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+                FUSED_OPS;
+                result = FUSED_OPS_RESULT;
+#else
+                result = TO_OUTPUT_TYPE(normalized);
+#endif
+#if !OUTPUT_PAD_IN_ITEMS
+            DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result);
+#else
+            uint x = output_spatial % OUTPUT_SIZE_X;
+            uint y = output_spatial / OUTPUT_SIZE_X;
+            output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+            DT_OUTPUT_BLOCK_WRITE(output, output_offset, result);
+#endif
+        }
+        input_offset += GWS * FSV;
+        output_offset += GWS * FSV;
+        output_spatial_base += GWS;
+    }
+
+    // [constexpr] Number of leftovers after full local work-group iterations.
+    const uint lws_uniform_leftovers = ITEMS_NUM % GWS;
+    // [constexpr] Number of sub-groups that can process leftovers loading SIMD items.
+    const uint lws_uniform_leftovers_full_simds = lws_uniform_leftovers / SIMD;
+    // [constexpr] Number of leftovers after full sub-group processing.
+    const uint sg_uniform_leftovers = lws_uniform_leftovers % SIMD;
+
+    if (lws_uniform_leftovers_full_simds > 0 && sgid < lws_uniform_leftovers_full_simds) {
+        // Process leftovers that can use full sub-group.
+        INPUT_PACKED_TYPE in_pack = INPUT_PACKED_BLOCK_READ(input + input_offset);
+
+        __attribute__((opencl_unroll_hint))
+        for (uint si = 0; si < SIMD; ++si) {
+            uint output_spatial = output_spatial_base + si;
+            MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance;
+            OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+                FUSED_OPS;
+                result = FUSED_OPS_RESULT;
+#else
+                result = TO_OUTPUT_TYPE(normalized);
+#endif
+#if !OUTPUT_PAD_IN_ITEMS
+            DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result);
+#else
+            uint x = output_spatial % OUTPUT_SIZE_X;
+            uint y = output_spatial / OUTPUT_SIZE_X;
+            output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+            DT_OUTPUT_BLOCK_WRITE(output, output_offset, result);
+#endif
+        }
+    } else if (lws_uniform_leftovers > 0 &&
+               sg_uniform_leftovers > 0 &&
+               sgid == lws_uniform_leftovers_full_simds) {
+        // TODO: It may be worth treating the data here as spread across the sub-group.
+        // Rest of leftovers: still use the whole sub-group, but change addresses so no extra data is loaded.
+        INPUT_PACKED_TYPE in_pack;
+        uint pack_idx = 0;
+        if (sg_uniform_leftovers >= 8) {
+            INPUT_TYPE8 tmp_in = DT_INPUT_BLOCK_READ8(input, input_offset + pack_idx * SIMD);
+            in_pack[pack_idx + 0] = tmp_in[0];
+            in_pack[pack_idx + 1] = tmp_in[1];
+            in_pack[pack_idx + 2] = tmp_in[2];
+            in_pack[pack_idx + 3] = tmp_in[3];
+            in_pack[pack_idx + 4] = tmp_in[4];
+            in_pack[pack_idx + 5] = tmp_in[5];
+            in_pack[pack_idx + 6] = tmp_in[6];
+            in_pack[pack_idx + 7] = tmp_in[7];
+            pack_idx += 8;
+        }
+        if (sg_uniform_leftovers % 8 >= 4) {
+            INPUT_TYPE4 tmp_in = DT_INPUT_BLOCK_READ4(input, input_offset + pack_idx * SIMD);
+            in_pack[pack_idx + 0] = tmp_in[0];
+            in_pack[pack_idx + 1] = tmp_in[1];
+            in_pack[pack_idx + 2] = tmp_in[2];
+            in_pack[pack_idx + 3] = tmp_in[3];
+            pack_idx += 4;
+        }
+        if (sg_uniform_leftovers % 4 >= 2) {
+            INPUT_TYPE2 tmp_in = DT_INPUT_BLOCK_READ2(input, input_offset + pack_idx * SIMD);
+            in_pack[pack_idx + 0] = tmp_in[0];
+            in_pack[pack_idx + 1] = tmp_in[1];
+            pack_idx += 2;
+        }
+        if (sg_uniform_leftovers % 2 == 1) {
+            in_pack[pack_idx] = DT_INPUT_BLOCK_READ(input, input_offset + pack_idx * SIMD);
+        }
+
+        OUTPUT_PACKED_TYPE result;
+        __attribute__((opencl_unroll_hint))
+        for (uint si = 0; si < sg_uniform_leftovers; ++si) {
+            uint output_spatial = output_spatial_base + si;
+            MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance;
+            OUTPUT_TYPE result;
+#if HAS_FUSED_OPS
+            FUSED_OPS;
+            result = FUSED_OPS_RESULT;
+#else
+            result = TO_OUTPUT_TYPE(normalized);
+#endif
+#if !OUTPUT_PAD_IN_ITEMS
+            DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result);
+#else
+            uint x = output_spatial % OUTPUT_SIZE_X;
+            uint y = output_spatial / OUTPUT_SIZE_X;
+            output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+            DT_OUTPUT_BLOCK_WRITE(output, output_offset, result);
+#endif
+        }
+    }
+#else // => !OUTPUT_IS_FP
+    input_offset = data_sets_offset + sgid * SIMD * FSV;
+    uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0) + sgid * SIMD * FSV;
+    uint output_spatial = sgid * SIMD + sglid;
+
+    for (uint spatial_idx = 0; spatial_idx < ITEMS_NUM / GWS; ++spatial_idx) {
+        INPUT_PACKED_TYPE in_pack = ((const __global INPUT_PACKED_TYPE*)(input + input_offset))[sglid];
+
+        OUTPUT_PACKED_TYPE result;
+        __attribute__((opencl_unroll_hint))
+        for (uint set_idx = 0; set_idx < FSV; ++set_idx) {
+            MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx);
+            #if HAS_FUSED_OPS
+                FUSED_OPS;
+                result[set_idx] = FUSED_OPS_RESULT;
+            #else
+                result[set_idx] = TO_OUTPUT_TYPE(normalized);
+            #endif
+        }
+#if !OUTPUT_PAD_IN_ITEMS
+        ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result;
+#else
+        uint x = output_spatial % OUTPUT_SIZE_X;
+        uint y = output_spatial / OUTPUT_SIZE_X;
+        output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+        ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result;
+#endif
+
+        input_offset += GWS * FSV;
+        output_offset += GWS * FSV;
+        output_spatial += GWS;
+    }
+
+    // [constexpr] Number of leftovers after full local work-group iterations.
+    const uint lws_uniform_leftovers = ITEMS_NUM % GWS;
+    // [constexpr] Number of sub-groups that can process leftovers loading SIMD items.
+    const uint lws_uniform_leftovers_full_simds = lws_uniform_leftovers / SIMD;
+    // [constexpr] Number of leftovers after full sub-group processing.
+    const uint sg_uniform_leftovers = lws_uniform_leftovers % SIMD;
+
+    if (lws_uniform_leftovers_full_simds > 0 && sgid < lws_uniform_leftovers_full_simds) {
+        // Process leftovers that can use full sub-group.
+        INPUT_PACKED_TYPE in_pack = ((const __global INPUT_PACKED_TYPE*)(input + input_offset))[sglid];
+
+        OUTPUT_PACKED_TYPE result;
+        __attribute__((opencl_unroll_hint))
+        for (uint set_idx = 0; set_idx < FSV; ++set_idx) {
+            MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx);
+            #if HAS_FUSED_OPS
+                FUSED_OPS;
+                result[set_idx] = FUSED_OPS_RESULT;
+            #else
+                result[set_idx] = TO_OUTPUT_TYPE(normalized);
+            #endif
+        }
+#if !OUTPUT_PAD_IN_ITEMS
+        ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result;
+#else
+        uint x = output_spatial % OUTPUT_SIZE_X;
+        uint y = output_spatial / OUTPUT_SIZE_X;
+        output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+        ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result;
+#endif
+    } else if (lws_uniform_leftovers > 0 &&
+               sg_uniform_leftovers > 0 &&
+               sgid == lws_uniform_leftovers_full_simds) {
+        // TODO: It may be worth treating the data here as spread across the sub-group.
+        // Rest of leftovers: still use the whole sub-group, but change addresses so no extra data is loaded.
+        INPUT_PACKED_TYPE in_pack = ((const __global INPUT_PACKED_TYPE*)(input + input_offset))[sglid % sg_uniform_leftovers];
+
+        OUTPUT_PACKED_TYPE result;
+        __attribute__((opencl_unroll_hint))
+        for (uint set_idx = 0; set_idx < FSV; ++set_idx) {
+            MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx);
+            #if HAS_FUSED_OPS
+                FUSED_OPS;
+                result[set_idx] = FUSED_OPS_RESULT;
+            #else
+                result[set_idx] = TO_OUTPUT_TYPE(normalized);
+            #endif
+        }
+        if (sglid < sg_uniform_leftovers) {
+#if !OUTPUT_PAD_IN_ITEMS
+            ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result;
+#else
+            uint x = output_spatial % OUTPUT_SIZE_X;
+            uint y = output_spatial / OUTPUT_SIZE_X;
+            output_offset = OUTPUT_GET_INDEX(b, f, y, x);
+            ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result;
+#endif
+        }
+    }
+#endif
+}
+
+#endif
+// ================================================================================================
+
+#undef FSV
+#undef INPUT_SLICE_PITCH
+#undef SG_NUM
+
+#undef INPUT_TYPE2
+#undef INPUT_TYPE4
+#undef INPUT_TYPE8
+#undef INPUT_PACKED_TYPE
+#undef OUTPUT_PACKED_TYPE
+#undef INT_PACKED_TYPE
+#undef MEAN_PACKED_TYPE
+#undef TO_MEAN_PACKED_TYPE
+
+#undef INPUT_PACKED_BLOCK_READ
+#undef OUTPUT_PAD_IN_ITEMS
+
+#undef CEIL_DIV
+#undef USE_IMAD
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad_accumulate.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad_accumulate.cl
new file mode 100644 (file)
index 0000000..fbbc8a2
--- /dev/null
@@ -0,0 +1,107 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/data_types.cl"
+
+// ==============================================================================================================================
+// DECLARE_PACKED_ACCUMULATE(Name, AccT, InputT, SliceSize, SlicePitch, Items, Workers, AccOp)
+// DECLARE_PACKED_ACCUMULATE_EARGS(Name, AccT, InputT, SliceSize, SlicePitch, Items, Workers, AccOp, ExtraArgsDecl, ExtraArgs)
+//
+// Declares function "Name" performing parallel packed accumulation:
+// AccT<SliceSize> Name (const __global InputT* input, uint offset, uint worker_id  ExtraArgsDecl)
+//
+// Template arguments:
+//   Name             - Name of function to declare.
+//   AccT             - Type of accumulator variable. Can't be vector type. Examples: int, float, half.
+//   InputT           - Type of input data. Can't be vector type. Examples: int, float, half.
+//   SliceSize        - Number of values in the packed slice to accumulate in each work-item. One of: 2, 4, 8, 16.
+//   SlicePitch       - Pitch between consecutive input slices in "input".
+//   Items            - Total number of items to accumulate across all work-items.
+//   Workers          - Number of work-items performing accumulation.
+//   AccOp            - Name of the operation used to perform accumulation.
+//                      It is invoked like a function and must return the new accumulator value.
+//                      Expected interface:
+//                          AccT AccOp(AccT current, InputT val, uint index ExtraArgs)
+//                          current - current accumulation value
+//                          val - currently processed input value
+//                          index - number of item inside slice currently processed
+//                          ExtraArgs - optional extra arguments passed as is from template argument
+//                          returns: new accumulator value after accumulating "val" with "current"
+//   ExtraArgsDecl    - Optional extra arguments declaration to pass to function.
+//   ExtraArgs        - Optional extra arguments to pass to "AccOp" using names declared in "ExtraArgsDecl".
+//
+// Function arguments:
+//   input          - Pointer to global memory from which values will be read to accumulate
+//   offset         - Offset into "input" from where accumulation should start
+//   worker_id      - Number of current work-item
+//   ExtraArgsDecl  - Optional extra arguments, declared from template argument.
+// 
+// Pseudocode:
+//  function Name(input, offset, worker_id, ExtraArgs... eargs) {
+//      AccT<SliceSize> accumulator = 0;
+//      for (uint idx = worker_id; idx < Items; idx += Workers) {
+//          InputT<SliceSize> in = vload<SliceSize>(0, &input[offset + idx * SlicePitch]);
+//          for (uint si = 0; si < SliceSize; ++si) {
+//              accumulator[si] = AccOp(accumulator[si], in[si], si, eargs...)
+//          }
+//      }
+//      return accumulator;
+//  }
+//
+// ==============================================================================================================================
+
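+// Illustrative instantiation (a sketch only; "acc_sum_example" and the numeric arguments are
+// placeholders, not values used by the kernels in this directory):
+//
+//   DECLARE_PACKED_ACCUMULATE(acc_sum_example, int, char, 16 /*SliceSize*/, 16 /*SlicePitch*/,
+//                             256 /*Items*/, 64 /*Workers*/, ACCUMULATE_SUM)
+//
+//   // inside a kernel: each of the 64 workers sums every 64th slice of 16 chars
+//   int16 partial = FUNC_CALL(acc_sum_example)(input, offset, worker_id);
+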
+#define ACCUMULATE_SUM(a, b, idx)       ((a) + (b))
+#define ACCUMULATE_SUM_SQ(a, b, idx)    ((a) + ((b) * (b)))
+
+#define DECLARE_PACKED_ACCUMULATE_EARGS(Name, AccT, InputT, SliceSize, SlicePitch, Items, Workers, AccOp, ExtraArgsDecl, ExtraArgs)     \
+inline MAKE_VECTOR_TYPE(AccT, SliceSize) FUNC(Name)(const __global InputT* input,                                                       \
+                                                    uint offset,                                                                        \
+                                                    uint worker_id                                                                      \
+                                                    ExtraArgsDecl) {                                                                    \
+    typedef MAKE_VECTOR_TYPE(InputT, SliceSize) packed_in_t;                                                                            \
+    typedef MAKE_VECTOR_TYPE(AccT, SliceSize) packed_acc_t;                                                                             \
+                                                                                                                                        \
+    packed_acc_t acc = 0;  /* Accumulation variable */                                                                                  \
+                                                                                                                                        \
+    uint input_offset = offset + worker_id * (SlicePitch);  /* Current input offset */                                                  \
+                                                                                                                                        \
+    /* Uniform loop to help compiler in unrolling */                                                                                    \
+    for (uint spatial_idx = 0; spatial_idx < (Items) / (Workers); ++spatial_idx) {                                                      \
+        packed_in_t in_pack = ((const __global packed_in_t*)(input + input_offset))[0];                                                 \
+                                                                                                                                        \
+        input_offset += (Workers) * (SlicePitch);                                                                                       \
+                                                                                                                                        \
+        __attribute__((opencl_unroll_hint))                                                                                             \
+        for (uint set_idx = 0; set_idx < (SliceSize); ++set_idx) {                                                                      \
+            acc[set_idx] = AccOp(acc[set_idx], in_pack[set_idx], set_idx  ExtraArgs);                                                   \
+        }                                                                                                                               \
+    }                                                                                                                                   \
+                                                                                                                                        \
+    /* [constexpr] Number of leftovers after all uniform iterations */                                                                  \
+    const uint leftovers = (Items) % (Workers);                                                                                         \
+                                                                                                                                        \
+    if (leftovers > 0 && worker_id < leftovers) {                                                                                       \
+        packed_in_t in_pack = ((const __global packed_in_t*)(input + input_offset))[0];                                                 \
+                                                                                                                                        \
+        __attribute__((opencl_unroll_hint))                                                                                             \
+        for (uint set_idx = 0; set_idx < (SliceSize); ++set_idx) {                                                                      \
+            acc[set_idx] = AccOp(acc[set_idx], in_pack[set_idx], set_idx  ExtraArgs);                                                   \
+        }                                                                                                                               \
+    }                                                                                                                                   \
+                                                                                                                                        \
+    return acc;                                                                                                                         \
+}
+
+#define DECLARE_PACKED_ACCUMULATE(Name, AccT, InputT, SliceSize, SlicePitch, Items, Workers, AccOp)                                     \
+    DECLARE_PACKED_ACCUMULATE_EARGS(Name, AccT, InputT, SliceSize, SlicePitch, Items, Workers, AccOp, , )
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad_reduce.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad_reduce.cl
new file mode 100644 (file)
index 0000000..1b61b79
--- /dev/null
@@ -0,0 +1,125 @@
+// Copyright (c) 2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+
+// ==============================================================================================================================
+// DECLARE_WG_PACKED_REDUCE_ADD(Name, Type, VecSize, SgNum, PostOp)
+//
+// Declares function "Name" performing work-group reduction on vector data, using addition operator:
+//   Type Name (Type<VecSize> value, __local Type* slm_acc)
+// Returns reduction result as sub-group vector, for example when VecSize equals 4:
+//   work-item for which get_sub_group_local_id() == 0 will hold reduced values from value.s0
+//   work-item for which get_sub_group_local_id() == 1 will hold reduced values from value.s1
+//   work-item for which get_sub_group_local_id() == 2 will hold reduced values from value.s2
+//   work-item for which get_sub_group_local_id() == 3 will hold reduced values from value.s3
+//  for other work-items in sub-group the result will be undefined.
+// All work-items in sub-group must enter declared function.
+//
+// Template arguments:
+//   Name    - Name of function to declare.
+//   Type    - Type of values to reduce.  Can't be vector type. Examples: int, float, half.
+//   VecSize - Vector size of input, one of 2,4,8,16. Must be smaller or equal to sub-group size.
+//   SgNum   - Number of sub-groups inside work-group.
+//   PostOp  - Operation to perform on reduced values.
+//             Called as PostOp(value), where "value" is reduction result, and call should evaluate to expression returning final result.
+//
+// Function arguments:
+//   value   - vector of "VecSize" elements of "Type" holding values to reduce.
+//   slm_acc - pointer to local memory used for reduction. Must have size of at least ("SgNum" - 1) * "VecSize".
+//
+// Pseudocode:
+//  function Name(value, slm_acc) {
+//      Type result;
+//      for (uint vi = 0; vi < VecSize; ++vi) {
+//          Type tmp = work_group_reduce_add(value[vi]);
+//          if (get_sub_group_local_id() == vi) {
+//              result = tmp;
+//          }
+//      }
+//      return result;
+// }
+//
+// Notes:
+//   If the local memory is going to be reused, an additional barrier(CLK_LOCAL_MEM_FENCE) is required to ensure that all usage inside
+//   the declared function has finished.
+// ==============================================================================================================================
+
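+// Illustrative instantiation (a sketch only; "reduce_sum_example" and the sizes are placeholders,
+// not values used by the kernels in this directory):
+//
+//   DECLARE_WG_PACKED_REDUCE_ADD(reduce_sum_example, float, 16 /*VecSize*/, 4 /*SgNum*/, REDUCE_NO_POST_OP)
+//
+//   // inside a kernel with 4 sub-groups of SIMD >= 16 work-items, where "partial" is a float16:
+//   __local float slm_acc[(4 - 1) * 16];
+//   float reduced = FUNC_CALL(reduce_sum_example)(partial, slm_acc);
+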
+#define REDUCE_NO_POST_OP(val) (val)
+
+#define DECLARE_WG_PACKED_REDUCE_ADD(Name, Type, VecSize, SgNum, PostOp)                                                \
+    inline Type FUNC(Name) (MAKE_VECTOR_TYPE(Type, VecSize) value, __local Type* slm_acc) {                             \
+        typedef MAKE_VECTOR_TYPE(Type, VecSize) packed_t;                                                               \
+                                                                                                                        \
+        Type result;                                                                                                    \
+                                                                                                                        \
+        /* [uniform] Current sub-groups id */                                                                           \
+        const uint sgid = get_sub_group_id();                                                                           \
+        /* Id of work-item inside sub-group */                                                                          \
+        const uint sglid = get_sub_group_local_id();                                                                    \
+        /* [constexpr] Maximum simd/sub-group size */                                                                   \
+        const uint simd = get_max_sub_group_size();                                                                     \
+                                                                                                                        \
+        /* Accumulation inside sub-group */                                                                             \
+        packed_t acc;  /* [uniform] Accumulator variable */                                                             \
+        __attribute__((opencl_unroll_hint))                                                                             \
+        for (uint idx = 0; idx < VecSize; ++idx) {                                                                      \
+            acc[idx] = sub_group_reduce_add(value[idx]);                                                                \
+        }                                                                                                               \
+        if ((SgNum) != 1) {                                                                                             \
+            /* More than one sub-group in work-group, reduce using local memory */                                      \
+            /* Store partial results into local memory from sub-groups other than first one */                          \
+            if (sgid != 0 && (sglid < VecSize || simd == VecSize)) {                                                    \
+                slm_acc[(sgid - 1) * VecSize + sglid] = acc[sglid];                                                     \
+            }                                                                                                           \
+            barrier(CLK_LOCAL_MEM_FENCE);                                                                               \
+            /* Accumulate partial results inside first sub-group */                                                     \
+            if (sgid == 0) {                                                                                            \
+                __attribute__((opencl_unroll_hint))                                                                     \
+                for (uint vi = 0; vi < VecSize; ++vi) {                                                                 \
+                    /* Accumulate single vector element using sub_group_reduce_add */                                   \
+                    /* Last work-item inside sub-group holds previous value (iteration or sub-group reduction stage) */ \
+                                                                                                                        \
+                    Type tmp = acc[vi];                                                                                 \
+                    __attribute__((opencl_unroll_hint))                                                                 \
+                    for (uint sg = 0; sg < (SgNum) - 1; sg += (simd - 1)) {                                             \
+                        bool last_sglid = sglid == simd - 1;                                                            \
+                        bool sglid_inside_sgs = sg + simd - 1 <= (SgNum) - 1 || sg + sglid < (SgNum) - 1;               \
+                        Type tmp_in_slm = slm_acc[sg * VecSize + sglid * VecSize + vi];                                 \
+                        tmp = last_sglid ? tmp :                                                                        \
+                              sglid_inside_sgs ? tmp_in_slm                                                             \
+                              : 0;                                                                                      \
+                        tmp = sub_group_reduce_add(tmp);                                                                \
+                    }                                                                                                   \
+                    acc[vi] = tmp;                                                                                      \
+                }                                                                                                       \
+                if (sglid < VecSize || simd == VecSize) {                                                               \
+                    result = PostOp(acc[sglid]);                                                                        \
+                    slm_acc[sglid] = result;                                                                            \
+                }                                                                                                       \
+            }                                                                                                           \
+            barrier(CLK_LOCAL_MEM_FENCE);                                                                               \
+            /* Read result in all other sub-groups */                                                                   \
+            if (sgid != 0 && (sglid < VecSize || simd == VecSize)) {                                                    \
+                result = slm_acc[sglid];                                                                                \
+            }                                                                                                           \
+        } else {                                                                                                        \
+            /* Single sub-group case, just transpose the data to correct layout */                                      \
+            if (sglid < VecSize || simd == VecSize) {                                                                   \
+                result = PostOp(acc[sglid]);                                                                            \
+                slm_acc[sglid] = result;                                                                                \
+            }                                                                                                           \
+        }                                                                                                               \
+        return result;                                                                                                  \
+    }
index 6c8d8e2..cdb4cd1 100644 (file)
@@ -87,7 +87,7 @@ KERNEL(pooling_gpu_int8_ref)(
 #elif OUTPUT_LAYOUT_B_FS_YX_FSV16
     const uint x = get_global_id(1);
     const uint y = get_global_id(2);
-    const uint bf = get_global_id(0);
+    const uint bf = (uint)get_global_id(0);
     const uint f = bf / INPUT0_BATCH_NUM;
     const uint b = bf % INPUT0_BATCH_NUM;
     const uint z = 0;
index 445d69a..acb9f6d 100644 (file)
@@ -18,6 +18,9 @@
 
 #include "include/data_types.cl"
 
+#define INPUT_TYPE4 MAKE_VECTOR_TYPE(INPUT_REORDER_TYPE, 4)
+#define OUTPUT_TYPE4 MAKE_VECTOR_TYPE(OUTPUT_REORDER_TYPE, 4)
+
 ///////////////////////// Input Index /////////////////////////
 inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
 {
@@ -48,12 +51,16 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint w, uint z, uint y, uint
 }
 
 KERNEL (reorder_data)(
-#if defined INPUT0_LAYOUT_NV12
+#if INPUT0_LAYOUT_NV12 || INPUT0_LAYOUT_IMAGE_2D_RGBA
     read_only image2d_t input,
 #else
     const __global INPUT_REORDER_TYPE* input,
 #endif
+#if OUTPUT_LAYOUT_IMAGE_2D_RGBA
+    write_only image2d_t output
+#else
     __global OUTPUT_REORDER_TYPE* output
+#endif
 #ifdef MEAN_SUBTRACT_IN_BUFFER
     , __global MEAN_SUBTRACT_TYPE* mean_subtract
 #endif
@@ -95,7 +102,7 @@ KERNEL (reorder_data)(
 #if defined INPUT0_LAYOUT_NV12
     const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP;
     float4 colorVYU = read_imagef(input, sampler, (int2)(x, y));
-    
+
     float Ycomponent = mad(colorVYU.s1, 296.82f, -18.624f);
     float Ucomponent = mad(colorVYU.s2, 255.0f, -128.f);
     float Vcomponent = mad(colorVYU.s0, 255.0f, -128.f);
@@ -103,7 +110,20 @@ KERNEL (reorder_data)(
     float B = clamp(mad(Vcomponent, 1.596f, Ycomponent), 0.f, 255.f);
     float R = clamp(mad(Ucomponent, 2.018f, Ycomponent), 0.f, 255.f);
     float G = clamp(mad(Vcomponent, -0.813f, mad(Ucomponent, -0.391f, Ycomponent)), 0.f, 255.f);
-
+#elif defined INPUT0_LAYOUT_IMAGE_2D_RGBA
+    const sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP;
+    OUTPUT_TYPE4 colorRGBA = IMAGE_READ(input, (int2)(x, y));
+#elif defined OUTPUT_LAYOUT_IMAGE_2D_RGBA
+    uint8 ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, f, w, z, y, x);
+    const uint input_idx_R  = FUNC_CALL(get_input_index)(b, 0, w, z, y, x);
+    const uint input_idx_G  = FUNC_CALL(get_input_index)(b, 1, w, z, y, x);
+    const uint input_idx_B  = FUNC_CALL(get_input_index)(b, 2, w, z, y, x);
+#if OUTPUT_FEATURE_NUM == 3
+    INPUT_TYPE4 colorRGBA = { TO_INPUT_REORDER_TYPE(input[input_idx_R]), TO_INPUT_REORDER_TYPE(input[input_idx_G]), TO_INPUT_REORDER_TYPE(input[input_idx_B]), TO_INPUT_REORDER_TYPE(0.f) };
+#else
+    const uint input_idx_A  = FUNC_CALL(get_input_index)(b, 3, w, z, y, x);
+    INPUT_TYPE4 colorRGBA = { TO_INPUT_REORDER_TYPE(input[input_idx_R]), TO_INPUT_REORDER_TYPE(input[input_idx_G]), TO_INPUT_REORDER_TYPE(input[input_idx_B]), TO_INPUT_REORDER_TYPE(input[input_idx_A]) };
+#endif
 #else
     uint8 ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, f, w, z, y, x);
     const uint input_idx  = FUNC_CALL(get_input_index)(b, f, w, z, y, x);
@@ -137,6 +157,23 @@ KERNEL (reorder_data)(
     ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 2, w, z, y, x);
     output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
     output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(B), NL_M, NL_N);
+#elif INPUT0_LAYOUT_IMAGE_2D_RGBA
+    uint8 ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 0, w, z, y, x);
+    uint output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
+    output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s0), NL_M, NL_N);
+    ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 1, w, z, y, x);
+    output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
+    output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s1), NL_M, NL_N);
+    ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 2, w, z, y, x);
+    output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
+    output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s2), NL_M, NL_N);
+#if INPUT0_FEATURE_NUM == 4
+    ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 3, w, z, y, x);
+    output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
+    output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s3), NL_M, NL_N);
+#endif
+#elif OUTPUT_LAYOUT_IMAGE_2D_RGBA
+    IMAGE_WRITE(output, (int2)(x, y), colorRGBA);
 #else
 #if INPUT0_IS_FP && !OUTPUT_IS_FP
     // TODO: check if this round really needed. Right now it's added to have the same behavior as CPU plugin
@@ -147,3 +184,6 @@ KERNEL (reorder_data)(
 #endif
 #endif
 }
+
+#undef INPUT_TYPE4
+#undef OUTPUT_TYPE4
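
The reorder_data changes above teach the kernel to read from or write to an RGBA image: when the input is image_2d_rgba, one image read yields all four channels of a pixel, which is then scattered into planar feature slices; when the output is image_2d_rgba, three or four planar feature values are gathered into one pixel and written with a single IMAGE_WRITE. A minimal host-side sketch of that planar/interleaved mapping, assuming a dense bfyx buffer and illustrative names:

    #include <array>
    #include <cstddef>
    #include <vector>

    // Scatter one interleaved RGBA pixel at (x, y) into a planar [C][H][W] buffer.
    void rgba_pixel_to_planar(const std::array<float, 4>& rgba, std::vector<float>& planar,
                              std::size_t C, std::size_t H, std::size_t W,
                              std::size_t y, std::size_t x) {
        for (std::size_t c = 0; c < C && c < 4; ++c)
            planar[(c * H + y) * W + x] = rgba[c];
    }

    // Gather a planar pixel back into RGBA; with only three features the alpha slot
    // stays 0, matching the OUTPUT_FEATURE_NUM == 3 branch in the kernel.
    std::array<float, 4> planar_to_rgba_pixel(const std::vector<float>& planar,
                                              std::size_t C, std::size_t H, std::size_t W,
                                              std::size_t y, std::size_t x) {
        std::array<float, 4> rgba{0.f, 0.f, 0.f, 0.f};
        for (std::size_t c = 0; c < C && c < 4; ++c)
            rgba[c] = planar[(c * H + y) * W + x];
        return rgba;
    }
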
index 07b69f2..5a42d46 100644 (file)
@@ -91,6 +91,12 @@ inline uint FUNC(get_input_index)(uint g, uint o, uint i, uint z, uint y, uint x
 #elif defined INPUT0_LAYOUT_GYXIO || \
       defined INPUT0_LAYOUT_GOIYX
     return GET_FILTER_GOIYX(INPUT0, g, o, i, y, x);
+#elif defined INPUT0_LAYOUT_OS_IS_YX_OSV16_ISV16
+    return GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(INPUT0, o, i, y, x);
+#elif defined INPUT0_LAYOUT_GS_OI_YXS_GSV16_YXSV4
+    return GET_FILTER_GS_OI_YXS_GSV16_YXSV4_INDEX(INPUT0, g, o, i, y, x);
+#elif defined INPUT0_LAYOUT_GS_OI_YXS_GSV32_YXSV4
+    return GET_FILTER_GS_OI_YXS_GSV32_YXSV4_INDEX(INPUT0, g, o, i, y, x);
 #else
 #error reorder_weights.cl: input format - not supported
 #endif
@@ -198,6 +204,14 @@ inline uint FUNC(get_output_index)(uint g, uint o, uint i, uint z, uint y, uint
     return GET_FILTER_GS_OI_YXS_GSV4_YXSV4_INDEX(OUTPUT, g, o, i, y, x);
 #elif defined OUTPUT_LAYOUT_G_OS_IS_YX_ISV16_OSV16
     return GET_FILTER_G_OS_IS_YX_ISV16_OSV16_INDEX(OUTPUT, g, o, i, y, x, SUB_GROUP_SIZE);
+#elif defined OUTPUT_LAYOUT_OS_IS_YX_OSV16_ISV16
+    return GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(OUTPUT, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_GS_OI_YXS_GSV16_YXSV4
+    return GET_FILTER_GS_OI_YXS_GSV16_YXSV4_INDEX(OUTPUT, g, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_GS_OI_YXS_GSV32_YXSV4
+    return GET_FILTER_GS_OI_YXS_GSV32_YXSV4_INDEX(OUTPUT, g, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_G_OS_IS_YX_OSV16_ISV4
+    return GET_FILTER_G_OS_IS_YX_OSV16_ISV4_INDEX(OUTPUT, g, o, i, y, x);
 #else
 #error reorder_weights.cl: output format - not supported
 #endif
index e45c1dd..c965d04 100644 (file)
@@ -49,7 +49,37 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
 #endif
 )
 {
-#if defined(SAMPLE_TYPE_NEAREST)
+#if defined(SAMPLE_TYPE_NEAREST) && FEATURE_PACKED_MODE
+    typedef MAKE_VECTOR_TYPE(INPUT0_TYPE, PACK_SIZE) in_pack_t;
+    typedef MAKE_VECTOR_TYPE(OUTPUT_TYPE, PACK_SIZE) out_pack_t;
+
+    const int ox = get_global_id(0);
+    const int oy = get_global_id(1) % OUTPUT_SIZE_Y;
+    const int oz = get_global_id(1) / OUTPUT_SIZE_Y;
+    const int feature = (get_global_id(2) * PACK_SIZE) % OUTPUT_FEATURE_NUM;
+    const int batch = (get_global_id(2) * PACK_SIZE) / OUTPUT_FEATURE_NUM;
+    const int ix = floor(ox * X_RATIO);
+    const int iy = floor(oy * Y_RATIO);
+    const int iz = floor(oz * Z_RATIO);
+
+    uint input_idx = FUNC_CALL(get_input_index)(batch, feature, iz, iy, ix);
+    uint output_idx = FUNC_CALL(get_output_index)(batch, feature, oz, oy, ox);
+
+    in_pack_t interp_val_pack = ((const __global in_pack_t*)(input + input_idx))[0];
+    out_pack_t res;
+    unroll_for (uint pi = 0; pi < PACK_SIZE; ++pi) {
+        INPUT0_TYPE interp_val = interp_val_pack[pi];
+    #if HAS_FUSED_OPS
+        #define OF_ID (feature + pi)
+        FUSED_OPS;
+        res[pi] = FUSED_OPS_RESULT;
+    #else
+        res[pi] = ACTIVATION(interp_val, ACTIVATION_PARAMS);
+    #endif
+    }
+    ((__global out_pack_t*)(output + output_idx))[0] = res;
+
+#elif defined(SAMPLE_TYPE_NEAREST)
     const int ox = get_global_id(0);
 #if OUTPUT_DIMS <= 4
     const int oy = get_global_id(1);
@@ -79,29 +109,29 @@ KERNEL (resample_gpu_ref)(__global INPUT0_TYPE* input,
     const int oy = get_global_id(1);
     const int feature = 0;
     const int batch = get_global_id(2);
-    const INPUT0_TYPE ix = TO_INPUT0_TYPE(X_RATIO) * ox;
-    const INPUT0_TYPE iy = TO_INPUT0_TYPE(Y_RATIO) * oy;
+    const float ix = X_RATIO * ox;
+    const float iy = Y_RATIO * oy;
 
 #ifdef LEFTOVERS
     if (ox >= OUTPUT_SIZE_X)
         return;
 #endif
 
-    const int top_y_index    = (int)(floor(iy));
-    const int bottom_y_index = (int)(min(ceil(iy), TO_INPUT0_TYPE(INPUT0_SIZE_Y) - 1));
-    const int left_x_index   = (int)(floor(ix));
-    const int right_x_index  = (int)(min(ceil(ix), TO_INPUT0_TYPE(INPUT0_SIZE_X) - 1));
+    const int top_y_index = (int)(floor(iy));
+    const int bottom_y_index = (int)(min(TO_INPUT0_TYPE(ceil(iy)), TO_INPUT0_TYPE(INPUT0_SIZE_Y) - 1));
+    const int left_x_index = (int)(floor(ix));
+    const int right_x_index = (int)(min(TO_INPUT0_TYPE(ceil(ix)), TO_INPUT0_TYPE(INPUT0_SIZE_X) - 1));
 
-    const INPUT0_TYPE dx = ix - left_x_index;
-    const INPUT0_TYPE dy = iy - top_y_index;
+    const INPUT0_TYPE dx = TO_INPUT0_TYPE(ix - left_x_index);
+    const INPUT0_TYPE dy = TO_INPUT0_TYPE(iy - top_y_index);
 
-    unroll_for (int in_f = 0; in_f < OUTPUT_FEATURE_NUM; in_f++) {
-        INPUT0_TYPE top_left     = input[INPUT0_GET_INDEX(batch, in_f, top_y_index, left_x_index)];
-        INPUT0_TYPE top_right    = input[INPUT0_GET_INDEX(batch, in_f, top_y_index, right_x_index)];
-        INPUT0_TYPE bottom_left  = input[INPUT0_GET_INDEX(batch, in_f, bottom_y_index, left_x_index)];
+    unroll_for(int in_f = 0; in_f < OUTPUT_FEATURE_NUM; in_f++) {
+        INPUT0_TYPE top_left = input[INPUT0_GET_INDEX(batch, in_f, top_y_index, left_x_index)];
+        INPUT0_TYPE top_right = input[INPUT0_GET_INDEX(batch, in_f, top_y_index, right_x_index)];
+        INPUT0_TYPE bottom_left = input[INPUT0_GET_INDEX(batch, in_f, bottom_y_index, left_x_index)];
         INPUT0_TYPE bottom_right = input[INPUT0_GET_INDEX(batch, in_f, bottom_y_index, right_x_index)];
 
-        INPUT0_TYPE top    = top_left + (top_right - top_left) * dx;
+        INPUT0_TYPE top = top_left + (top_right - top_left) * dx;
         INPUT0_TYPE bottom = bottom_left + (bottom_right - bottom_left) * dx;
 
         INPUT0_TYPE interp_val = top + (bottom - top) * dy;
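
Two changes land in resample_gpu_ref: a new feature-packed fast path for nearest sampling that loads and stores PACK_SIZE features as one vector, and the bilinear path now computes source coordinates in plain float before converting only the fractional offsets to INPUT0_TYPE, which avoids precision loss when INPUT0_TYPE is half. A scalar sketch of that bilinear computation, assuming a dense single-feature [H][W] plane and illustrative names:

    #include <algorithm>
    #include <cmath>
    #include <vector>

    float resample_bilinear_at(const std::vector<float>& src, int in_h, int in_w,
                               float x_ratio, float y_ratio, int ox, int oy) {
        const float ix = x_ratio * ox;                 // source coordinates stay in float
        const float iy = y_ratio * oy;
        const int left   = static_cast<int>(std::floor(ix));
        const int top    = static_cast<int>(std::floor(iy));
        const int right  = std::min(static_cast<int>(std::ceil(ix)), in_w - 1);
        const int bottom = std::min(static_cast<int>(std::ceil(iy)), in_h - 1);
        const float dx = ix - left;                    // fractional offsets
        const float dy = iy - top;

        const float tl = src[top * in_w + left],    tr = src[top * in_w + right];
        const float bl = src[bottom * in_w + left], br = src[bottom * in_w + right];
        const float t = tl + (tr - tl) * dx;
        const float b = bl + (br - bl) * dx;
        return t + (b - t) * dy;
    }
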
index d36a49a..7bbe95e 100644 (file)
@@ -431,10 +431,14 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
     } else if (_tensor.LogicalSize() == _tensor.Feature().v) {
         // We support broadcast only if corresponding dimension is equal to 1.
         // Otherwise, dimensions should be equal and using "f" should be safe.
-        if (_tensor.PitchesDifferFromLogicalDims()) {
+        if (_tensor.PitchesDifferFromLogicalDims() && _tensor.SimpleLayout()) {
             std::string f_pitch = std::to_string(_tensor.Feature().pitch);
             definitions.push_back({ safe_index_func_name, "(" + offset + " + (f) * " + f_pitch + ")" });
             definitions.push_back({ index_func_name, "(" + offset + " + (f) * " + f_pitch + ")" });
+        } else if (_tensor.PitchesDifferFromLogicalDims()) {
+            // TODO This should be solved differently, by setting the macro arguments to zero
+            definitions.push_back({ safe_index_func_name, safe_index_func_val });
+            definitions.push_back({ index_func_name, index_func_val });
         } else {
             definitions.push_back({ safe_index_func_name, "f" });
             definitions.push_back({ index_func_name, "f" });
index 41e78f0..0901ce2 100644 (file)
@@ -61,13 +61,17 @@ class OpenCL2CHeaders(object):
         res = '{{"{}",\n(std::string) R"__krnl(\n'.format(kernel_name)
         content = self.append_file_content(filename, filename)
         max_lines = 200
+        max_characters = 16350
+        characters = 1  # Newline character above
 
         for i, line in enumerate(content.split('\n')):
-            if i % max_lines == 0:
+            if (i + 1) % max_lines == 0 or characters + len(line) + 1 > max_characters:
                 res += ')__krnl"\n + R"__krnl('
+                characters = 0
             res += line + '\n'
+            characters += len(line) + 1
 
-        res += ')__krnl"}},\n\n'.format(kernel_name, self.append_file_content(filename, filename))
+        res += ')__krnl"}},\n\n'.format(kernel_name)
 
         return res
 
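
The generator change above splits each embedded kernel not only every max_lines lines but also whenever roughly 16 K characters accumulate, which should keep every individual string literal under common compiler limits on literal length (MSVC caps a single literal at about 16 380 characters). The chunks are re-joined with operator+, so the reassembled kernel source is unchanged. A minimal sketch of that joining, with illustrative strings:

    #include <cassert>
    #include <string>

    int main() {
        // Chunks are concatenated with operator+, mirroring the ')__krnl"\n + R"__krnl('
        // break emitted by the script, so the resulting source is byte-identical.
        std::string kernel = (std::string) R"__krnl(__kernel void noop() {})__krnl"
                           + R"__krnl( /* second chunk */)__krnl";
        assert(kernel.find("noop") != std::string::npos);
        return 0;
    }
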
index 0aa29a6..3c35eea 100644 (file)
@@ -82,11 +82,10 @@ JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_pa
     for (auto& c : conf) {
         std::string fused_ops;
         std::string fused_ops_preload;
-        std::string fused_ops_calc_only;
+        std::string fused_ops_calc;
         std::string in_name = c.input_var_name;
         Datatype in_type = c.input_dt;
-
-        bool can_use_preload = true;
+        bool can_all_use_preload = true;
 
         for (size_t i = 0; i < params.fused_ops.size(); i++) {
             auto fused_dep_codegen = FusedOpsCodeGenerator(params.fused_ops[i]);
@@ -97,20 +96,26 @@ JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_pa
             in_name = out_var;
             in_type = out_type;
 
-            can_use_preload &= fused_dep_codegen.CanPreloadData(c);
+            bool can_use_preload = fused_dep_codegen.CanPreloadData(c);
+            can_all_use_preload &= can_use_preload;
 
             fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
             fused_ops += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
-            fused_ops_preload += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
-            fused_ops_calc_only += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
+            if (can_use_preload)
+                fused_ops_preload += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+            if (c.allow_for_partial_preload && !can_use_preload)
+                fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_LOAD" + c.suffix;
+            fused_ops_calc += "\\\n\tFUSED_OP" + std::to_string(i) + "_ACTION" + c.suffix;
         }
 
         jit.AddConstant(MakeJitConstant("FUSED_OPS" + c.suffix, fused_ops));
         jit.AddConstant(MakeJitConstant("FUSED_OPS_PRELOAD" + c.suffix, fused_ops_preload));
-        jit.AddConstant(MakeJitConstant("FUSED_OPS_CALC" + c.suffix, fused_ops_calc_only));
+        jit.AddConstant(MakeJitConstant("FUSED_OPS_CALC" + c.suffix, fused_ops_calc));
         jit.AddConstant(MakeJitConstant("FUSED_OPS_RESULT" + c.suffix, in_name));
 
-        jit.AddConstant(MakeJitConstant("FUSED_OPS_CAN_USE_PRELOAD" + c.suffix, can_use_preload));
+        bool can_any_use_preload = !fused_ops_preload.empty();
+        jit.AddConstant(MakeJitConstant("FUSED_OPS_CAN_USE_PRELOAD" + c.suffix,
+            can_all_use_preload || (c.allow_for_partial_preload && can_any_use_preload)));
     }
 
     jit.Merge(MakeFusedOpsDeclsJitConstants(params, conf));
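
The rework above splits the per-op bookkeeping: every fused op still contributes its LOAD and ACTION macros to FUSED_OPS, but FUSED_OPS_PRELOAD now receives LOAD only from ops that can actually preload, and when partial preload is allowed the remaining LOADs are emitted into FUSED_OPS_CALC instead, so FUSED_OPS_CAN_USE_PRELOAD can be set as soon as any op preloads. A compact sketch of that distribution, with types and the preload predicate assumed for illustration:

    #include <cstddef>
    #include <string>
    #include <vector>

    struct FusedOpsStrings { std::string all, preload, calc; bool can_use_preload = false; };

    FusedOpsStrings distribute(const std::vector<bool>& can_preload, bool allow_partial) {
        FusedOpsStrings r;
        bool all_preload = true, any_preload = false;
        for (std::size_t i = 0; i < can_preload.size(); ++i) {
            const std::string load   = "FUSED_OP" + std::to_string(i) + "_LOAD ";
            const std::string action = "FUSED_OP" + std::to_string(i) + "_ACTION ";
            r.all += load + action;                        // FUSED_OPS: always load + act
            if (can_preload[i]) { r.preload += load; any_preload = true; }
            else { all_preload = false; if (allow_partial) r.calc += load; }
            r.calc += action;                              // FUSED_OPS_CALC: act (plus late loads)
        }
        r.can_use_preload = all_preload || (allow_partial && any_preload);
        return r;
    }
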
index c711cae..8283c5c 100644 (file)
@@ -117,6 +117,7 @@ std::string toString(DataLayout l) {
         case kernel_selector::DataLayout::bs_fs_yx_bsv16_fsv16:  return "BS_FS_YX_BSV16_FSV16";
         case kernel_selector::DataLayout::bs_fs_zyx_bsv16_fsv16: return "BS_FS_ZYX_BSV16_FSV16";
         case kernel_selector::DataLayout::nv12:                  return "NV12";
+        case kernel_selector::DataLayout::image_2d_rgba:         return "IMAGE_2D_RGBA";
         default:
             return "";
     }
@@ -296,7 +297,7 @@ std::string toString(MVNMode mode) {
 }
 
 std::string toString(WeightsLayout layout) {
-    switch (layout) {
+   switch (layout) {
         case WeightsLayout::oi:                                          return "OI";
         case WeightsLayout::io:                                          return "IO";
         case WeightsLayout::oiyx:                                        return "OIYX";
@@ -304,6 +305,7 @@ std::string toString(WeightsLayout layout) {
         case WeightsLayout::iyxo:                                        return "IYXO";
         case WeightsLayout::yxio:                                        return "YXIO";
         case WeightsLayout::os_is_yx_isv16_osv16:                        return "OS_IS_YX_ISV16_OSV16";
+        case WeightsLayout::os_is_yx_osv16_isv16:                        return "OS_IS_YX_OSV16_ISV16";
         case WeightsLayout::os_iyx_osv16:                                return "OS_IYX_OSV16";
         case WeightsLayout::os_iyx_osv32:                                return "OS_IYX_OSV32";
         case WeightsLayout::os_iyx_osv32__ai32:                          return "OS_IYX_OSV32__AI32";
@@ -362,7 +364,10 @@ std::string toString(WeightsLayout layout) {
         case WeightsLayout::giy_xs_os_xsv2_osv16__ao32:                  return "GIY_XS_OS_XSV2_OSV16__AO32";
         case WeightsLayout::giy_xs_os_xsv2_osv8__ao32:                   return "GIY_XS_OS_XSV2_OSV8__AO32";
         case WeightsLayout::gs_oi_yxs_gsv4_yxsv4:                        return "GS_OI_YXS_GSV4_YXSV4";
+        case WeightsLayout::gs_oi_yxs_gsv16_yxsv4:                       return "GS_OI_YXS_GSV16_YXSV4";
+        case WeightsLayout::gs_oi_yxs_gsv32_yxsv4:                       return "GS_OI_YXS_GSV32_YXSV4";
         case WeightsLayout::g_os_is_yx_isv16_osv16:                      return "G_OS_IS_YX_ISV16_OSV16";
+        case WeightsLayout::g_os_is_yx_osv16_isv4:                       return "G_OS_IS_YX_OSV16_ISV4";
         default: throw std::invalid_argument("Failed to convert WeightsLayout " + std::to_string(layout) + " to string");
     }
 }
index 7c2b1f1..92f2601 100644 (file)
@@ -386,6 +386,7 @@ void ParamsKey::EnableLookUpTableIndicesFormat(Datatype a) {
 }
 
 void ParamsKey::EnableFusedConvEltwiseRWOutOpt() { key.restrict.val.dedicated.fused_conv_eltw.rw_out_opt = 1; }
+void ParamsKey::EnableFusedConvEltwDepthToSpaceFusing() { key.restrict.val.dedicated.fused_conv_eltw.depth_to_space_fused = 1; }
 
 
 void ParamsKey::EnableQuantization(QuantizationType q) {
@@ -466,6 +467,10 @@ ParamsKey Params::GetParamsKey() const {
         k.EnableSubGroupShort();
     }
 
+    if (engineInfo.bSubGroupCharSupport) {
+        k.EnableSubGroupChar();
+    }
+
     return k;
 }
 
index 6121b50..9ac8306 100644 (file)
@@ -217,6 +217,7 @@ public:
                         uint32_t stride : 1;
                         // fused conv eltw
                         uint32_t rw_out_opt : 1;
+                        uint32_t depth_to_space_fused : 1;
                     } fused_conv_eltw;
                     struct quantize_t {
                         uint32_t packed_binary_output : 1;
@@ -231,6 +232,7 @@ public:
             struct val_t {
                 uint32_t subgroup : 1;
                 uint32_t subgroupShort : 1;
+                uint32_t subgroupChar : 1;
             } val;
             uint32_t raw;
         } machineInfo;
@@ -293,6 +295,7 @@ public:
     void EnableGradient() { key.restrict.val.gradient = 1; }
     void EnableSubGroup() { key.machineInfo.val.subgroup = 1; }
     void EnableSubGroupShort() { key.machineInfo.val.subgroupShort = 1; }
+    void EnableSubGroupChar() { key.machineInfo.val.subgroupChar = 1; }
     void EnableNonBiasTerm() { key.restrict.val.nonBias = 1; }
     void EnableBiasPerFeature() { key.restrict.val.biasPerFeatureMap = 1; }
     void EnableBiasPerOutput() { key.restrict.val.biasPerOutput = 1; }
@@ -330,6 +333,7 @@ public:
     void EnableFusedConvEltwInt8Quantization() { key.restrict.val.dedicated.fused_conv_eltw.quantization = 1; }
     void EnableFusedConvEltwOutputCalibration() { key.restrict.val.dedicated.fused_conv_eltw.calibration = 1; }
     void EnableFusedConvEltwEltwiseStride();
+    void EnableFusedConvEltwDepthToSpaceFusing();
 
     void EnableQuantizePackedBinaryOutput() { key.restrict.val.dedicated.quantize.packed_binary_output = 1; }
     void EnableQuantizeScaleShiftOpt() { key.restrict.val.dedicated.quantize.scale_shift_opt = 1; }
@@ -375,6 +379,7 @@ private:
 struct EngineInfo {
     bool bSubGroupSupport = false;
     bool bSubGroupShortSupport = false;
+    bool bSubGroupCharSupport = false;
     bool bFP16Support = false;
     bool bFP64Support = false;
     bool bImageSupport = false;
@@ -468,6 +473,9 @@ struct FusedOpsConfiguration {
     IndexType index_type;
     // Defines outer loops channels where fused op is called.
     std::vector<Tensor::DataChannelName> loop_axes;
+    // If allow_for_partial_preload is false, then it's required that all fused_ops can be preloaded.
+    // If allow_for_partial_preload is true, then not preloaded fused_ops will be loaded in FUSED_OPS_CALC.
+    bool allow_for_partial_preload;
 
     FusedOpsConfiguration(std::string suffix,
                           std::vector<std::string> bfzyx_idx_order,
@@ -478,7 +486,8 @@ struct FusedOpsConfiguration {
                           BoundaryCheck boundary_check = BoundaryCheck::ENABLED,
                           IndexType index_type = IndexType::TENSOR_COORD,
                           Tensor::DataChannelName vec_axis = Tensor::DataChannelName::COUNT,
-                          std::vector<Tensor::DataChannelName> loop_axes = {})
+                          std::vector<Tensor::DataChannelName> loop_axes = {},
+                          bool allow_for_partial_preload = false)
       : suffix(suffix)
       , bfzyx_idx_order(bfzyx_idx_order)
       , input_var_name(input_var_name)
@@ -488,14 +497,18 @@ struct FusedOpsConfiguration {
       , load_type(load_type)
       , boundary_check(boundary_check)
       , index_type(index_type)
-      , loop_axes(loop_axes) { }
+      , loop_axes(loop_axes)
+      , allow_for_partial_preload(allow_for_partial_preload) { }
 
     FusedOpsConfiguration& SetVectorSize(size_t val) { vec_size = val; return *this; }
     FusedOpsConfiguration& SetLoadType(LoadType val) { load_type = val; return *this; }
     FusedOpsConfiguration& SetBoundaryCheck(BoundaryCheck val) { boundary_check = val; return *this; }
     FusedOpsConfiguration& SetIndexType(IndexType val) { index_type = val; return *this; }
     FusedOpsConfiguration& SetVectorAxis(Tensor::DataChannelName val) { vec_axis = val; return *this; }
-    FusedOpsConfiguration& SetLoopAxes(std::vector<Tensor::DataChannelName> val) { loop_axes = std::move(val); return *this; }
+    FusedOpsConfiguration& SetLoopAxes(std::vector<Tensor::DataChannelName> val, bool partial_preload = false) {
+        loop_axes = std::move(val);
+        allow_for_partial_preload = partial_preload;
+        return *this; }
 };
 
 // Instance of fused_operation_desc is added to fused_ops vector if a node has been fused to current one using program_impl::fuse_nodes
index d2f8624..f6baa68 100644 (file)
 
 namespace cldnn {
 
-void err_details::cldnn_print_error_message(const std::string& file,
-                                            int line,
+void err_details::cldnn_print_error_message(
+#ifndef NDEBUG
+                                            const std::string& file, int line,
+#else
+                                            const std::string&, int,
+#endif
                                             const std::string& instance_id,
                                             std::stringstream& msg,
                                             const std::string& add_msg) {
@@ -31,9 +35,6 @@ void err_details::cldnn_print_error_message(const std::string& file,
 
 #ifndef NDEBUG
         source_of_error << file << " at line: " << line << std::endl;
-#else
-        (void)file;
-        (void)line;
 #endif
         source_of_error << "Error has occured for: " << instance_id << std::endl;
 
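
The refactor above moves the NDEBUG check into the parameter list: in release builds the file and line parameters are simply left unnamed, so the old (void)file / (void)line casts that silenced unused-parameter warnings are no longer needed. A minimal standalone sketch of the same pattern, with illustrative names:

    #include <iostream>
    #include <string>

    void report(
    #ifndef NDEBUG
        const std::string& file, int line,
    #else
        const std::string&, int,   // unnamed in release builds, so never "unused"
    #endif
        const std::string& msg) {
    #ifndef NDEBUG
        std::cerr << file << ":" << line << ": ";
    #endif
        std::cerr << msg << std::endl;
    }
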
index 9d0d10f..a000e9a 100644 (file)
@@ -301,12 +301,15 @@ fused_conv_eltwise_inst::typed_primitive_inst(network_impl& network, fused_conv_
                                   "expected size of batch",
                                   1,
                                   "Biases isn't 1D vector.");
-            CLDNN_ERROR_NOT_EQUAL(node.id(),
-                                  "Bias feature[0]",
-                                  bias_inst.size.feature[0],
-                                  "expected feature map number",
-                                  output_size.feature[0] / split,
-                                  "Bias/fm mismatch");
+
+            if (node.get_output_layout().format != format::image_2d_rgba) {
+                CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                      "Bias feature[0]",
+                                      bias_inst.size.feature[0],
+                                      "expected feature map number",
+                                      output_size.feature[0] / split,
+                                      "Bias/fm mismatch");
+            }
             CLDNN_ERROR_NOT_EQUAL(node.id(),
                                   "Bias spatial[1]",
                                   bias_inst.size.spatial[1],
index 8e010f6..dedf1b3 100644 (file)
@@ -147,6 +147,8 @@ attach_concatenation_gpu::attach_concatenation_gpu() {
         // block f16 format
         {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), concatenation_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), concatenation_gpu::create},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), concatenation_gpu::create},
         // MMAD
         {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), concatenation_gpu::create},
         {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf_af32), concatenation_gpu::create},
index 400b88a..0ea4398 100644 (file)
@@ -196,12 +196,17 @@ attach_convolution_gpu::attach_convolution_gpu() {
     // block f16 format
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
+    // block i8 format
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
     // MMAD
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf_af32), val_fw);
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf_af32), val_fw);
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf_af32), val_fw);
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), val_fw);
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byx8_f4), val_fw);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
+    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
 
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
     implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
index 895f826..8a71082 100644 (file)
@@ -246,6 +246,8 @@ attach_eltwise_gpu::attach_eltwise_gpu() {
          // block f16
          { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), eltwise_gpu::create },
          { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), eltwise_gpu::create },
+         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), eltwise_gpu::create },
+         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), eltwise_gpu::create },
          // 3D
          { std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), eltwise_gpu::create },
          { std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), eltwise_gpu::create },
index c008e62..89d592c 100644 (file)
@@ -108,6 +108,8 @@ attach_fully_connected_gpu::attach_fully_connected_gpu() {
         {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw},
         {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw},
         {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw},
+        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw},
         // fs_b_yx_fsv32
         {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
     });
index 94530d3..66bb526 100644 (file)
@@ -83,7 +83,8 @@ public:
 
         const auto transposed = arg.get_transposed();
 
-        assert(arg.get_output_layout().size.feature[0] == weights_layout.size.batch[0] * weights_layout.size.group[0]);
+        if (arg.get_fused_primitives().empty() || !(arg.get_fused_primitives().begin()->node->is_type<depth_to_space>()))
+            assert(arg.get_output_layout().size.feature[0] == weights_layout.size.batch[0] * weights_layout.size.group[0]);
 
         // conv params
         auto fused_params =
@@ -113,6 +114,7 @@ public:
 
         fused_params.non_conv_scale = primitive->non_conv_scale;
         fused_params.second_input_in_output = primitive->second_input_in_output;
+        fused_params.depth_to_space_already_fused = primitive->depth_to_space_already_fused;
 
         conv_params.local_convolution = weights_size.local[0] > 1 || weights_size.local[1] > 1;
         conv_params.split = split;
@@ -237,6 +239,8 @@ attach_fused_conv_eltwise_gpu::attach_fused_conv_eltwise_gpu() {
                                                 fused_conv_eltwise_gpu::create);
     implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf_af32),
                                                 fused_conv_eltwise_gpu::create);
+    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::image_2d_rgba),
+        fused_conv_eltwise_gpu::create);
 }
 
 }  // namespace detail
index ec152db..a09538a 100644 (file)
@@ -104,6 +104,15 @@ gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const la
             _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1];
             order = CL_RGBA;
             break;
+        case format::image_2d_rgba:
+            _width = layout.size.spatial[0];
+            _height = layout.size.spatial[1];
+            order = CL_RGBA;
+            if (layout.size.feature[0] != 3 && layout.size.feature[0] != 4) {
+                CLDNN_ERROR_MESSAGE("2D image allocation", "invalid number of channels in image_2d_rgba input image (should be 3 or 4)!");
+            }
+            type = CL_UNORM_INT8;
+            break;
         case format::nv12:
             _width = layout.size.spatial[1];
             _height = layout.size.spatial[0];
@@ -189,7 +198,7 @@ gpu_media_buffer::gpu_media_buffer(const refcounted_obj_ptr<engine_impl>& engine
     const shared_mem_params* params,
     uint32_t net_id)
     : gpu_image2d(engine, new_layout,
-        cl::ImageVA(engine->get_context()->context(), CL_MEM_READ_ONLY,
+        cl::ImageVA(engine->get_context()->context(), CL_MEM_READ_WRITE,
                     params->surface, params->plane),
         net_id),
     device(params->user_device),
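
Together with the allocation branch added above, image_2d_rgba tensors are backed by a single OpenCL 2D image whose pixels hold four normalized 8-bit channels (CL_RGBA / CL_UNORM_INT8), sized from the spatial dims, with 3-channel inputs still allocated as RGBA. A rough host-side sketch of the equivalent raw OpenCL call, assuming a bare OpenCL 1.2 context and omitting the engine wrappers and error handling used in the real code:

    #include <CL/cl.h>

    cl_mem make_rgba_image(cl_context ctx, size_t width, size_t height, cl_int* err) {
        cl_image_format fmt{};
        fmt.image_channel_order = CL_RGBA;          // 3- and 4-channel inputs both map here
        fmt.image_channel_data_type = CL_UNORM_INT8;
        cl_image_desc desc{};
        desc.image_type = CL_MEM_OBJECT_IMAGE2D;
        desc.image_width = width;
        desc.image_height = height;
        return clCreateImage(ctx, CL_MEM_READ_WRITE, &fmt, &desc, /*host_ptr=*/nullptr, err);
    }
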
index c6baa9f..4661045 100644 (file)
@@ -92,6 +92,14 @@ attach_mvn_gpu::attach_mvn_gpu() {
                                  mvn_gpu::create);
     implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
                                  mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16),
+                                 mvn_gpu::create);
+    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16),
+                                 mvn_gpu::create);
 }
 
 }  // namespace detail
index 0ca759a..d2e0802 100644 (file)
@@ -178,9 +178,10 @@ attach_pooling_gpu::attach_pooling_gpu() {
     implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), pooling_gpu::create);
     implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), pooling_gpu::create);
     implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), pooling_gpu::create);
-    // block fsv16 format
+    // block fp16 format
     implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), pooling_gpu::create);
     implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), pooling_gpu::create);
+    // block i8 format
     implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), pooling_gpu::create);
     implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), pooling_gpu::create);
     // 3D
index ca5bf44..ae38bdd 100644 (file)
@@ -89,6 +89,8 @@ attach_quantize_gpu::attach_quantize_gpu() {
     implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
     implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
     implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
 
     implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf_af32), val_fw);
     implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf_af32), val_fw);
index a7f61cf..ff39eba 100644 (file)
@@ -100,6 +100,9 @@ attach_scale_gpu::attach_scale_gpu() {
 
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
+
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
index 6cc6040..aca26f1 100644 (file)
@@ -58,7 +58,8 @@ public:
         params.shrink_axis_mask = arg.get_primitive()->shrink_axis_mask;
         pad_vector_to_size(params.shrink_axis_mask, dims_num, 0);
 
-        std::vector<size_t> logical_dims = params.output.LogicalDims();
+        std::vector<size_t> logical_dims = params.inputs[0].LogicalDims();
+        std::reverse(logical_dims.begin(), logical_dims.end());  // get dims in bfyx order
         std::vector<int32_t> out_shape;
         for (const auto& dim : logical_dims)
             out_shape.push_back(static_cast<int32_t>(dim));
@@ -68,6 +69,16 @@ public:
         // instead.
         vector_assign_if_not_mask(params.striding_params[1], out_shape, params.end_mask);
 
+        for (size_t dim = 0; dim < params.striding_params[2].size(); dim++) {
+            auto begin = params.striding_params[0][dim] < 0 ? out_shape[dim] + params.striding_params[0][dim] : params.striding_params[0][dim];
+            auto end = params.striding_params[1][dim] < 0 ? out_shape[dim] + params.striding_params[1][dim] : params.striding_params[1][dim];
+            auto stride = params.striding_params[2][dim];
+            if (stride < 0 && (end > begin)) {
+                std::swap(params.striding_params[0][dim], params.striding_params[1][dim]);
+                params.striding_params[0][dim] = params.striding_params[0][dim] - 1;
+            }
+        }
+
         auto& kernel_selector = kernel_selector::strided_slice_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(params, op_params);
 
index 2a6c099..55e6b92 100644 (file)
 #include "lstm_inst.h"
 #include "reshape_inst.h"
 #include "resample_inst.h"
+#include "permute_inst.h"
+#include "depth_to_space_inst.h"
 #include "lstm_dynamic_inst.h"
 #include "lstm_dynamic_input_inst.h"
 #include "lstm_dynamic_timeloop_inst.h"
 #include "mutable_data_inst.h"
 #include "arg_max_min_inst.h"
+#include "kernel_selector_utils.h"
 
 #include <iomanip>
 #include <string>
@@ -114,6 +117,65 @@ void graph_initializations::replace_nodes(program_impl& p) {
             p.nodes_map.erase(node->id());
             continue;
         }
+
+        // find the reshape->permute->reshape sequence and replace it with depth_to_space

+        if (node->is_type<reshape>()) {
+            if (!p.get_options().get<build_option_type::optimize_data>()->enabled())
+                continue;
+
+            if (node->get_users().size() == 0)
+                continue;
+
+            auto& input_node = node->get_dependency(0);
+            if (!(node->get_users().front()->is_type<permute>()) || !(input_node.is_type<reorder>()))
+                continue;
+
+            auto input_node_layout = input_node.get_output_layout();
+            if (input_node_layout.format != format::bfwzyx || input_node_layout.data_type != data_types::f16)
+                continue;
+
+            // optimal implementation only for depth to space block size 2
+            auto reshape1_layout = node->get_output_layout();
+            if (reshape1_layout.size.spatial[3] != 2)
+                continue;
+
+            auto permute_prim = node->get_users().front()->as<permute>().typed_desc();
+            primitive_id permute_id = node->get_users().front()->id();
+            auto& permute_node = node->get_users().front();
+
+            auto reshape1_prim = node->as<reshape>().typed_desc();
+            primitive_id reshape1_id = node->id();
+
+            p.remove_connection(*node, *permute_node);
+
+            auto perm_node_ptr = p.nodes_map.find(permute_id)->second;
+            auto perm_node = &perm_node_ptr->as<permute>();
+
+            auto rename_id = permute_id + "_tmp";
+            p.rename(*perm_node, rename_id);
+
+            auto reorder_id = input_node.id() + "_reorder_for_depth_to_space";
+            auto reorder_prim = std::make_shared<reorder>(reorder_id, input_node.id(), format::bfyx, input_node_layout.data_type);
+            auto pixel_shuffle_prim = std::make_shared<depth_to_space>(permute_id, reorder_id, 2);
+
+            p.get_or_create(reorder_prim);
+            p.get_or_create(pixel_shuffle_prim);
+            auto reorder_depth_node_ptr = p.nodes_map.find(reorder_id)->second;
+            auto pixel_shuffle_node_ptr = p.nodes_map.find(permute_id)->second;
+            p.add_connection(input_node, *reorder_depth_node_ptr);
+            p.add_connection(*reorder_depth_node_ptr, *pixel_shuffle_node_ptr);
+
+            auto deconv_node_ptr = p.nodes_map.find(rename_id)->second;
+            p.replace_all_usages(*deconv_node_ptr, *pixel_shuffle_node_ptr);
+            p.optimized_out.push_back(rename_id);
+            p.nodes_map.erase(rename_id);
+
+            p.remove_connection(input_node, *node);
+            p.replace_all_usages(*node, input_node);
+            p.optimized_out.push_back(reshape1_id);
+            p.nodes_map.erase(reshape1_id);
+            continue;
+        }
     }
 }
 
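
The new graph_initializations branch looks for the reshape/permute pattern around a bfwzyx fp16 reorder described in its comment and, when optimize_data is enabled and the implied block size is 2, rewires it into a bfyx reorder followed by a depth_to_space primitive. For reference, a plain sketch of what block-size-2 depth_to_space computes on a dense buffer; the channel ordering used here (c * 4 + dy * 2 + dx) is an assumption for illustration, not necessarily clDNN's exact convention:

    #include <vector>

    // in  : [C_out * 4, H, W] flattened, out : [C_out, 2 * H, 2 * W] flattened.
    void depth_to_space_bs2(const std::vector<float>& in, std::vector<float>& out,
                            int C_out, int H, int W) {
        for (int c = 0; c < C_out; ++c)
            for (int y = 0; y < H; ++y)
                for (int x = 0; x < W; ++x)
                    for (int dy = 0; dy < 2; ++dy)
                        for (int dx = 0; dx < 2; ++dx)
                            out[(c * 2 * H + (2 * y + dy)) * 2 * W + (2 * x + dx)] =
                                in[((c * 4 + dy * 2 + dx) * H + y) * W + x];
    }
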
index 1e76538..59ca913 100644 (file)
@@ -21,6 +21,8 @@
 
 #include "convolution_inst.h"
 #include "deconvolution_inst.h"
+#include "depth_to_space_inst.h"
+#include "kernel_selector_utils.h"
 #include <vector>
 #include <list>
 #include <memory>
@@ -40,132 +42,291 @@ void pre_replace_deconv::run(program_impl& p) {
                 continue;
 
             auto deconv_prim = node->as<deconvolution>().typed_desc();
-
-            // limit optimization to stride = 1
-            if (deconv_prim->stride.spatial[0] != 1 || deconv_prim->stride.spatial[1] != 1 || deconv_prim->gradient())
-                continue;
-
-            primitive_id deconv_id = node->id();
-            auto& input_node = node->get_dependency(0);
-
-            // disable for 5D
-            if (cldnn::format::dimension(input_node.get_output_layout().format) == 5)
-                continue;
-
-            // Disable for blocked formats
-            if ((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
-                _lo.is_format_optimized(node->as<deconvolution>(), format::b_fs_yx_fsv16)) {
-                continue;
-            }
-
-
-            primitive_id input_id = deconv_prim->input[0];
-
-            // setting convolution parameters based on deconvolution params
-            auto stride = deconv_prim->stride;
+            tensor filter_size = { 1, 1, 1, 1, 1 };
             auto weights = deconv_prim->weights;
+
             std::vector<primitive_id> weights_vec;
-            for (auto& weights_id : weights) weights_vec.push_back(weights_id);
-            auto biases = deconv_prim->bias;
-            std::vector<primitive_id> bias_vec;
-            for (auto& bias_id : biases) bias_vec.push_back(bias_id);
-            auto input_offset = deconv_prim->input_offset;
-            auto output_padding = deconv_prim->output_padding;
-
-            // remove deconvolution node and its connections to weights and biases, rename it and move to the optimized
-            // list
-            tensor filter_size = {1, 1, 1, 1, 1};
-            p.remove_connection(node->get_dependency(0), *node);
+            for (auto& weights_id : weights)
+                weights_vec.push_back(weights_id);
+
             for (auto& weights_id : weights_vec) {
                 auto weights_iter = p.nodes_map.find(weights_id);
                 if (weights_iter == p.nodes_map.end())  continue;
 
                 auto weights_node_ptr = weights_iter->second;
-                p.remove_connection(*weights_node_ptr, *node);
-                // get filter spatial sizes for input offset adjustment, perform this only once as all filters shouls
+                // get filter spatial sizes for input offset adjustment, perform this only once as all filters should
                 // have same size
                 if (weights_id == weights_vec[0])
                     filter_size = weights_node_ptr->get_output_layout().size;
             }
 
-            input_offset.spatial[0] = std::abs(input_offset.spatial[0]) - (filter_size.spatial[0] - 1);
-            input_offset.spatial[1] = std::abs(input_offset.spatial[1]) - (filter_size.spatial[1] - 1);
-            input_offset.spatial[2] = std::abs(input_offset.spatial[2]) - (filter_size.spatial[2] - 1);
+            // limit optimization to stride = 1
+            if (deconv_prim->stride.spatial[0] == 1 && deconv_prim->stride.spatial[1] == 1 && !deconv_prim->gradient()) {
+                primitive_id deconv_id = node->id();
+                auto& input_node = node->get_dependency(0);
 
-            if (!bias_vec.empty()) {
-                for (auto& bias_id : bias_vec) {
-                    auto bias_iter = p.nodes_map.find(bias_id);
-                    if (bias_iter == p.nodes_map.end())  continue;
+                // disable for 5D
+                if (cldnn::format::dimension(input_node.get_output_layout().format) == 5)
+                    continue;
 
-                    auto bias_id_node_ptr = bias_iter->second;
-                    p.remove_connection(*bias_id_node_ptr, *node);
+                // Disable for blocked formats
+                if ((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
+                    _lo.is_format_optimized(node->as<deconvolution>(), format::b_fs_yx_fsv16)) {
+                    continue;
                 }
-            }
-            auto rename_id = deconv_id + "_tmp";
-            p.rename(*node, rename_id);
-
-            // create convolution primitive
-            if (biases.size() != 0) {
-                auto conv_prim = std::make_shared<convolution>(deconv_id,
-                                                               input_id,
-                                                               weights_vec,
-                                                               bias_vec,
-                                                               stride,
-                                                               input_offset,
-                                                               tensor{1, 1, 1, 1},
-                                                               output_padding);
-                p.get_or_create(conv_prim);
-            } else {
-                auto conv_prim = std::make_shared<convolution>(deconv_id,
-                                                               input_id,
-                                                               weights_vec,
-                                                               stride,
-                                                               input_offset,
-                                                               tensor{1, 1, 1, 1},
-                                                               output_padding);
-                p.get_or_create(conv_prim);
-            }
 
-            auto conv_node_itr = p.nodes_map.find(deconv_id);
-            if (conv_node_itr == p.nodes_map.end()) continue;
+                primitive_id input_id = deconv_prim->input[0];
 
-            auto conv_node_ptr = conv_node_itr->second;
-            auto conv_node = &conv_node_ptr->as<convolution>();
-            conv_node->set_transposed(true);
+                // setting convolution parameters based on deconvolution params
+                auto stride = deconv_prim->stride;
+                auto biases = deconv_prim->bias;
+                std::vector<primitive_id> bias_vec;
+                for (auto& bias_id : biases) bias_vec.push_back(bias_id);
+                auto input_offset = deconv_prim->input_offset;
+                auto output_padding = deconv_prim->output_padding;
 
-            // add connections input->convolution, weights->convolution and bias->convolution
-            p.add_connection(input_node, *conv_node_ptr);
+                // remove deconvolution node and its connections to weights and biases, rename it and move to the optimized
+                // list
+                p.remove_connection(node->get_dependency(0), *node);
+                for (auto& weights_id : weights_vec) {
+                    auto weights_iter = p.nodes_map.find(weights_id);
+                    if (weights_iter == p.nodes_map.end())  continue;
 
-            for (auto& weights_id : weights_vec) {
-                auto weights_node_itr = p.nodes_map.find(weights_id);
-                if (weights_node_itr == p.nodes_map.end()) continue;
+                    auto weights_node_ptr = weights_iter->second;
+                    p.remove_connection(*weights_node_ptr, *node);
+                }
 
-                auto weights_node_ptr = weights_node_itr->second;
-                p.add_connection(*weights_node_ptr, *conv_node_ptr);
-            }
+                input_offset.spatial[0] = std::abs(input_offset.spatial[0]) - (filter_size.spatial[0] - 1);
+                input_offset.spatial[1] = std::abs(input_offset.spatial[1]) - (filter_size.spatial[1] - 1);
+                input_offset.spatial[2] = std::abs(input_offset.spatial[2]) - (filter_size.spatial[2] - 1);
 
-            if (!bias_vec.empty()) {
-                for (auto& bias_id : bias_vec) {
-                    auto bias_id_node_itr = p.nodes_map.find(bias_id);
-                    if (bias_id_node_itr == p.nodes_map.end()) continue;
+                if (!bias_vec.empty()) {
+                    for (auto& bias_id : bias_vec) {
+                        auto bias_iter = p.nodes_map.find(bias_id);
+                        if (bias_iter == p.nodes_map.end())  continue;
 
-                    auto bias_id_node_ptr = bias_id_node_itr->second;
-                    p.add_connection(*bias_id_node_ptr, *conv_node_ptr);
+                        auto bias_id_node_ptr = bias_iter->second;
+                        p.remove_connection(*bias_id_node_ptr, *node);
+                    }
                 }
-            }
+                auto rename_id = deconv_id + "_tmp";
+                p.rename(*node, rename_id);
 
-            auto deconv_node_itr = p.nodes_map.find(rename_id);
-            if (deconv_node_itr != p.nodes_map.end()) {
-                auto deconv_node_ptr = deconv_node_itr->second;
-                p.replace_all_usages(*deconv_node_ptr, *conv_node_ptr);
-                p.optimized_out.push_back(rename_id);
-                p.nodes_map.erase(rename_id);
-            }
+                // create convolution primitive
+                if (biases.size() != 0) {
+                    auto conv_prim = std::make_shared<convolution>(deconv_id,
+                        input_id,
+                        weights_vec,
+                        bias_vec,
+                        stride,
+                        input_offset,
+                        tensor{ 1, 1, 1, 1 },
+                        output_padding);
+                    p.get_or_create(conv_prim);
+                } else {
+                    auto conv_prim = std::make_shared<convolution>(deconv_id,
+                        input_id,
+                        weights_vec,
+                        stride,
+                        input_offset,
+                        tensor{ 1, 1, 1, 1 },
+                        output_padding);
+                    p.get_or_create(conv_prim);
+                }
+
+                auto conv_node_itr = p.nodes_map.find(deconv_id);
+                if (conv_node_itr == p.nodes_map.end()) continue;
+
+                auto conv_node_ptr = conv_node_itr->second;
+                auto conv_node = &conv_node_ptr->as<convolution>();
+                conv_node->set_transposed(true);
+
+                // add connections input->convolution, weights->convolution and bias->convolution
+                p.add_connection(input_node, *conv_node_ptr);
+
+                for (auto& weights_id : weights_vec) {
+                    auto weights_node_itr = p.nodes_map.find(weights_id);
+                    if (weights_node_itr == p.nodes_map.end()) continue;
+
+                    auto weights_node_ptr = weights_node_itr->second;
+                    p.add_connection(*weights_node_ptr, *conv_node_ptr);
+                }
+
+                if (!bias_vec.empty()) {
+                    for (auto& bias_id : bias_vec) {
+                        auto bias_id_node_itr = p.nodes_map.find(bias_id);
+                        if (bias_id_node_itr == p.nodes_map.end()) continue;
+
+                        auto bias_id_node_ptr = bias_id_node_itr->second;
+                        p.add_connection(*bias_id_node_ptr, *conv_node_ptr);
+                    }
+                }
+
+                auto deconv_node_itr = p.nodes_map.find(rename_id);
+                if (deconv_node_itr != p.nodes_map.end()) {
+                    auto deconv_node_ptr = deconv_node_itr->second;
+                    p.replace_all_usages(*deconv_node_ptr, *conv_node_ptr);
+                    p.optimized_out.push_back(rename_id);
+                    p.nodes_map.erase(rename_id);
+                }
 
-            p.mark_if_data_flow(*conv_node);
-            conv_node->recalc_output_layout(true);
+                update_processing_order = true;
 
-            update_processing_order = true;
+
+                p.mark_if_data_flow(*conv_node);
+                conv_node->recalc_output_layout(true);
+
+                update_processing_order = true;
+            // current optimization only available for specific deconvolution parameters
+            } else if (node->is_output() == false &&
+               node->get_output_layout().size.feature[0] == 1 &&
+               deconv_prim->stride.spatial[0] == 2 && deconv_prim->stride.spatial[1] == 2 &&
+               filter_size.spatial[0] == 9 && filter_size.spatial[1] == 9 &&
+               deconv_prim->input_offset.spatial[0] == -4 && deconv_prim->input_offset.spatial[1] == -4 &&
+               weights_vec.size() == 1 && deconv_prim->bias.size() == 1 &&
+               node->get_dependency(0).get_output_layout().format == format::bfyx &&
+               !deconv_prim->gradient()) {
+                primitive_id deconv_id = node->id();
+                auto& input_node = node->get_dependency(0);
+                primitive_id input_id = deconv_prim->input[0];
+
+                auto scale_factor = deconv_prim->stride.spatial[0];
+
+                auto cur_weights_node_ptr = p.nodes_map.find(weights_vec[0])->second;
+                auto weights_layout = cur_weights_node_ptr->get_output_layout();
+                auto weights_data_type = weights_layout.data_type;
+
+                auto biases = deconv_prim->bias[0];
+                auto bias_id_node_ptr = p.nodes_map.find(biases)->second;
+                auto bias_data_type = bias_id_node_ptr->get_output_layout().data_type;
+
+                // enable only for fp32 and fp16
+                if (weights_data_type != data_types::f16 &&
+                    weights_data_type != data_types::f32 &&
+                    bias_data_type != data_types::f16 &&
+                    bias_data_type != data_types::f32)
+                    continue;
+
+                // setting convolution parameters based on deconvolution params
+                tensor stride = { 1, 1, 1, 1 };
+                tensor input_offset = { 0, 0, -scale_factor, -scale_factor };
+                auto output_padding = deconv_prim->output_padding;
+
+                // remove deconvolution node and its connections to weights and biases,
+                // rename it and move to the optimized list
+                p.remove_connection(node->get_dependency(0), *node);
+
+                auto weights_node_ptr = p.nodes_map.find(weights_vec[0])->second;
+                p.remove_connection(*weights_node_ptr, *node);
+                p.remove_connection(*bias_id_node_ptr, *node);
+
+                auto rename_id = deconv_id + "_tmp";
+                p.rename(*node, rename_id);
+
+                // reshape weights
+                int pixel_shuffle_size = scale_factor * scale_factor;
+                int kernel_size = 5;
+                tensor target_weights_size = { pixel_shuffle_size, filter_size.feature[0], kernel_size, kernel_size };
+                auto target_weights_layout = layout{ weights_layout.data_type, weights_layout.format, target_weights_size };
+
+                {
+                     memory_impl::ptr data_to_allocate = p.get_engine().allocate_memory(target_weights_layout, 0);
+
+                     std::vector<float> weights_vec_float;
+
+                     if (weights_data_type == data_types::f16) {
+                         mem_lock<half_t> src{ cur_weights_node_ptr->as<data>().get_attached_memory() };
+                         for (uint32_t i = 0; i < weights_layout.size.count(); i++)
+                             weights_vec_float.push_back(static_cast<float>(src.data()[i]));
+                     } else {
+                         mem_lock<float> src{ cur_weights_node_ptr->as<data>().get_attached_memory() };
+                         for (uint32_t i = 0; i < weights_layout.size.count(); i++)
+                             weights_vec_float.push_back(src.data()[i]);
+                     }
+
+                     std::vector<std::vector<std::vector<float> > > subpixel_weights(pixel_shuffle_size);
+
+                     program_helpers::reshape_deconvolution_weights(weights_vec_float,
+                         static_cast<int>(filter_size.feature[0]),
+                         static_cast<int>(filter_size.spatial[0]),
+                         static_cast<int>(filter_size.spatial[1]),
+                         scale_factor,
+                         subpixel_weights);
+
+                     if (weights_data_type == data_types::f16) {
+                         mem_lock<half_t> dst{ data_to_allocate };
+                         program_helpers::set_weights_values<half_t>(dst.data(), subpixel_weights);
+                     } else if (weights_data_type == data_types::f32) {
+                         mem_lock<float> dst{ data_to_allocate };
+                         program_helpers::set_weights_values<float>(dst.data(), subpixel_weights);
+                     } else {
+                         throw std::logic_error("Not supported data type.");
+                     }
+
+                     memory api_memory = memory(data_to_allocate.detach());
+                     auto data_node_weights_replace = std::make_shared<data>(weights_vec[0] + "_conv_rpl", api_memory);
+                     p.get_or_create(data_node_weights_replace);
+                     auto data_node_weights_replace_node_ptr = p.nodes_map.find(weights_vec[0] + "_conv_rpl")->second;
+                     auto& data_node = data_node_weights_replace_node_ptr->as<data>();
+                     data_node.set_output_layout(target_weights_layout, false);
+                }
+                float bias = 0;
+
+                if (bias_data_type == data_types::f16) {
+                    mem_lock<half_t> src{ bias_id_node_ptr->as<data>().get_attached_memory() };
+                    bias = static_cast<float>(src.data()[0]);
+                } else {
+                    mem_lock<float> src{ bias_id_node_ptr->as<data>().get_attached_memory() };
+                    bias = src.data()[0];
+                }
+
+                auto deconv_id_conv = deconv_id + "_conv";
+
+                // create convolution primitive
+                auto conv_prim = std::make_shared<convolution>(deconv_id_conv,
+                    input_id,
+                    std::vector<primitive_id>{ weights_vec[0] + "_conv_rpl" },
+                    stride,
+                    input_offset,
+                    tensor{ 1, 1, 1, 1 },
+                    output_padding);
+                p.get_or_create(conv_prim);
+
+                auto conv_node_itr = p.nodes_map.find(deconv_id_conv);
+                if (conv_node_itr == p.nodes_map.end()) continue;
+
+                auto conv_node_ptr = conv_node_itr->second;
+                auto conv_node = &conv_node_ptr->as<convolution>();
+
+                // add connections input->convolution, weights->convolution and bias->convolution
+                p.add_connection(input_node, *conv_node_ptr);
+
+                {
+                    auto weights_node_conv_rpl_ptr = p.nodes_map.find(weights_vec[0] + "_conv_rpl")->second;
+                    p.add_connection(*weights_node_conv_rpl_ptr, *conv_node_ptr);
+                    p.inputs.push_back(weights_node_conv_rpl_ptr.get());
+                }
+
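+                // depth_to_space (block size 2) performs the pixel shuffle that restores the spatial upscaling of the original deconvolution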
+                auto pixel_shuffle_prim = std::make_shared<depth_to_space>(deconv_id, deconv_id_conv, 2);
+
+                p.get_or_create(pixel_shuffle_prim);
+                auto pixel_shuffle_node_ptr = p.nodes_map.find(deconv_id)->second;
+                pixel_shuffle_node_ptr->add_fused_activation(activation_func::linear, { 1, bias });
+
+                // add connection convolution->depth_to_space
+                p.add_connection(*conv_node_ptr, *pixel_shuffle_node_ptr);
+
+                auto deconv_node_ptr = p.nodes_map.find(rename_id);
+                if (deconv_node_ptr != p.nodes_map.end()) {
+                    p.replace_all_usages(*deconv_node_ptr->second, *pixel_shuffle_node_ptr);
+                    p.optimized_out.push_back(rename_id);
+                    p.nodes_map.erase(rename_id);
+                }
+                p.mark_if_data_flow(*conv_node);
+                conv_node->recalc_output_layout(true);
+
+                update_processing_order = true;
+            }
         }
     }
 
index 397d65c..6425e7e 100644 (file)
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018-2019 Intel Corporation
+// Copyright (c) 2018-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 
 #include "api/eltwise.hpp"
 #include "api/pooling.hpp"
+#include "fused_conv_eltwise_inst.h"
 #include "primitive_inst.h"
 #include "activation_inst.h"
 #include "concatenation_inst.h"
@@ -25,6 +26,7 @@
 #include "eltwise_inst.h"
 #include "reshape_inst.h"
 #include "scale_inst.h"
+#include "depth_to_space_inst.h"
 
 #include "pass_manager.h"
 #include "program_helpers.h"
@@ -161,6 +163,35 @@ void prepare_buffer_fusing::run(program_impl& p) {
                     lower_padd_in_axis += input->get_output_layout().size.raw[concat_axis];
                 }
 
+                // Check whether doing the concat in place is worth it: if the following primitive is a convolution
+                // whose input padding differs from the paddings of the convolutions consuming the concatenation's inputs,
+                // that convolution will likely fall back to a reference implementation due to the mismatched padding,
+                // and the gain from the in-place concat is nullified by the slower convolution implementation.
+                // This should be handled by a more advanced tuning mechanism at the topology level.
+                auto& users = node.get_users();
+                if (users.size() == 1) {
+                    auto& user = users.front();
+                    if (node.get_output_layout().format == format::bfyx && user->type() == convolution::type_id()) {
+                        auto out_input_offsets = user->as<convolution>().get_primitive()->input_offset;
+
+                        std::vector<tensor> in_input_offsets;
+                        for (auto& in_user : nodes_list.first) {
+                            if (in_user->type() == convolution::type_id())
+                                in_input_offsets.push_back(in_user->as<convolution>().get_primitive()->input_offset);
+                        }
+
+                        for (auto& in_input_offset : in_input_offsets) {
+                            if (in_input_offset.spatial[0] != out_input_offsets.spatial[0] &&
+                                in_input_offset.spatial[1] != out_input_offsets.spatial[1])
+                                return;
+                        }
+                    } else if (user->type() == fused_conv_eltwise::type_id()) {
+                        if (!user->as<fused_conv_eltwise>().get_fused_primitives().empty() &&
+                            user->as<fused_conv_eltwise>().get_fused_primitives().begin()->node->is_type<depth_to_space>())
+                            return;
+                    }
+                }
+
                 // apply concatenation in place optimization
                 for (auto input : nodes_list.first) {
                     auto input_lenght = input->get_output_layout().size.raw[concat_axis];
index b7351db..5bd635e 100644 (file)
@@ -515,12 +515,21 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
             int p1_pnum = p.get_processing_order().get_processing_number(parents[fused_idx]);
             int p2_pnum = p.get_processing_order().get_processing_number(parents[peer_idx]);
 
-            if (p1_pnum < p2_pnum && can_fuse_parents[peer_idx]) {
+            auto p1_dt = parents[fused_idx]->get_output_layout().data_type;
+            auto p2_dt = parents[peer_idx]->get_output_layout().data_type;
+
+            if (can_fuse_parents[peer_idx] &&
+               ((p1_pnum < p2_pnum && p1_dt == p2_dt) || (data_type_traits::is_floating_point(p2_dt) && !data_type_traits::is_floating_point(p1_dt)))) {
+                // Swap in 2 cases:
+                // 1. Both branches have the same data type. Select the branch with the lower processing number.
+                // 2. The peer node has an fp32 output type while the fused node is int8. In that case fuse into the
+                //    branch with the fp32 output type to avoid fp32 blobs in the quantized graph.
                 std::swap(fused_idx, peer_idx);
             }
 
             auto fused_node = parents[fused_idx];
             auto peer_node = parents[peer_idx];
+
             if (parent1->is_type<convolution>() && !conv_supports_fusings(parent1->as<convolution>()))
                 return;
 
@@ -558,6 +567,33 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
         p.get_processing_order().calc_processing_order(p);
 }
 
+void prepare_conv_eltw_fusing::fuse_conv_depth_to_space(program_impl& p, program_node* node) {
+    // make sure this convolution has only 1 user and that user is depth_to_space
+    // make sure the convolution is not an output
+    if (node->get_users().size() != 1 || node->is_output())
+        return;
+
+    if (!node->get_users().front()->is_type<depth_to_space>())
+        return;
+
+    convolution_node* conv_node = static_cast<convolution_node*>(node);
+
+    depth_to_space_node* d_t_s_node = static_cast<depth_to_space_node*>(node->users.front());
+    if (d_t_s_node->get_users().empty())
+        return;
+    if (!d_t_s_node->get_users().front()->is_type<eltwise>())
+        return;
+
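+    // fuse only when every input of the depth_to_space node is f16 in bfyx format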
+    for (auto& dep : d_t_s_node->get_dependencies()) {
+        format fmt = dep->get_output_layout().format;
+        data_types dep_dt = dep->get_output_layout().data_type;
+        if ((fmt != format::bfyx || dep_dt != data_types::f16))
+            return;
+    }
+
+    p.fuse_nodes(*conv_node, *d_t_s_node);
+}
+
 void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node* node) {
     // make sure this convolution has only 1 user and that user is eltwise
     // make sure the convolution is not an output
@@ -570,6 +606,10 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
     convolution_node* conv_node = static_cast<convolution_node*>(node);
     convolution& conv = const_cast<convolution&>(*conv_node->get_primitive());
 
+    bool if_already_depth_to_space_fused = false;
+    if (!conv_node->get_fused_primitives().empty())
+        if_already_depth_to_space_fused = conv_node->get_fused_primitives().begin()->node->is_type<depth_to_space>();
+
     // TODO: find a better way to check for available kernels
     // currently works only for these formats
     data_types data_type = conv_node->get_output_layout().data_type;
@@ -583,7 +623,8 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
             (fmt != format::byxf_af32 || dep_dt != data_types::i8) &&
             (fmt != format::byxf_af32 || dep_dt != data_types::u8) &&
             (fmt != format::bfyx || dep_dt != data_types::f32) && (fmt != format::bfyx || dep_dt != data_types::u8) &&
-            (fmt != format::bfyx || dep_dt != data_types::i8) && (fmt != format::yxfb || dep_dt != data_types::f16))
+            (fmt != format::bfyx || dep_dt != data_types::i8) && (fmt != format::yxfb || dep_dt != data_types::f16) &&
+            (fmt != format::bfyx || dep_dt != data_types::f16 || !if_already_depth_to_space_fused))
             return;
     }
 
@@ -597,7 +638,7 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
         if (filter_size.spatial[0] == 1 && filter_size.spatial[1] == 1) {
             if (conv.stride.spatial[0] != 1 || conv.stride.spatial[1] != 1)
                 return;
-        } else {
+        } else if (!if_already_depth_to_space_fused) {
             return;
         }
     }
@@ -614,7 +655,7 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
 
     // make sure eltwise have only 2 inputs
     // make sure eltwise is not an output
-    if (eltw_node->inputs_count() != 2 || eltw_node->is_output())
+    if (!if_already_depth_to_space_fused && (eltw_node->inputs_count() != 2 || eltw_node->is_output()))
         return;
 
     // only single ADD operation is currently supported
@@ -638,6 +679,13 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
     if (eltw_node->input(eltw_fused_input_idx).id() != conv.id)
         return;
 
+    auto fused_output_layout_size = eltw_node->input(eltw_second_input_idx).get_output_layout().size;
+    auto conv_output_layout_size = conv_node->get_output_layout().size;
+
+    if (fused_output_layout_size.spatial[0] * fused_output_layout_size.spatial[1] * fused_output_layout_size.feature[0] * fused_output_layout_size.batch[0]
+        != conv_output_layout_size.spatial[0] * conv_output_layout_size.spatial[1] * conv_output_layout_size.feature[0] * conv_output_layout_size.batch[0])
+        return;
+
     // get strides for other than our conv input
     std::vector<tensor> new_eltw_strides;
     // conv strides modified by eltwise stride
@@ -694,6 +742,8 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
     // Copy output data type from eltwise
     fused_conv_eltw->output_data_type = eltw_node->get_output_layout().data_type;
 
+    fused_conv_eltw->depth_to_space_already_fused = if_already_depth_to_space_fused;
+
     auto& new_node = p.get_or_create(fused_conv_eltw);
 
     for (size_t i = 0; i < eltw_node->get_fused_activations_funcs().size(); i++)
@@ -733,9 +783,30 @@ void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node*
 
     new_node.dependencies = updated_deps;
 
+    if (if_already_depth_to_space_fused) {
+        new_node.add_fused_primitives(conv_node->get_fused_primitives());
+    }
+
     // Extract convolution node - will replace its usage in fused with input
     p.extract_and_remove(*conv_node);
-    new_node.recalc_output_layout();
+
+    // To change the convolution's output to an image type, make sure it is the last primitive in the topology,
+    // or that the only primitive after it is a reorder which is the network's output
+    auto reorder_user = (new_node.get_users().size() == 1);
+    if (reorder_user)
+        reorder_user &= ((new_node.get_users().front()->is_type<reorder>()) && (new_node.get_users().front()->is_output()));
+    if (if_already_depth_to_space_fused && (new_node.get_users().size() == 0 || reorder_user)) {
+        cldnn::layout new_layout = { data_types::u8, format::image_2d_rgba, fused_output_layout_size };
+        new_node.set_output_layout(new_layout);
+        // Remove output reorder if present
+        if (reorder_user) {
+            auto& reorder_node = new_node.get_users().front();
+            reorder_node->remove_dependency(1);
+            p.extract_and_remove(*reorder_node);
+        }
+    } else {
+        new_node.recalc_output_layout();
+    }
 
     p.add_optimized_primitive_info(conv_id, {new_node.id()});
     p.add_optimized_primitive_info(eltw_id, {new_node.id()});
@@ -763,6 +834,8 @@ void prepare_conv_eltw_fusing::run(program_impl& p) {
 
         auto& node = (*node_itr);
 
+        fuse_conv_depth_to_space(p, node);
+
         fuse_conv_eltwise(p, node);
     }
 }
index 4c62624..bb81b6d 100644 (file)
@@ -434,9 +434,6 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) {
         auto node_itr = itr++;
         auto& node = (*node_itr);
 
-        if (node->is_output())
-            continue;
-
         // Detects if given eltwise node performs zero point subtraction
         auto is_zero_point_node = [](eltwise_node& node) -> bool {
             auto prim = node.get_primitive();
@@ -660,7 +657,7 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) {
             // Remove sub operations from the graph and set correct users for zero points and inputs
             if (asymmetric_data) {
                 if (!new_a_zp || !new_input)
-                    CLDNN_ERROR_MESSAGE(convolution_node.id(), "Unexpected nullptr in asymmetric quantization for activations optimization");
+                    CLDNN_ERROR_MESSAGE(new_conv_node.id(), "Unexpected nullptr in asymmetric quantization for activations optimization");
 
                 auto& zp_users = new_a_zp->users;
                 auto& in_users = new_input->users;
@@ -682,7 +679,7 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) {
 
             if (asymmetric_weights) {
                 if (!new_w_zp || !new_weights)
-                    CLDNN_ERROR_MESSAGE(convolution_node.id(), "Unexpected nullptr in asymmetric quantization for weights optimization");
+                    CLDNN_ERROR_MESSAGE(new_conv_node.id(), "Unexpected nullptr in asymmetric quantization for weights optimization");
 
                 auto& zp_users = new_w_zp->users;
                 auto& wei_users = new_weights->users;
index b3ef892..969885c 100644 (file)
 
 using namespace cldnn;
 
-remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations)
-    : base_pass("remove_redundant_reorders"), lo(lo_ref), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations) {}
+remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations,
+    bool remove_output_reorders)
+    : base_pass("remove_redundant_reorders"), lo(lo_ref), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations),
+    remove_output_reorders(remove_output_reorders) {}
 
 void remove_redundant_reorders::run(program_impl& p) {
     auto update_implementation = [&](program_node& node) {
@@ -159,9 +161,13 @@ void remove_redundant_reorders::run(program_impl& p) {
 
         auto& r_node = node->as<reorder>();
 
+        bool no_output_optimization = remove_output_reorders ?
+            r_node.is_output() && (r_node.get_dependency(0).is_output() || r_node.get_dependency(0).is_type<input_layout>() ||
+                r_node.get_dependency(0).can_be_optimized()) : r_node.is_output();
+
         if (r_node.has_mean() ||
             !r_node.get_primitive()->subtract_per_feature.empty() ||
-            r_node.is_output() ||
+            no_output_optimization ||
             !r_node.get_fused_activations_funcs().empty())
             continue;
 
@@ -170,7 +176,7 @@ void remove_redundant_reorders::run(program_impl& p) {
 
         // Optimize reorder b_fs_yx_fsv16 -> bfyx when spatials are equal to 1. In this case we can reinterpret buffer,
         // but pads need to be handled correctly.
-        if (i_layout.format == format::b_fs_yx_fsv16 && o_layout.format == format::bfyx &&
+        if (i_layout.format == format::b_fs_yx_fsv16 && o_layout.format == format::bfyx && !r_node.is_output() &&
             i_layout.size.spatial[0] == 1 && i_layout.size.spatial[1] == 1 &&
             o_layout.data_padding.upper_size() == (tensor)0 && o_layout.data_padding.lower_size() == (tensor)0) {
             r_node.can_be_optimized(true);
index 55eca03..1ad6612 100644 (file)
@@ -22,6 +22,7 @@
 #include "layout_optimizer.h"
 #include "program_impl.h"
 #include "program_helpers.h"
+#include "mvn_inst.h"
 #include <vector>
 #include <memory>
 #include <list>
@@ -344,7 +345,7 @@ void insert_reorders(program_impl& p, const std::map<program_node*, format::type
             continue;
 
         auto fmt = fmt_map.at(node);
-        if (fmt == format::any)
+        if (fmt == format::any || format::is_image(fmt))
             continue;
 
         insert_reorders_in_dir<direction_e::forwards>(p, fmt_map, rf, node);
@@ -358,7 +359,7 @@ void insert_reorders(program_impl& p, const std::map<program_node*, format::type
             continue;
 
         auto fmt = fmt_map.at(node);
-        if (fmt == format::any)
+        if (fmt == format::any || format::is_image(fmt))
             continue;
 
         insert_reorders_in_dir<direction_e::backwards>(p, fmt_map, rf, node);
@@ -371,6 +372,70 @@ void reorder_inputs::run(program_impl& p, layout_optimizer& lo, reorder_factory&
     auto fmt_map = get_preferred_formats(p, lo);
     propagate_formats(p, fmt_map, lo);
     minimize_local_reorders(p, fmt_map, lo);
+
+    // WA START ============================================================================================================
+    if (lo.get_optimization_attributes().b_fs_yx_fsv16_network) {
+        // This is a temporary work-around for a known bad case until byxf_af32 handling is corrected in layout_optimizer.
+        //
+        // Find pattern:
+        //    mvn(int8, b_fs_yx_fsv16, [x,16,1280,720]) -> conv(int8, byxf_af32, [x,3,1280,720]) -> mvn(*, bfyx) ->
+        // Replace with:
+        //    mvn(b_fs_yx_fsv16) -> conv(b_fs_yx_fsv16) -> mvn(b_fs_yx_fsv16) ->
+        //
+        // Generally, for such a convolution b_fs_yx_fsv16 will always perform better than byxf_af32,
+        // but this WA is needed to avoid unvalidated int8 b_fs_yx_fsv16 networks and potential regressions.
+        // Additionally, the reorder from af32 -> bfyx takes ~9 times longer than the actual convolution.
+        for (auto& node_ptr : p.get_processing_order()) {
+            if (!node_ptr->is_in_data_flow() || !node_ptr->is_type<convolution>() || fmt_map.at(node_ptr) != format::byxf_af32)
+                continue;
+
+            auto& conv_node = node_ptr->as<convolution>();
+
+            bool input_path =
+                conv_node.input().get_output_layout().data_type == data_types::i8 &&
+                conv_node.input().is_type<mvn>() &&
+                fmt_map.at(&conv_node.input()) == format::b_fs_yx_fsv16;
+            bool output_path =
+                conv_node.get_users().size() == 1 &&
+                conv_node.get_users().front()->is_type<mvn>() &&
+                fmt_map.at(conv_node.get_users().front()) == format::bfyx &&
+                conv_node.get_users().front()->get_users().size() == 1 &&
+                !conv_node.get_users().front()->as<mvn>().get_primitive()->across_channels;
+
+            if (!input_path || !output_path)
+                continue;
+
+            auto in_lay = conv_node.input().get_output_layout();
+            auto out_lay = conv_node.get_output_layout();
+            auto wei_lay = conv_node.weights().get_output_layout();
+            bool correct_layouts =
+                // weights
+                wei_lay.data_type == data_types::i8 &&
+                wei_lay.size.spatial[0] == 3 && wei_lay.size.spatial[1] == 3 &&
+                // input/output
+                in_lay.data_type == data_types::i8 && out_lay.data_type == data_types::i8 &&
+                in_lay.size.feature[0] == 16 && out_lay.size.feature[0] == 3 &&
+                in_lay.size.spatial[0] == 1280 && out_lay.size.spatial[0] == 1280 &&
+                in_lay.size.spatial[1] == 720 && out_lay.size.spatial[1] == 720;
+
+            if (!correct_layouts)
+                continue;
+
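+            // only apply the replacement to a plain, non-grouped, non-transposed convolution without zero points and with dilation 1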
+            bool correct_conv =
+                conv_node.get_groups() == 1 && conv_node.get_split() == 1 && conv_node.get_deformable_groups() == 1 &&
+                !conv_node.get_depthwise_sep_opt() && !conv_node.get_transposed() &&
+                !conv_node.activations_zero_points_term() && !conv_node.weights_zero_points_term() && !conv_node.compensation_term() &&
+                conv_node.get_primitive()->dilation == tensor(1);
+
+            if (!correct_conv)
+                continue;
+
+            fmt_map.at(node_ptr) = format::b_fs_yx_fsv16;
+            fmt_map.at(conv_node.get_users().front()) = format::b_fs_yx_fsv16;
+        }
+    }
+    // WA END ==============================================================================================================
+
     insert_reorders(p, fmt_map, rf);
 
     for (auto n : p.get_processing_order()) {
index 835222c..67788d9 100644 (file)
@@ -18,7 +18,9 @@
 #pragma once
 #include "api/depth_to_space.hpp"
 #include "primitive_inst.h"
+#include "kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.h"
 #include <string>
+#include <memory>
 
 namespace cldnn {
 template <>
@@ -29,6 +31,9 @@ public:
     using parent::parent;
 
     program_node& input(size_t index = 0) const { return get_dependency(index); }
+    std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
+        return std::make_shared<kernel_selector::depth_to_space_fuse_params>();
+    }
 };
 
 using depth_to_space_node = typed_program_node<depth_to_space>;
index 7c5ddd7..34f8c96 100644 (file)
@@ -176,6 +176,7 @@ public:
 private:
     void run(program_impl& p) override;
     void fuse_conv_eltwise(program_impl& p, program_node* node);
+    void fuse_conv_depth_to_space(program_impl& p, program_node* node);
     layout_optimizer& _lo;
     bool b_fs_yx_fsv16_opt;
 };
@@ -289,13 +290,15 @@ private:
 
 class remove_redundant_reorders : public base_pass {
 public:
-    explicit remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing = false, bool update_implementations = false);
+    explicit remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing = false, bool update_implementations = false,
+        bool remove_output_reorders = false);
     void run(program_impl& p) override;
 
 private:
     layout_optimizer& lo;
     bool enable_reorder_fusing;
     bool update_implementations;
+    bool remove_output_reorders;
 };
 
 class reorder_inputs : public base_pass {
index 9277513..7ec2622 100644 (file)
@@ -112,7 +112,27 @@ struct program_helpers {
                               const layout& target_layout,
                               size_t begin_offset,
                               size_t end_offset);
-    static layout get_weights_layout(typed_program_node<cldnn::data>& data_node, int32_t split);
+
     static std::pair<bool, bool> are_layouts_identical(layout const& l1, layout const& l2);
+
+    // helper functions for deconvolution optimizations
+    static void reshape_deconvolution_weights(const std::vector<float> &deconv_weights,
+                                              const int channels,
+                                              const int kernel_width,
+                                              const int kernel_height,
+                                              const int scale_factor,
+                                              std::vector<std::vector<std::vector<float> > >& subpixel_weights);
+    template <typename T>
+    static void set_weights_values(T* mem, std::vector<std::vector<std::vector<float> > > args) {
+        for (uint32_t x = 0; x < static_cast<uint32_t>(args.size()); ++x) {
+            for (uint32_t y = 0; y < static_cast<uint32_t>(args[x].size()); ++y) {
+                for (uint32_t z = 0; z < static_cast<uint32_t>(args[x][y].size()); ++z) {
+                    *mem = static_cast<T>(args[x][y][z]);
+                    mem++;
+                }
+            }
+        }
+    }
+    static layout get_weights_layout(typed_program_node<cldnn::data>& data_node, int32_t split);
 };
 }  // namespace cldnn
index 5849c66..4fe44eb 100644 (file)
@@ -111,6 +111,8 @@ inline std::string fmt_to_str(format fmt) {
             return "b_fs_zyx_fsv16";
         case format::bs_fs_zyx_bsv16_fsv16:
             return "bs_fs_zyx_bsv16_fsv16";
+        case format::image_2d_rgba:
+            return "image_2d_rgba";
 
         case format::oiyx:
             return "oiyx";
@@ -134,6 +136,8 @@ inline std::string fmt_to_str(format fmt) {
             return "image_2d_weights_winograd_6x3_s1_xfbyb";
         case format::os_iyx_osv16:
             return "os_iyx_osv16";
+        case format::os_is_yx_osv16_isv16:
+            return "os_is_yx_osv16_isv16";
         case format::os_iyx_osv32:
             return "os_iyx_osv32";
         case format::os_iyx_osv64:
@@ -197,6 +201,8 @@ inline std::string fmt_to_str(format fmt) {
             return "g_os_is_yx_isv8_osv16_isv2";
         case format::g_os_is_zyx_isv16_osv16:
             return "g_os_is_zyx_isv16_osv16";
+        case format::g_os_is_yx_osv16_isv4:
+            return "g_os_is_yx_osv16_isv4";
         default:
             return "unknown (" + std::to_string(fmt.value) + ")";
     }
index fed6a81..0d9d3b9 100644 (file)
@@ -155,6 +155,8 @@ kernel_selector::data_layout to_data_layout(format f) {
             return kernel_selector::data_layout::bs_fs_yx_bsv16_fsv16;
         case format::nv12:
             return kernel_selector::data_layout::nv12;
+        case format::image_2d_rgba:
+            return kernel_selector::data_layout::image_2d_rgba;
         default:
             throw std::invalid_argument("Format f (" +  std::to_string((int32_t)f.value) + ") is not a proper data layout");
     }
@@ -206,6 +208,8 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) {
             return cldnn::format::b_fs_yx_fsv4;
         case kernel_selector::data_layout::nv12:
             return cldnn::format::nv12;
+        case kernel_selector::data_layout::image_2d_rgba:
+            return cldnn::format::image_2d_rgba;
         default:
             throw std::invalid_argument("Unable to convert data layout " + std::to_string(l) + " to tensor format");
     }
@@ -225,6 +229,8 @@ kernel_selector::weights_layout to_weights_layout(format f) {
             return kernel_selector::weights_layout::yxio;
         case format::os_iyx_osv16:
             return kernel_selector::weights_layout::os_iyx_osv16;
+        case format::os_is_yx_osv16_isv16:
+            return kernel_selector::weights_layout::os_is_yx_osv16_isv16;
         case format::os_iyx_osv32:
             return kernel_selector::weights_layout::os_iyx_osv32;
         case format::os_iyx_osv64:
@@ -314,6 +320,8 @@ kernel_selector::weights_layout to_weights_layout(format f) {
             return kernel_selector::weights_layout::g_os_is_yx_isv8_osv16_isv2;
         case format::g_os_is_zyx_isv16_osv16:
             return kernel_selector::weights_layout::g_os_is_zyx_isv16_osv16;
+        case format::g_os_is_yx_osv16_isv4:
+            return kernel_selector::weights_layout::g_os_is_yx_osv16_isv4;
         default:
             throw std::invalid_argument("Unable to convert tensor layout " + fmt_to_str(f) + " to weights layout");
     }
@@ -335,6 +343,8 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
             return cldnn::format::yxfb;
         case kernel_selector::weights_layout::os_iyx_osv16:
             return cldnn::format::os_iyx_osv16;
+        case kernel_selector::weights_layout::os_is_yx_osv16_isv16:
+            return cldnn::format::os_is_yx_osv16_isv16;
         case kernel_selector::weights_layout::os_iyx_osv32:
             return cldnn::format::os_iyx_osv32;
         case kernel_selector::weights_layout::os_iyx_osv64:
@@ -417,6 +427,8 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l) {
             return cldnn::format::g_os_is_yx_isv8_osv16_isv2;
         case kernel_selector::weights_layout::g_os_is_zyx_isv16_osv16:
             return cldnn::format::g_os_is_zyx_isv16_osv16;
+        case kernel_selector::weights_layout::os_is_yx_osv16_isv4:
+            return cldnn::format::g_os_is_yx_osv16_isv4;
         default:
             return cldnn::format::bfyx;
     }
@@ -455,7 +467,6 @@ kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split
     kernel_selector::n_dims vec(kernel_selector::DataTensor::ChannelsCount(ks_layout));
 
     size_t pitch = 1;
-
     auto new_vals = vals;
 
     if (ks_layout == kernel_selector::Tensor::byxf_af32) {
@@ -633,6 +644,7 @@ void set_params(const program_node& node, kernel_selector::params& params) {
 
     params.engineInfo.bSubGroupSupport = context->extension_supported("cl_intel_subgroups");
     params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
+    params.engineInfo.bSubGroupCharSupport = context->extension_supported("cl_intel_subgroups_char");
     params.engineInfo.bFP16Support = context->extension_supported("cl_khr_fp16");
     params.engineInfo.bFP64Support = context->extension_supported("cl_khr_fp64");
     params.engineInfo.bIMADSupport = device_info.supports_imad != 0;
index 21c11a7..bbf7825 100644 (file)
@@ -28,6 +28,8 @@
 #include "eltwise_inst.h"
 #include "pooling_inst.h"
 #include "permute_inst.h"
+#include "quantize_inst.h"
+#include "mvn_inst.h"
 #include <vector>
 #include <memory>
 #include <utility>
@@ -121,6 +123,11 @@ bool layout_optimizer::is_format_supported(program_node& node, format::type fmt)
     if (node.is_type<fully_connected>() && fmt == format::byxf)
         return false;
 
+    if (node.is_type<mvn>() && fmt == format::b_fs_yx_fsv16 &&
+        node.get_dependency(0).get_output_layout().data_type != data_types::i8 &&
+        node.get_dependency(0).get_output_layout().data_type != data_types::u8)
+        return false;
+
     if (node.is_type<input_layout>())
         return node.get_output_layout().format == fmt;
 
@@ -190,7 +197,10 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
         fmt_prev == format::bfyx &&
         ((fmt_next == format::fs_b_yx_fsv32 && next.as<convolution>().get_primitive()->groups == 1) ||
         (fmt_next == format::b_fs_yx_fsv32 && prev_output_layout.size.feature[0] == 3) ||
-        (fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 && prev_output_layout.size.feature[0] == 3) ||
+        (fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 &&
+        prev_output_layout.size.feature[0] == 3 &&
+        (next_output_layout.data_type != data_types::i8 && next_output_layout.data_type != data_types::u8)) ||
+         (fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 && prev_output_layout.size.feature[0] == 3) ||
         (fmt_next == format::bs_fs_yx_bsv16_fsv16 && next_output_layout.size.feature[0] % 16 == 0 && prev_output_layout.size.feature[0] == 3)))
         return true;
 
@@ -219,7 +229,7 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, program_node
         return true;
 
     if (prev.is_type<quantize>() &&
-        (fmt_next == format::b_fs_yx_fsv4 || fmt_next == format::b_fs_yx_fsv32 || fmt_next == format::b_fs_zyx_fsv32))
+        (fmt_next == format::b_fs_yx_fsv4 || fmt_next == format::b_fs_yx_fsv32 || fmt_next == format::b_fs_zyx_fsv32 || fmt_next == format::b_fs_yx_fsv16))
         return true;
 
     return false;
@@ -332,7 +342,24 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(layout const &input_layout,
                                                      const layout &weights_layout,
                                                      std::shared_ptr<const convolution> conv,
                                                      bool weak_restrictions) {
-    // A set of rules that define when b_fs_yx_fsv16 mem format can be used
+    // A set of rules that define when b_fs_yx_fsv16 mem format can be used for int8 case
+    bool i8_dt_case = (input_layout.data_type == data_types::u8 || input_layout.data_type == data_types::i8) &&
+        weights_layout.data_type == data_types::i8 &&
+        (conv->activations_zero_points.empty() && conv->weights_zero_points.empty());  // only symmetric
+    if (i8_dt_case) {
+        auto ks_x = weights_layout.size.spatial[0];
+        auto ks_y = weights_layout.size.spatial[1];
+        if (input_layout.size.spatial[2] == 1 &&
+            input_layout.size.batch[0] < 16 &&
+            ((ks_x == 7 && ks_y == 7) || (ks_x == 3 && ks_y == 3) || (ks_x == 1 && ks_y == 1) || (ks_x == 5 && ks_y == 5)) &&
+            weights_layout.size.batch[0] >= 16 &&
+            ((conv->groups == 1 && conv->split() == 1) ||
+             conv->groups == static_cast<uint32_t>(input_layout.size.feature[0]) ||
+             conv->split() == static_cast<int32_t>(input_layout.size.feature[0])) &&
+            conv->dilation == tensor{ 1 })
+            return true;
+    }
+    // A set of rules that define when b_fs_yx_fsv16 mem format can be used for fp16/fp32 case
     auto feature_block_size = 16;
     auto correct_data_type = input_layout.data_type == data_types::f16 || input_layout.data_type == data_types::f32;
     correct_data_type &= weights_layout.data_type == input_layout.data_type;
@@ -530,7 +557,7 @@ format layout_optimizer::imad_case(convolution_node const& node) const {
     if (dims_count == 5 && is_grouped) {
         return format::bfzyx;
     } else if (dims_count == 4 && is_grouped && !is_dw) {
-        return format::bfyx;
+        return format::b_fs_yx_fsv4;
     }
 
     bool asymmetric_quantization = node.activations_zero_points_term() || node.weights_zero_points_term();
@@ -567,7 +594,12 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
     const float cond_denom = _total_conv > 0 ? 1.0f / static_cast<float>(_total_conv) : 1.0f;
 
     if ((input_layout.data_type == data_types::u8 || input_layout.data_type == data_types::i8)) {
-        expected_format = imad_case(node);
+        if ((_optimization_attributes.b_fs_yx_fsv16_network &&
+            convolution_b_fs_yx_fsv16_opt(input_layout, output_or_weights_layout, prim))) {
+            expected_format = cldnn::format::b_fs_yx_fsv16;
+        } else {
+            expected_format = imad_case(node);
+        }
         expected_tensor = current_layout.size;
     } else if (_optimization_attributes.b_fs_zyx_fsv16_network &&
             convolution_b_fs_zyx_fsv16_opt(input_layout,
index 1596e09..4d2c049 100644 (file)
@@ -38,6 +38,7 @@
 #include "reshape_inst.h"
 #include "activation_inst.h"
 #include "scale_inst.h"
+#include "depth_to_space_inst.h"
 #include "convolution_inst.h"
 #include "concatenation_inst.h"
 #include "crop_inst.h"
@@ -55,6 +56,7 @@
 #include "proposal_inst.h"
 #include "reorder_inst.h"
 #include "split_inst.h"
+#include "mvn_inst.h"
 #include "to_string_utils.h"
 #include "gpu/memory_gpu.h"
 
@@ -466,6 +468,9 @@ void program_impl::post_optimize_graph(bool is_internal) {
         // ToDo remove hidden dependencies from propagate_constants pass
         apply_opt_pass<propagate_constants>();
     }
+
+    if (options.get<build_option_type::optimize_data>()->enabled())
+        apply_opt_pass<remove_redundant_reorders>(lo, false, true, true);  // pass to remove output reorders while all others graph optimizations were done
 }
 
 // mark if the node is constant assuming that all dependencies are marked properly
@@ -1116,7 +1121,12 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
             prim.type() != cldnn::prior_box::type_id() &&
             prim.type() != cldnn::resample::type_id() &&
             prim.type() != cldnn::crop::type_id() &&
-            prim.type() != cldnn::scale::type_id())
+            prim.type() != cldnn::scale::type_id() &&
+            prim.type() != cldnn::depth_to_space::type_id() &&
+            (prim.type() != cldnn::mvn::type_id()
+             || (prim.as<mvn>().input().get_output_layout().data_type != data_types::u8 &&
+                 prim.as<mvn>().input().get_output_layout().data_type != data_types::i8)
+             || prim.as<mvn>().get_primitive()->across_channels))
             can_use_fsv16 = false;
 
         // WA to keep bfyx_f16 layout disabled for some topologies where it leads to regressions.
index 1e54d7f..2ff72f9 100644 (file)
@@ -21,6 +21,7 @@
 #include "data_inst.h"
 #include <algorithm>
 #include <utility>
+#include <vector>
 
 namespace cldnn {
 // helper function for merging the weights/biases buffers on cpu side for depthwise separable convolution optimization
@@ -44,6 +45,67 @@ void program_helpers::merge_buffers(engine_impl& engine,
     data_node.attach_memory(*data_to_allocate, false);
 }
 
+void program_helpers::reshape_deconvolution_weights(const std::vector<float> &deconv_weights,
+    const int channels,
+    const int kernel_width,
+    const int kernel_height,
+    const int scale_factor,
+    std::vector<std::vector<std::vector<float> > >& subpixel_weights) {
+
+    std::vector<std::vector<float> > weights(channels);
+
+    int pad_zero_x = kernel_width % 2 == 0 ? 0 : 1;
+    int pad_zero_y = kernel_height % 2 == 0 ? 0 : 1;
+
+    // reshape 9x9 deconv weights, for example 32 9x9 deconv weights to 32 10x10 conv weights
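+    // (odd kernel dimensions are zero-padded on the right/bottom so that the kernel splits evenly into sub-kernels)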
+    for (int f = 0; f < channels; ++f) {
+        for (int kernel_y = 0; kernel_y < kernel_height; ++kernel_y) {
+            for (int kernel_x = 0; kernel_x < kernel_width; ++kernel_x) {
+                int index = f * kernel_width * kernel_height + kernel_y * kernel_width + kernel_x;
+                weights[f].push_back(deconv_weights[index]);
+            }
+            if (pad_zero_x == 1) {    // pad with zero on x axis
+                weights[f].push_back(0.f);
+            }
+        }
+        if (pad_zero_y == 1) {    // pad a line on y axis with zero
+            for (int kernel_x = 0; kernel_x < kernel_width + pad_zero_x; ++kernel_x) {
+                weights[f].push_back(0.f);
+            }
+        }
+    }
+
+    // split the padded kernels into scale_factor^2 sets of sub-kernels, e.g. 32 10x10 kernels -> 4 sets of 32 5x5 kernels
+    for (int s = 0; s < scale_factor*scale_factor; ++s) {
+        subpixel_weights[s].resize(channels);
+    }
+
+    const int kernel_sz = kernel_width + pad_zero_x;
+
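+    // map a flattened weight index to one of the four sub-kernels based on the parity of its row and column (scale_factor == 2 case)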
+    auto get_row_index = [](int index, const int kernel_sz)->int {
+        bool isRowEven = (index / (kernel_sz)) % 2 == 0 ? true : false;
+        bool isColEven = (index % 2) == 0 ? true : false;
+        int kernel_num = isRowEven ? (isColEven ? 0 : 1) : isColEven ? 2 : 3;
+        return kernel_num;
+    };
+
+    int feature_num = static_cast<int>(weights.size());
+    for (int f = 0; f < feature_num; ++f) {
+        for (int i = 0; i < static_cast<int>(weights[f].size()); ++i) {
+            int row = get_row_index(i, kernel_sz);
+            subpixel_weights[row][f].push_back(weights[f][i]);
+        }
+    }
+
+    // reverse the flattened weights of each sub-kernel (a 180-degree flip of the kernel)
+    int subpixel_conv_num = static_cast<int>(subpixel_weights.size());
+    for (int s = 0; s < subpixel_conv_num; ++s) {
+        for (int row = 0; row < static_cast<int>(subpixel_weights[s].size()); ++row) {
+            std::reverse(std::begin(subpixel_weights[s][row]), std::end(subpixel_weights[s][row]));
+        }
+    }
+}
+
 // helper function for getting target layout used in depthwise sep optimization
 layout program_helpers::get_weights_layout(typed_program_node<cldnn::data>& data_node, int32_t split) {
     auto mem_layout = data_node.get_output_layout();
index 846e83e..0f74654 100644 (file)
@@ -48,12 +48,14 @@ layout reorder_inst::calc_output_layout(reorder_node const& node) {
         if (ofmt != ifmt)
             return layout(odt, ofmt, data_size, op);
 
-        CLDNN_ERROR_MESSAGE(node.id(), "Reordering between winograd weights and data formats is unsupported");
+        CLDNN_ERROR_MESSAGE(node.id(), "No image_nv12 to image_nv12 reorder is supported");
     } else if (ofmt.is_winograd() && ifmt.is_winograd()) {
         if (ofmt == ifmt)
             return layout(odt, ofmt, input_layout.size, op);
 
         CLDNN_ERROR_MESSAGE(node.id(), "Reordering between winograd weights and data formats is unsupported");
+    } else if (ifmt == format::image_2d_rgba) {
+        return layout(data_types::f16, format::bfyx, input_layout.size, op);
     }
 
     // transformation of data from standard to winograd
index 8cd209e..fa0fc02 100644 (file)
@@ -77,8 +77,11 @@ layout strided_slice_inst::calc_output_layout(strided_slice_node const& node) {
     std::vector<int32_t> output_shape;
     if (std::find(desc->new_axis_mask.begin(), desc->new_axis_mask.end(), 1) == desc->new_axis_mask.end()) {
         for (size_t i = 0; i < dims_num; ++i) {
-            int32_t outputDimSize = (end[i] - begin[i]) / strides[i];
-            if ((end[i] - begin[i]) % strides[i] != 0)
+            int32_t b = begin[i] < 0 ? input_layout.size.sizes(input_format)[i] - 1 : begin[i];
+            int32_t e = end[i] < 0 ? input_layout.size.sizes(input_format)[i] - 1 : end[i];
+            int32_t s = strides[i];
+            int32_t outputDimSize = std::abs((e - b) / s);
+            if ((e - b) % s != 0)
                 outputDimSize++;
             output_shape.push_back(outputDimSize);
         }
index ca6eee0..9678e61 100644 (file)
@@ -108,6 +108,80 @@ TEST(concat_gpu, mixed_input_types) {
     }
 }
 
+TEST(concat_gpu, mixed_input_types_5d) {
+    const auto& engine = get_test_engine();
+
+    auto input0 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } });
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } });
+    auto input2 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } });
+    auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } });
+
+    set_values(input0, { half_t(1.0f), half_t(2.0f), half_t(3.0f),
+                         half_t(4.0f), half_t(2.0f), half_t(2.0f),
+                         half_t(3.0f), half_t(4.0f), half_t(3.0f),
+                         half_t(3.0f), half_t(3.0f), half_t(5.0f) });
+    set_values(input1, { half_t(11), half_t(12), half_t(13),
+                         half_t(14), half_t(12), half_t(12),
+                         half_t(13), half_t(14), half_t(13),
+                         half_t(13), half_t(13), half_t(15) });
+    set_values(input2, { half_t(21), half_t(22), half_t(23),
+                         half_t(24), half_t(22), half_t(22),
+                         half_t(23), half_t(24), half_t(23),
+                         half_t(23), half_t(23), half_t(25) });
+    set_values(input3, { half_t(31.f), half_t(32.f), half_t(33.f),
+                         half_t(34.f), half_t(32.f), half_t(32.f),
+                         half_t(33.f), half_t(34.f), half_t(33.f),
+                         half_t(33.f), half_t(33.f), half_t(35.f) });
+
+    VF<float> output_vec = {
+            1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f,
+            11.0f, 12.0f, 13.0f, 14.0f, 12.0f, 12.0f, 13.0f, 14.0f, 13.0f, 13.0f, 13.0f, 15.0f,
+            21.0f, 22.0f, 23.0f, 24.0f, 22.0f, 22.0f, 23.0f, 24.0f, 23.0f, 23.0f, 23.0f, 25.0f,
+            31.0f, 32.0f, 33.0f, 34.0f, 32.0f, 32.0f, 33.0f, 34.0f, 33.0f, 33.0f, 33.0f, 35.0f };
+
+    topology topology(
+            input_layout("input0", input0.get_layout()),
+            input_layout("input1", input1.get_layout()),
+            input_layout("input2", input2.get_layout()),
+            input_layout("input3", input3.get_layout()),
+            concatenation("concat",
+                          { "input0", "input1", "input2", "input3" },
+                          concatenation::concatenation_axis::along_f,
+                          data_types::f32,
+                          padding{ { 0,0,0,0 }, 0 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "concat");
+
+    auto output_memory = outputs.at("concat").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<float>();
+
+    int z_size = output_layout.size.spatial[2];
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::bfzyx);
+    EXPECT_EQ(z_size, 3);
+    EXPECT_EQ(y_size, 4);
+    EXPECT_EQ(x_size, 1);
+    EXPECT_EQ(f_size, 4);
+    EXPECT_EQ(b_size, 1);
+
+    for (size_t x = 0; x < output_layout.count(); ++x) {
+        EXPECT_EQ(output_vec[x], output_ptr[x]);
+    }
+}
+
 using TestParamType_concat = ::testing::tuple<size_t,   // 0 - Input Batch size
         std::vector<size_t>,                            // 1 - Inputs Features Sizes
         size_t,                                         // 2 - Input Y Size
index 13a1e67..ecd134e 100644 (file)
@@ -80,7 +80,7 @@ struct convolution_accumulator<uint8_t> {
 template<typename InputT, typename OutputT = InputT, typename WeightsT = InputT,  typename AccT = typename convolution_accumulator<InputT>::type>
 VVF<OutputT> reference_convolve(VVVF<InputT> &input, VVVF<WeightsT> &filter, int stride_y, int stride_x, float bias, int dilation_y = 1, int dilation_x = 1,
         int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0,
-        int output_padding_x = 0, size_t f_begin = 0, size_t f_end = 0, bool depthwise = false,
+        int output_padding_x = 0, size_t f_begin = 0, size_t f_end = 0, bool depthwise = false, bool grouped = false,
         const VF<InputT>& data_zp = {}, const WeightsT& weights_zp = 0)
 {
     size_t kernel_extent_y = dilation_y * (filter[0].size() - 1) + 1;
@@ -100,27 +100,25 @@ VVF<OutputT> reference_convolve(VVVF<InputT> &input, VVVF<WeightsT> &filter, int
                 for (size_t yf = 0; yf < filter[0].size(); ++yf) {
                     int yi = -input_padding_y + (int)yf * dilation_y + stride_y * (int)y;
                     bool yi_inside = yi >= 0 && (int)input[0].size() > yi;
-                    if (!yi_inside && !asymm_data) continue;
+                    if (!yi_inside) continue;
                     for (size_t xf = 0; xf < filter[0][0].size(); ++xf) {
                         int xi = -input_padding_x + (int)xf * dilation_x + stride_x * (int)x;
                         bool xi_inside = xi >= 0 && (int)input[0][0].size() > xi;
-                        if (!xi_inside && !asymm_data) continue;
+                        if (!xi_inside) continue;
 
-                        AccT input_val;
-                        if (xi_inside && yi_inside) {
-                            input_val = static_cast<AccT>(input[f][yi][xi]);
-                        } else {
-                            input_val = static_cast<AccT>(0);
-                        }
+                        auto input_val = static_cast<AccT>(input[f][yi][xi]);
 
                         if (asymm_data) {
                             input_val = input_val - static_cast<AccT>(data_zp[f]);
                         }
 
                         AccT weights_val;
-                        if (!depthwise) {
+                        if (!depthwise && !grouped) {
                             weights_val = static_cast<AccT>(filter[f][yf][xf]);
-                        } else {
+                        } else if (grouped) {
+                            weights_val = static_cast<AccT>(filter[f - filter_begin][yf][xf]);
+                        } else {
                             weights_val = static_cast<AccT>(filter[0][yf][xf]);
                         }
 
@@ -5116,6 +5114,16 @@ using TestParamType_convolution_depthwise_gpu = ::testing::tuple<int,   // 0 - I
         int,   // 5 - Output padding
         bool>; // 6 - With bias
 
+using TestParamType_grouped_convolution_gpu = ::testing::tuple<  int,    // 0 - Input X size
+        int,   // 1 - Input Y size
+        int,   // 2 - Input features
+        int,   // 3 - Output features
+        int,   // 4 - Kernel sizeX
+        int,   // 5 - Kernel sizeY
+        int,   // 6 - Groups number
+        int,   // 7 - Stride
+        int>;  // 8 - Batch
+
 struct convolution_gpu : public ::testing::TestWithParam<TestParamType_convolution_gpu>
 {
     static std::string
@@ -5184,6 +5192,22 @@ struct convolution_depthwise_gpu_fsv16 : public ::testing::TestWithParam<TestPar
     }
 };
 
+struct convolution_grouped_gpu : public ::testing::TestWithParam<TestParamType_grouped_convolution_gpu> {
+    static std::string PrintToStringParamName(
+        testing::TestParamInfo<TestParamType_grouped_convolution_gpu> param_info) {
+        // construct a readable name
+        return "in" + std::to_string(testing::get<0>(param_info.param)) + "x" +
+               std::to_string(testing::get<1>(param_info.param)) + "y" +
+               std::to_string(testing::get<2>(param_info.param)) + "f" +
+               "_output" + std::to_string(testing::get<3>(param_info.param)) + "f" +
+               "_filter" + std::to_string(testing::get<4>(param_info.param)) + "x" +
+                           std::to_string(testing::get<5>(param_info.param)) + "y" +
+               "_groups" + std::to_string(testing::get<6>(param_info.param)) +
+               "_stride" + std::to_string(testing::get<7>(param_info.param)) +
+               "_batch"  + std::to_string(testing::get<8>(param_info.param));
+    }
+};
+
 TEST_P(convolution_gpu, b_fs_yx_fsv4)
 {
     const int in_B = 2;
@@ -5890,6 +5914,108 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32)
                 }
 }
 
+TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) {
+    const auto& engine = get_test_engine();
+
+    if (!engine.get_info().supports_fp16)
+    {
+        std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl;
+        EXPECT_EQ(1, 1);
+        return;
+    }
+
+    const int batch_num = 2;
+    const int input_xy = 32;
+    const int input_f = 96;
+    const int output_f = 192;
+    const int filter_xy = 1;
+    const int stride = 1;
+    const int output_xy = 1 + (input_xy - filter_xy) / stride;
+
+    auto input_size = tensor(batch_num, input_f, input_xy, input_xy);
+    auto input_data = generate_random_4d<FLOAT16>(batch_num, input_f, input_xy, input_xy, -1, 1);
+    auto input_data_bfyx = flatten_4d(format::bfyx, input_data);
+    auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size });
+    set_values(input_mem, input_data_bfyx);
+
+    auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy);
+    auto weights_data = generate_random_4d<FLOAT16>(output_f, input_f, filter_xy, filter_xy, -1, 1);
+    auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data);
+    auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size });
+    set_values(weights_mem, weights_data_bfyx);
+
+    // Will be used to store reference values calculated in branches depending on bias
+    // Will be used to store the reference convolution results
+
+    topology topology(
+        input_layout("input", input_mem.get_layout()),
+        data("weights_fsv", weights_mem));
+
+    // add input padding by X and Y
+    layout w_pad(data_types::f16, format::bfyx, input_size, padding({ 0,0,1,1 }, { 0, 0, 0, 0 }));
+    topology.add(reorder("input_fsv", "input", w_pad));
+
+    // Generate bias data
+    auto biases_size = tensor(1, output_f, 1, 1);
+    auto biases_data = generate_random_1d<FLOAT16>(output_f, -1, 1);
+    auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size });
+    set_values(biases_mem, biases_data);
+
+    // Calculate reference values
+    for (auto bi = 0; bi < batch_num; ++bi)
+    {
+        for (auto ofi = 0; ofi < output_f; ++ofi)
+        {
+            reference_result[bi][ofi] = reference_convolve(
+                input_data[bi], weights_data[ofi],
+                stride, stride,
+                biases_data[ofi],
+                1, 1);
+        }
+    }
+
+    topology.add(data("biases_fsv", biases_mem));
+
+    auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, { "biases_fsv" },
+        { 1, 1, stride, stride }, { 0, 0, 0, 0 });
+
+    topology.add(conv_fsv);
+
+    build_options options;
+    implementation_desc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" };
+    options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} }));
+    options.set_option(build_option::optimize_data(true));
+    network network(engine, topology, options);
+
+    network.set_input_data("input", input_mem);
+
+    network.execute();
+
+    auto out_mem = network.get_output("conv_fsv").get_memory();
+    auto out_ptr = out_mem.pointer<FLOAT16>();
+
+    ASSERT_EQ(out_mem.get_layout().format, format::fs_b_yx_fsv32);
+
+    for (int bi = 0; bi < batch_num; ++bi)
+        for (int fi = 0; fi < output_f; ++fi)
+            for (int yi = 0; yi < output_xy; ++yi)
+                for (int xi = 0; xi < output_xy; ++xi)
+                {
+                    auto val_ref = reference_result[bi][fi][yi][xi];
+                    auto val = out_ptr[(fi / 32) * batch_num * output_xy * output_xy * 32 +
+                        bi * output_xy * output_xy * 32 +
+                        yi * output_xy * 32 +
+                        xi * 32 +
+                        fi % 32];
+                    auto equal = are_equal(val_ref, val, 1e-2f);
+                    EXPECT_TRUE(equal);
+                    if (!equal)
+                    {
+                        std::cout << "At b = " << bi << ", fi = " << fi << ", xi = " << xi << ", yi = " << yi << std::endl;
+                    }
+                }
+}
+
 using TestParamType_convolution_gpu_with_crop = ::testing::tuple<int,   // 0 - Filter size
     int,   // 1 - Input size
     int,   // 2 - Input/output features
@@ -6098,6 +6224,148 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop)
 }
 
 
+
+TEST(convolution_gpu, bfyx_iyxo_5x5_fp16)
+{
+
+    const auto& engine = get_test_engine();
+
+    if (!engine.get_info().supports_fp16)
+    {
+        std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl;
+        EXPECT_EQ(1, 1);
+        return;
+    }
+
+    const int batch_num = 1;
+    const int output_f = 4;
+
+    const int input_f = 32;
+    const int filter_xy = 5;
+    const int stride = 1;
+    const int output_padding = 0;
+    const bool with_bias = false;
+    const int input_size_x = 64;
+    const int input_size_y = 20;
+
+    const int input_offset = -(filter_xy / 2);
+
+    const int output_x = 1 + (input_size_x + 2 * (-input_offset) - filter_xy) / stride + 2 * output_padding;
+
+    const int output_y = 1 + (input_size_y + 2 * (-input_offset) - filter_xy) / stride + 2 * output_padding;
+
+    auto input_size = tensor(batch_num, input_f, input_size_x, input_size_y);
+    auto input_data = generate_random_4d<FLOAT16>(batch_num, input_f, input_size_y, input_size_x, -1, 1);
+
+    auto input_data_bfyx = flatten_4d(format::bfyx, input_data);
+    auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size });
+    set_values(input_mem, input_data_bfyx);
+
+    auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy);
+    auto weights_data = generate_random_4d<FLOAT16>(output_f, input_f, filter_xy, filter_xy, -1, 1);
+    auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data);
+    auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size });
+
+    set_values(weights_mem, weights_data_bfyx);
+
+    // Will be used to store reference values calculated in branches depending on bias
+    auto reference_result = VVVVF<FLOAT16>(batch_num, VVVF<FLOAT16>(output_f));
+
+    topology topology(
+        input_layout("input", input_mem.get_layout()),
+        data("weights_fsv", weights_mem)
+    );
+
+    if (with_bias)
+    {
+        // Generate bias data
+        auto biases_size = tensor(1, output_f, 1, 1);
+        auto biases_data = generate_random_1d<FLOAT16>(output_f, -1, 1);
+        auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size });
+        set_values(biases_mem, biases_data);
+
+        // Calculate reference values with bias
+        for (auto bi = 0; bi < batch_num; ++bi)
+        {
+            for (auto ofi = 0; ofi < output_f; ++ofi)
+            {
+                reference_result[bi][ofi] = reference_convolve(
+                    input_data[bi], weights_data[ofi],
+                    stride, stride, biases_data[ofi],
+                    1, 1,                               // dilation
+                    -input_offset, -input_offset,       // input padding
+                    output_padding, output_padding);
+            }
+        }
+
+        topology.add(data("biases_fsv", biases_mem));
+
+        auto conv_fsv = convolution("conv_fsv", "input", { "weights_fsv" }, { "biases_fsv" },
+                                    { 1, 1, stride, stride }, { 0, 0, input_offset, input_offset });
+        conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f);
+
+        topology.add(conv_fsv);
+    }
+    else
+    {
+        // Calculate reference values without bias
+        for (auto bi = 0; bi < batch_num; ++bi)
+        {
+            for (auto ofi = 0; ofi < output_f; ++ofi)
+            {
+                reference_result[bi][ofi] = reference_convolve(
+                    input_data[bi], weights_data[ofi],
+                    stride, stride,
+                    0,                                  // bias
+                    1, 1,                               // dilation
+                    -input_offset, -input_offset,       // input padding
+                    output_padding, output_padding);
+            }
+        }
+
+        auto conv_fsv = convolution("conv_fsv", "input", { "weights_fsv" },
+            { 1, 1, stride, stride }, { 0, 0, input_offset, input_offset });
+        conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f);
+
+        topology.add(conv_fsv);
+    }
+
+    build_options options;
+    options.set_option(build_option::optimize_data(true));
+    network network(engine, topology, options);
+
+    network.set_input_data("input", input_mem);
+
+    network.execute();
+
+    auto out_mem = network.get_output("conv_fsv").get_memory();
+    auto out_ptr = out_mem.pointer<FLOAT16>();
+
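+    // Output is read back assuming plain bfyx indexing: ((b * F + f) * Y + y) * X + x.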
+    for (int bi = 0; bi < batch_num; ++bi)
+        for (int fi = 0; fi < output_f; ++fi)
+            for (int yi = 0; yi < output_y; ++yi)
+                for (int xi = 0; xi < output_x; ++xi)
+                {
+                    auto val_ref = reference_result[bi][fi][yi][xi];
+                    auto val = out_ptr[bi * output_f * output_x * output_y +
+                                        fi * output_y * output_x  +
+                                        yi * output_x +
+                                        xi];
+                    auto equal = are_equal(val_ref, val, 1e-2f);
+                    EXPECT_TRUE(equal);
+                    if (!equal)
+                    {
+                        std::cout << "At b = " << bi << ", fi = " << fi << ", xi = " << xi << ", yi = " << yi << std::endl;
+                    }
+                }
+}
+
 INSTANTIATE_TEST_CASE_P(convolution_gpu_block,
                         convolution_gpu_block_layout,
                         ::testing::Values(
@@ -7364,8 +7632,131 @@ INSTANTIATE_TEST_CASE_P(convolution_depthwise_gpu_bfyx,
                         ),
                         convolution_depthwise_gpu::PrintToStringParamName);
 
+INSTANTIATE_TEST_CASE_P(convolution_grouped_b_fs_yx_fsv4,
+                        convolution_grouped_gpu,
+                        ::testing::Values(
+                            // Input X size, Input Y size, Input features, Output features, Kernel size X,
+                            // Kernel size Y, Groups number, Stride, Batch
+                            TestParamType_grouped_convolution_gpu(4, 4, 16, 16, 3, 3, 4, 1, 1),
+                            TestParamType_grouped_convolution_gpu(4, 4, 8, 4, 2, 2, 2, 1, 4),
+                            TestParamType_grouped_convolution_gpu(8, 8, 16, 16, 4, 4, 4, 1, 1),
+                            TestParamType_grouped_convolution_gpu(17, 17, 32, 96, 3, 3, 2, 2, 2),
+                            TestParamType_grouped_convolution_gpu(16, 16, 8, 48, 2, 2, 2, 2, 1),
+                            TestParamType_grouped_convolution_gpu(3, 3, 48, 96, 2, 2, 2, 8, 1),
+                            TestParamType_grouped_convolution_gpu(6, 6, 8, 26, 3, 3, 2, 4, 1)),
+                        convolution_grouped_gpu::PrintToStringParamName);
+
+TEST_P(convolution_grouped_gpu, grouped_b_fs_yx_fsv4) {
+    const auto& engine = get_test_engine();
+
+    const int input_x = testing::get<0>(GetParam()),
+              input_y = testing::get<1>(GetParam()),
+              input_f = testing::get<2>(GetParam()),
+              output_f = testing::get<3>(GetParam()),
+              filter_x = testing::get<4>(GetParam()),
+              filter_y = testing::get<5>(GetParam()),
+              groups = testing::get<6>(GetParam()),
+              stride = testing::get<7>(GetParam()),
+              batch_num = testing::get<8>(GetParam()),
+              output_padding = 0,
+              input_offset_y = (filter_y - 1) / 2,
+              input_offset_x = (filter_x - 1) / 2;
+
+    auto input_size = tensor(batch(batch_num), feature(input_f), spatial(input_x, input_y));
+    auto input_rnd = generate_random_4d<uint8_t>(batch_num, input_f, input_y, input_x, 0, 255);
+    auto input_rnd_vec = flatten_4d<uint8_t>(format::bfyx, input_rnd);
+    auto input = memory::allocate(engine, {data_types::u8, format::bfyx, input_size});
+    set_values(input, input_rnd_vec);
+
+    auto weights_size = tensor(group(groups), batch(output_f / groups), feature(input_f / groups), spatial(filter_x, filter_y));
+    VVVVVF<int8_t> weights_rnd = generate_random_5d<int8_t>(groups, output_f / groups, input_f / groups, filter_y, filter_x, -127, 127);
+    auto weights_lay = layout(data_types::i8, format::goiyx, weights_size);
+
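+    // Scatter the random 5D weights into a linear goiyx buffer via the layout's linear offset for each (g, o, i, y, x).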
+    std::vector<int8_t> weights_flat(weights_lay.get_linear_size());
+    for (int gi = 0; gi < groups; ++gi)
+        for (int ofi = 0; ofi < output_f / groups; ++ofi)
+            for (int ifi = 0; ifi < input_f / groups; ++ifi)
+                for (int kyi = 0; kyi < filter_y; ++kyi)
+                    for (int kxi = 0; kxi < filter_x; ++kxi) {
+                        tensor coords = tensor(group(gi), batch(ofi), feature(ifi), spatial(kxi, kyi, 0, 0));
+                        size_t offset = weights_lay.get_linear_offset(coords);
+                        weights_flat[offset] = weights_rnd[gi][ofi][ifi][kyi][kxi];
+                    }
+    auto weights = memory::allocate(engine, {data_types::i8, format::goiyx, weights_size});
+    set_values(weights, weights_flat);
+
+    VVVVF<float> expected_result(batch_num, VVVF<float>(output_f));
+
+    // Calculate reference values without bias
+    for (int bi = 0; bi < batch_num; ++bi)
+        for (int gi = 0; gi < groups; ++gi)
+            for (int ofi = 0; ofi < (int)weights_rnd[0].size(); ++ofi) {
+                bool grouped = groups > 1;
+                int f_begin = gi * input_f / groups;
+                int f_end = gi * input_f / groups + input_f / groups;
+
+                expected_result[bi][ofi + gi * output_f / groups] = reference_convolve<uint8_t, float, int8_t>(
+                    input_rnd[bi], weights_rnd[gi][ofi],  // input, weights
+                    stride, stride,                       // strides
+                    0,                                    // bias
+                    1, 1,                                 // dilation
+                    input_offset_y, input_offset_x,       // input padding
+                    0, 0,                                 // output_padding
+                    f_begin, f_end,                       // f_begin, f_end
+                    false,                                // depthwise
+                    grouped);                             // grouped
+            }
+
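+    // Topology: reorder the u8 input to b_fs_yx_fsv4, then run the grouped convolution on it;
+    // the imad kernel is forced through force_implementations below.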
+    topology topology(input_layout("input", input.get_layout()),
+                      data("weights", weights),
+                      reorder("input_fsv", "input", {data_types::u8, format::b_fs_yx_fsv4, input_size}),
+                      convolution("conv",
+                                  "input_fsv",
+                                  {"weights"},
+                                  groups,
+                                  {1, 1, stride, stride},
+                                  {0, 0, -input_offset_x, -input_offset_y},
+                                  {1, 1, 1, 1},
+                                  padding({0, 0, output_padding, output_padding}, 0.f)));
+
+    build_options options;
+    options.set_option(build_option::optimize_data(true));
+    implementation_desc conv_impl = {format::b_fs_yx_fsv4, "fused_conv_eltwise_gpu_imad"};
+    options.set_option(build_option::force_implementations({{"conv", conv_impl}}));
+
+    network network(engine, topology, options);
+    network.set_input_data("input", input);
+    network.execute();
+
+    auto out_mem = network.get_output("conv").get_memory();
+    auto out_ptr = out_mem.pointer<float>();
+    auto out_lay = out_mem.get_layout();
+
+    ASSERT_EQ(out_mem.get_layout().format, format::b_fs_yx_fsv4);
+    ASSERT_EQ(out_lay.size.batch[0], expected_result.size());
+    ASSERT_EQ(out_lay.size.feature[0], expected_result[0].size());
+    ASSERT_EQ(out_lay.size.spatial[1], expected_result[0][0].size());
+    ASSERT_EQ(out_lay.size.spatial[0], expected_result[0][0][0].size());
+
+    for (int bi = 0; bi < batch_num; ++bi)
+        for (int ofi = 0; ofi < output_f; ++ofi)
+            for (int yi = 0; yi < (int)expected_result[0][0].size(); ++yi)
+                for (int xi = 0; xi < (int)expected_result[0][0][0].size(); ++xi) {
+                    tensor coords = tensor(batch(bi), feature(ofi), spatial(xi, yi, 0, 0));
+                    auto offset = out_lay.get_linear_offset(coords);
+                    auto val = out_ptr[offset];
+                    auto val_ref = expected_result[bi][ofi][yi][xi];
+                    auto equal = are_equal(val_ref, val, 1e-2f);
+                    if (!equal) {
+                        std::cout << "Value at batch: " << bi << ", output_f: " << ofi << ", y: " << yi << ", x: " << xi << " = " << val << std::endl;
+                        std::cout << "Reference value at batch: " << bi << ", output_f: " << ofi << ", y: " << yi << ", x: " << xi << " = " << val_ref << std::endl;
+                    }
+                    EXPECT_TRUE(equal);
+                }
+}
+
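+// Note: convolution_test_base below is no longer a gtest fixture; the value-parameterized TEST_P bodies
+// further down instantiate it (through the convolution_random_test_base aliases) and call run_random() directly.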
 template <typename InputT, typename WeightsT, typename OutputT>
-class convolution_test_base : public testing::Test {
+class convolution_test_base {
 public:
     virtual topology build_topology(const cldnn::engine& engine) {
         auto input_lay = layout(input_type(), input_format(), input_size());
@@ -7377,12 +7768,30 @@ public:
 
         auto topo = topology();
         topo.add(input_layout("input", input_lay));
+        std::string input_id = "input";
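+        // Asymmetric quantization: when zero-points are provided, they are subtracted per channel from the input
+        // (and, below, from the weights) with an eltwise sub before the convolution.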
+        if (has_input_zp()) {
+            auto input_zp_lay = layout(input_type(), format::bfyx, tensor(feature(input_features())));
+            auto input_zp_mem = memory::allocate(engine, input_zp_lay);
+            set_values(input_zp_mem, _input_zp);
+            topo.add(data("input_zp", input_zp_mem));
+            topo.add(eltwise("input_asymm", { "input", "input_zp" }, eltwise_mode::sub));
+            input_id = "input_asymm";
+        }
         topo.add(data("weights", wei_mem));
+        std::string weights_id = "weights";
+        if (has_weights_zp()) {
+            auto weights_zp_lay = layout(weights_type(), format::bfyx, tensor(batch(output_features())));
+            auto weights_zp_mem = memory::allocate(engine, weights_zp_lay);
+            set_values(weights_zp_mem, _weights_zp);
+            topo.add(data("weights_zp", weights_zp_mem));
+            topo.add(eltwise("weights_asymm", { "weights", "weights_zp" }, eltwise_mode::sub));
+            weights_id = "weights_asymm";
+        }
         if (!has_bias()) {
             auto conv_prim = convolution(
                 "conv",
-                "input",
-                { "weights" },
+                input_id,
+                { weights_id },
                 static_cast<uint32_t>(groups()),
                 tensor(batch(0), feature(0), spatial(_stride_x, _stride_y)),
                 tensor(batch(0), feature(0), spatial(_offset_x, _offset_y)),
@@ -7396,8 +7805,8 @@ public:
             topo.add(data("bias", bias_mem));
             auto conv_prim = convolution(
                 "conv",
-                "input",
-                { "weights" },
+                input_id,
+                { weights_id },
                 { "bias" },
                 static_cast<uint32_t>(groups()),
                 tensor(batch(0), feature(0), spatial(_stride_x, _stride_y)),
@@ -7420,7 +7829,8 @@ public:
         auto topo = build_topology(engine);
 
         auto build_opts = build_options(
-            build_option::optimize_data(true)
+            build_option::optimize_data(true),
+            build_option::force_implementations({ {"conv", {input_format(), ""}} })
         );
         auto prog = program(engine, topo, build_opts);
 
@@ -7445,6 +7855,17 @@ public:
         auto out_lay = out_mem.get_layout();
         auto out_ptr = out_mem.cldnn::memory::template pointer<OutputT>();
 
+        std::stringstream description;
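+        // Collect the kernel chosen for "conv" and the list of executed primitives; appended to assertion messages below.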
+        for (auto i : net.get_primitives_info()) {
+            if (i.original_id == "conv") {
+                description << "  kernel: " << i.kernel_id << std::endl;
+            }
+        }
+        description << "  executed: ";
+        for (auto e : net.get_executed_primitive_ids()) {
+            description << e << ", ";
+        }
+
         ASSERT_EQ(out_lay.data_type, output_type());
         ASSERT_EQ(out_lay.size.batch[0], expected.size());
         ASSERT_EQ(out_lay.size.feature[0], expected[0].size());
@@ -7458,8 +7879,9 @@ public:
                         tensor coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
                         size_t offset = out_lay.get_linear_offset(coords);
 
-                        EXPECT_EQ(out_ptr[offset], expected[bi][fi][yi][xi])
-                            << "at b= " << bi << ", f= " << fi << ", y= " << yi << ", x= " << xi;
+                        ASSERT_EQ(out_ptr[offset], expected[bi][fi][yi][xi])
+                            << "at b= " << bi << ", f= " << fi << ", y= " << yi << ", x= " << xi << std::endl
+                            << description.str();
                     }
     }
 
@@ -7491,10 +7913,20 @@ public:
         _dilation_y = dilation_y;
     }
 
+    void set_input_zp(VF<InputT> input_zp) {
+        _input_zp = std::move(input_zp);
+    }
+
+    void set_weights_zp(VF<WeightsT> weights_zp) {
+        _weights_zp = std::move(weights_zp);
+    }
+
 protected:
     VVVVF<InputT> _input;
     VVVVF<WeightsT> _weights;
     VF<OutputT> _bias;
+    VF<InputT> _input_zp;
+    VF<WeightsT> _weights_zp;
     format::type _input_fmt;
     int _stride_x, _stride_y;
     int _offset_x, _offset_y;
@@ -7511,6 +7943,8 @@ protected:
     size_t groups() const { return input_features() / weights_input_features(); }
 
     bool has_bias() { return _bias.size() > 0; }
+    bool has_input_zp() { return _input_zp.size() > 0; }
+    bool has_weights_zp() { return _weights_zp.size() > 0; }
 
     data_types input_type() const { return type_to_data_type<InputT>::value; }
     format input_format() const { return _input_fmt; }
@@ -7545,78 +7979,23 @@ struct convolution_random_test_all_params {
     bool with_bias;
     size_t groups;
     format::type input_format;
+    bool asymmetric_weights;
+    bool asymmetric_data;
 };
 
-using convolution_random_test_params = std::tuple<
-    size_t,                     // batch
-    size_t,                     // input features
-    size_t,                     // output features
-    std::tuple<size_t, size_t>, // input x, y
-    std::tuple<size_t, size_t>, // filter x, y
-    std::tuple<int, int>,       // stride x, y
-    std::tuple<int, int>,       // offset x, y
-    std::tuple<int, int>,       // dilation x, y
-    bool,                       // with bias
-    format::type                // input format
->;
-
-static convolution_random_test_all_params convert_random_test_params(const convolution_random_test_params& params) {
-    convolution_random_test_all_params all_params;
-    std::forward_as_tuple(
-        all_params.batch,
-        all_params.input_features,
-        all_params.output_features,
-        std::forward_as_tuple(all_params.input_xy[0], all_params.input_xy[1]),
-        std::forward_as_tuple(all_params.filter_xy[0], all_params.filter_xy[1]),
-        std::forward_as_tuple(all_params.stride_xy[0], all_params.stride_xy[1]),
-        std::forward_as_tuple(all_params.offset_xy[0], all_params.offset_xy[1]),
-        std::forward_as_tuple(all_params.dilation_xy[0], all_params.dilation_xy[1]),
-        all_params.with_bias,
-        all_params.input_format) = params;
-    all_params.groups = 1;
-    return all_params;
-}
-
-using convolution_random_test_depthwise_params = std::tuple<
-    size_t,                     // batch
-    size_t,                     // input/output features
-    std::tuple<size_t, size_t>, // input x, y
-    std::tuple<size_t, size_t>, // filter x, y
-    std::tuple<int, int>,       // stride x, y
-    std::tuple<int, int>,       // offset x, y
-    std::tuple<int, int>,       // dilation x, y
-    bool,                       // with bias
-    format::type                // input format
->;
-
-static convolution_random_test_all_params convert_random_test_params(const convolution_random_test_depthwise_params& params) {
-    convolution_random_test_all_params all_params;
-    std::forward_as_tuple(
-        all_params.batch,
-        all_params.input_features,
-        std::forward_as_tuple(all_params.input_xy[0], all_params.input_xy[1]),
-        std::forward_as_tuple(all_params.filter_xy[0], all_params.filter_xy[1]),
-        std::forward_as_tuple(all_params.stride_xy[0], all_params.stride_xy[1]),
-        std::forward_as_tuple(all_params.offset_xy[0], all_params.offset_xy[1]),
-        std::forward_as_tuple(all_params.dilation_xy[0], all_params.dilation_xy[1]),
-        all_params.with_bias,
-        all_params.input_format) = params;
-    all_params.groups = all_params.input_features;
-    all_params.output_features = all_params.input_features;
-    return all_params;
-}
-
 template <typename InputT, typename WeightsT, typename OutputT>
 class convolution_random_test_base : public convolution_test_base<InputT, WeightsT, OutputT> {
 public:
     virtual VVVVF<OutputT> calculate_reference() {
         VVVVF<OutputT> expected = VVVVF<OutputT>(this->batch_num(), VVVF<OutputT>(this->output_features()));
         bool depthwise = this->groups() == this->input_features();
+        bool grouped = this->groups() > 1 && !depthwise;
         for (size_t bi = 0; bi < this->batch_num(); ++bi)
         for (size_t fi = 0; fi < this->output_features(); ++fi) {
             size_t f_begin = depthwise ? fi : 0;
             size_t f_end = (depthwise ? fi : 0) + this->weights_input_features();
             auto bias = this->has_bias() ? this->_bias[fi] : static_cast<OutputT>(0);
+            auto weights_zp = this->has_weights_zp() ? this->_weights_zp[fi] : static_cast<WeightsT>(0);
             expected[bi][fi] = reference_convolve<InputT, OutputT, WeightsT>(
                 this->_input[bi],
                 this->_weights[fi],
@@ -7631,7 +8010,10 @@ public:
                 0,
                 f_begin,
                 f_end,
-                depthwise);
+                depthwise,
+                grouped,
+                this->_input_zp,
+                weights_zp);
         }
         return expected;
     }
@@ -7644,6 +8026,8 @@ public:
         auto weights_data = generate_random_4d<WeightsT>(
             params.output_features, wei_in_f, params.filter_xy[1], params.filter_xy[0], -256, 256);
         auto bias_data = params.with_bias ? generate_random_1d<OutputT>(params.output_features, -256, 256) : VF<OutputT>();
+        auto weights_zp_data = params.asymmetric_weights ? generate_random_1d<WeightsT>(params.output_features, -256, 256) : VF<WeightsT>();
+        auto input_zp_data = params.asymmetric_data ? generate_random_1d<InputT>(params.input_features, -256, 256) : VF<InputT>();
 
         this->set_input(params.input_format, std::move(input_data));
         this->set_weights(std::move(weights_data));
@@ -7651,6 +8035,8 @@ public:
         this->set_strides(params.stride_xy[0], params.stride_xy[1]);
         this->set_offsets(params.offset_xy[0], params.offset_xy[1]);
         this->set_dilation(params.dilation_xy[0], params.dilation_xy[1]);
+        this->set_weights_zp(std::move(weights_zp_data));
+        this->set_input_zp(std::move(input_zp_data));
     }
 
     void run_random(const convolution_random_test_all_params& params) {
@@ -7663,7 +8049,8 @@ public:
 
 // construct a readable name in format as follows:
 // <out format>_i<input>_w<weights>_s<stride>_ofs<offset>_d<dilation>_g<groups>[_bias][_wzp][_izp]
-static std::string to_string_convolution_all_params(const convolution_random_test_all_params& params) {
+static std::string to_string_convolution_all_params(const testing::TestParamInfo<convolution_random_test_all_params>& param_info) {
+    auto& params = param_info.param;
     int Batch = (int)params.batch;
     int iF = (int)params.input_features;
     int oF = (int)params.output_features;
@@ -7675,6 +8062,8 @@ static std::string to_string_convolution_all_params(const convolution_random_tes
     auto groups = params.groups;
     bool Bias = params.with_bias;
     format::type iType = params.input_format;  // input format
+    bool asymm_weights = params.asymmetric_weights;
+    bool asymm_input = params.asymmetric_data;
     // Wrapper for negative values, since e.g. "-1" would generate an invalid gtest param string
     auto to_string_neg = [](int val) {
         if (val >= 0)
@@ -7690,142 +8079,13 @@ static std::string to_string_convolution_all_params(const convolution_random_tes
         "_ofs" + to_string_neg(Offset[0]) + 'x' + to_string_neg(Offset[1]) +
         "_d" + std::to_string(Dilation[0]) + 'x' + std::to_string(Dilation[1]) +
         "_g" + std::to_string(groups) +
-        (Bias ? "_bias" : "");
-}
-
-template <typename T>
-std::string to_string_convolution_random_params(testing::TestParamInfo<T> param_info) {
-    return to_string_convolution_all_params(convert_random_test_params(param_info.param));
+        (Bias ? "_bias" : "") + (asymm_weights ? "_wzp" : "") + (asymm_input ? "_izp" : "");
 }
 
 template <typename InputT, typename WeightsT, typename OutputT>
-class convolution_random_test : public convolution_random_test_base<InputT, WeightsT, OutputT>
-                              , public testing::WithParamInterface<convolution_random_test_params> {};
-
-
-using convolution_random_test_s8s8f32 = convolution_random_test<int8_t, int8_t, float>;
-using convolution_random_test_u8s8f32 = convolution_random_test<uint8_t, int8_t, float>;
-
-TEST_P(convolution_random_test_s8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
-}
-
-TEST_P(convolution_random_test_u8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
-}
-
-INSTANTIATE_TEST_CASE_P(
-    b_fs_yx_fsv4,
-    convolution_random_test_s8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input features
-        testing::Values(16, 32),                                                           // output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0)),                                        // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::b_fs_yx_fsv4)                                              // input format
-    ),
-    to_string_convolution_random_params<convolution_random_test_params>);
-
-INSTANTIATE_TEST_CASE_P(
-    b_fs_yx_fsv4,
-    convolution_random_test_u8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input features
-        testing::Values(16, 32),                                                           // output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0)),                                        // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::b_fs_yx_fsv4)                                              // input format
-    ),
-    to_string_convolution_random_params<convolution_random_test_params>);
-
-INSTANTIATE_TEST_CASE_P(
-    b_fs_yx_fsv4_1x1_lwg_opt,
-    convolution_random_test_s8s8f32,
-    testing::Combine(
-        testing::Values(1),                               // batch
-        testing::Values(128, 256, 512),                   // input features
-        testing::Values(64),                              // output features
-        testing::Values(std::pair<size_t, size_t>(3, 3)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1)),       // strides x, y
-        testing::Values(std::pair<int, int>(0, 0)),       // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),       // dilation x, y
-        testing::Values(false),                           // bias
-        testing::Values(format::b_fs_yx_fsv4)             // input format
-    ),
-    to_string_convolution_random_params<convolution_random_test_params>);
-
-template <typename InputT, typename WeightsT, typename OutputT>
-class convolution_random_dw_test : public convolution_random_test_base<InputT, WeightsT, OutputT>
-                                 , public testing::WithParamInterface<convolution_random_test_depthwise_params> {};
-
-using convolution_random_test_dw_s8s8f32 = convolution_random_dw_test<int8_t, int8_t, float>;
-using convolution_random_test_dw_u8s8f32 = convolution_random_dw_test<uint8_t, int8_t, float>;
-
-TEST_P(convolution_random_test_dw_s8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
-}
-
-TEST_P(convolution_random_test_dw_u8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
-}
-
-INSTANTIATE_TEST_CASE_P(
-    b_fs_yx_fsv4,
-    convolution_random_test_dw_s8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input/output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0), std::pair<int, int>(-1, -1)),           // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::b_fs_yx_fsv4)                                              // input format
-    ),
-    to_string_convolution_random_params<convolution_random_test_depthwise_params>);
-
-INSTANTIATE_TEST_CASE_P(
-    b_fs_yx_fsv4,
-    convolution_random_test_dw_u8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input/output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0), std::pair<int, int>(-1, -1)),           // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::b_fs_yx_fsv4)                                              // input format
-    ),
-    to_string_convolution_random_params<convolution_random_test_depthwise_params>);
-
-INSTANTIATE_TEST_CASE_P(
-    special_cases,
-    convolution_random_test_dw_u8s8f32,
-    testing::Values(
-        convolution_random_test_depthwise_params(
-            1, 32, std::pair<size_t, size_t>(28, 28), std::pair<size_t, size_t>(3, 3),
-            std::pair<int, int>(1, 1), std::pair<int, int>(-1, -1), std::pair<int, int>(1, 1), true, format::b_fs_yx_fsv4)
-    ),
-    to_string_convolution_random_params<convolution_random_test_depthwise_params>);
-
-template <typename InputT, typename WeightsT, typename OutputT>
-class convolution_scale_random_test : public convolution_random_test<InputT, WeightsT, OutputT> {
+class convolution_scale_random_test : public convolution_random_test_base<InputT, WeightsT, OutputT> {
 public:
-    using parent = convolution_random_test<InputT, WeightsT, OutputT>;
+    using parent = convolution_random_test_base<InputT, WeightsT, OutputT>;
 
     virtual primitive_id output_primitive_id() const {
         return "scale_wa_reorder";
@@ -7855,9 +8115,9 @@ public:
         auto expected = parent::calculate_reference();
 
         for (size_t bi = 0; bi < this->batch_num(); ++bi)
-        for (size_t fi = 0; fi < this->output_features(); ++fi) {
-            expected[bi][fi] = reference_scale_post_op<OutputT>(expected[bi][fi], _scale[fi], _shift[fi]);
-        }
+            for (size_t fi = 0; fi < this->output_features(); ++fi) {
+                expected[bi][fi] = reference_scale_post_op<OutputT>(expected[bi][fi], _scale[fi], _shift[fi]);
+            }
         return expected;
     }
 
@@ -7872,197 +8132,153 @@ protected:
     VF<OutputT> _shift;
 };
 
-using convolution_scale_random_test_s8s8f32 = convolution_scale_random_test<int8_t, int8_t, float>;
-using convolution_scale_random_test_u8s8f32 = convolution_scale_random_test<uint8_t, int8_t, float>;
-
-TEST_P(convolution_scale_random_test_s8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
-}
-
-TEST_P(convolution_scale_random_test_u8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
-}
-
-INSTANTIATE_TEST_CASE_P(
-    b_fs_yx_fsv4,
-    convolution_scale_random_test_s8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input features
-        testing::Values(16, 32),                                                           // output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0)),                                        // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::b_fs_yx_fsv4)                                              // input format
-    ),
-    to_string_convolution_random_params<convolution_random_test_params>);
-
-INSTANTIATE_TEST_CASE_P(
-    b_fs_yx_fsv4,
-    convolution_scale_random_test_u8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input features
-        testing::Values(16, 32),                                                           // output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0)),                                        // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::b_fs_yx_fsv4)                                              // input format
-    ),
-    to_string_convolution_random_params<convolution_random_test_params>);
-
-template <typename InputT, typename WeightsT, typename OutputT>
-class convolution_asymm_weights_data_random_test : public convolution_random_test<InputT, WeightsT, OutputT> {
-    using parent = convolution_random_test<InputT, WeightsT, OutputT>;
-
-    virtual primitive_id output_primitive_id() const {
-       return "conv_wa_reorder";
-    }
+class convolution_random_smoke_test : public testing::TestWithParam<convolution_random_test_all_params> {};
 
-    topology build_topology(const cldnn::engine& engine) override {
-        auto input_lay = layout(this->input_type(), this->input_format(), this->input_size());
-        auto wei_lay = layout(this->weights_type(), format::bfyx, this->weights_size());
-        auto data_zp_lay = layout(this->input_type(), format::bfyx, tensor(batch(1), feature(this->input_features()), spatial(1, 1)));
-        auto wei_zp_lay = layout(this->weights_type(), format::bfyx, tensor(batch(this->output_features()), feature(1), spatial(1, 1)));
+using convolution_random_test_s8s8f32 = convolution_random_test_base<int8_t, int8_t, float>;
+using convolution_random_test_u8s8f32 = convolution_random_test_base<uint8_t, int8_t, float>;
 
-        auto wei_mem = memory::allocate(engine, wei_lay);
-        auto data_zp_mem = memory::allocate(engine, data_zp_lay);
-        auto wei_zp_mem = memory::allocate(engine, wei_zp_lay);
-        auto weights_flat = flatten_4d(format::bfyx, this->_weights);
-        set_values(wei_mem, weights_flat);
-        set_values(data_zp_mem, _data_zp);
-        set_values(wei_zp_mem, _weights_zp);
+using convolution_scale_random_test_s8s8f32 = convolution_scale_random_test<int8_t, int8_t, float>;
+using convolution_scale_random_test_u8s8f32 = convolution_scale_random_test<uint8_t, int8_t, float>;
 
-        auto topo = topology();
-        topo.add(input_layout("input", input_lay));
-        topo.add(data("weights", wei_mem));
-        topo.add(data("data_zp", data_zp_mem));
-        topo.add(data("weights_zp", wei_zp_mem));
-        auto input_asymm_prim = eltwise("input_asymm", "input", "data_zp", eltwise_mode::sub);
-        auto weights_asymm_prim = eltwise("weights_asymm", "weights", "weights_zp", eltwise_mode::sub);
-        input_asymm_prim.output_data_type = data_types::f32;
-        weights_asymm_prim.output_data_type = data_types::f32;
-        topo.add(input_asymm_prim);
-        topo.add(weights_asymm_prim);
-        if (!this->has_bias()) {
-            auto conv_prim = convolution(
-                "conv",
-                "input_asymm",
-                { "weights_asymm" },
-                tensor(batch(0), feature(0), spatial(this->_stride_x, this->_stride_y)),
-                tensor(batch(0), feature(0), spatial(this->_offset_x, this->_offset_y)),
-                tensor(batch(0), feature(0), spatial(this->_dilation_x, this->_dilation_y)));
-            conv_prim.output_data_type = this->output_type();
-            topo.add(conv_prim);
-        } else {
-            auto bias_lay = layout(this->output_type(), format::bfyx, tensor(feature(this->output_features())));
-            auto bias_mem = memory::allocate(engine, bias_lay);
-            set_values(bias_mem, this->_bias);
-            topo.add(data("bias", bias_mem));
-            auto conv_prim = convolution(
-                "conv",
-                "input_asymm",
-                { "weights_asymm" },
-                { "bias" },
-                tensor(batch(0), feature(0), spatial(this->_stride_x, this->_stride_y)),
-                tensor(batch(0), feature(0), spatial(this->_offset_x, this->_offset_y)),
-                tensor(batch(0), feature(0), spatial(this->_dilation_x, this->_dilation_y)));
-            conv_prim.output_data_type = this->output_type();
-            topo.add(conv_prim);
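+// Helper that accumulates convolution_random_test_all_params; smoke_test_params()/extra_test_params() append
+// typical 1x1/3x3/5x5/7x7, depthwise and dilated cases for a given input format and zero-point (asymmetric) flags.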
+struct params_generator : std::vector<convolution_random_test_all_params> {
+    params_generator& smoke_test_params(format::type input_format, bool asymm_weights = false, bool asymm_data = false) {
+        std::vector<size_t> batches = { 1, 2 };
+        for (auto b : batches) {
+            // 7x7
+            push_back(convolution_random_test_all_params{
+                b, 3, 32, { 28, 28 }, { 7, 7 }, { 2, 2 }, { -3, -3 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            // 3x3
+            push_back(convolution_random_test_all_params{
+                b, 32, 48, { 14, 14 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 32, 48, { 14, 14 }, { 3, 3 }, { 2, 2 }, { -1, -1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            // 1x1
+            push_back(convolution_random_test_all_params{
+                b, 32, 48, { 28, 28 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 32, 48, { 28, 28 }, { 1, 1 }, { 2, 2 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            // 5x5
+            push_back(convolution_random_test_all_params{
+                b, 32, 48, { 28, 28 }, { 5, 5 }, { 1, 1 }, { -2, -2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 32, 48, { 28, 28 }, { 5, 5 }, { 2, 2 }, { -2, -2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            // depthwise
+            push_back(convolution_random_test_all_params{
+                b, 64, 64, { 19, 19 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 1, 1 }, true, 64, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 64, 64, { 19, 19 }, { 3, 3 }, { 2, 2 }, { -1, -1 }, { 1, 1 }, true, 64, input_format, asymm_weights, asymm_data });
+            // dilation
+            push_back(convolution_random_test_all_params{
+                b, 32, 24, { 19, 19 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 2, 2 }, true, 1, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 32, 24, { 19, 19 }, { 3, 3 }, { 2, 2 }, { -1, -1 }, { 2, 2 }, true, 1, input_format, asymm_weights, asymm_data });
         }
-        topo.add(reorder("conv_wa_reorder", "conv", format::bfyx, this->output_type()));
-
-        return topo;
+        return *this;
+    }
+
+    params_generator& extra_test_params(format::type input_format, bool asymm_weights = false, bool asymm_data = false) {
+        std::vector<size_t> batches = { 1, 2 };
+        for (auto b : batches) {
+            // 1x1
+            push_back(convolution_random_test_all_params{
+                b, 23, 41, { 19, 19 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 23, 41, { 19, 19 }, { 1, 1 }, { 2, 2 }, { 0, 0 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            // 3x3
+            push_back(convolution_random_test_all_params{
+                b, 16, 28, { 14, 14 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 23, 41, { 19, 17 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            // 5x5
+            push_back(convolution_random_test_all_params{
+                b, 16, 28, { 14, 14 }, { 5, 5 }, { 1, 1 }, { -2, -2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+            push_back(convolution_random_test_all_params{
+                b, 23, 41, { 19, 17 }, { 5, 5 }, { 1, 1 }, { -2, -2 }, { 1, 1 }, true, 1, input_format, asymm_weights, asymm_data });
+        }
+        return *this;
     }
 
-    VVVVF<OutputT> calculate_reference() override {
-        VVVVF<OutputT> expected = VVVVF<OutputT>(this->batch_num(), VVVF<OutputT>(this->output_features()));
-        for (size_t bi = 0; bi < this->batch_num(); ++bi)
-            for (size_t fi = 0; fi < this->output_features(); ++fi) {
-                auto bias = this->has_bias() ? this->_bias[fi] : static_cast<OutputT>(0);
-                expected[bi][fi] = reference_convolve<InputT, OutputT, WeightsT>(
-                    this->_input[bi],
-                    this->_weights[fi],
-                    this->_stride_y,
-                    this->_stride_x,
-                    static_cast<float>(bias),
-                    this->_dilation_y,
-                    this->_dilation_x,
-                    this->_offset_y,
-                    this->_offset_x,
-                    0,
-                    0,
-                    0,
-                    0,
-                    false,
-                    _data_zp,
-                    _weights_zp[fi]);
-            }
-        return expected;
+    params_generator& all_test_params(format::type input_format, bool asymm_weights = false, bool asymm_data = false) {
+        return smoke_test_params(input_format, asymm_weights, asymm_data)
+            .extra_test_params(input_format, asymm_weights, asymm_data);
     }
 
-    void param_set_up(const convolution_random_test_all_params& params) override {
-        parent::param_set_up(params);
-
-        _data_zp = generate_random_1d<InputT>(this->input_features(), -128, 128);
-        _weights_zp = generate_random_1d<WeightsT>(this->output_features(), -128, 128);
+    params_generator& add(convolution_random_test_all_params params) {
+        push_back(params);
+        return *this;
     }
-
-protected:
-    VF<InputT> _data_zp;
-    VF<WeightsT> _weights_zp;
 };
 
-using convolution_asymm_random_test_s8s8f32 = convolution_asymm_weights_data_random_test<int8_t, int8_t, float>;
-using convolution_asymm_random_test_u8s8f32 = convolution_asymm_weights_data_random_test<uint8_t, int8_t, float>;
-
-TEST_P(convolution_asymm_random_test_s8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
+TEST_P(convolution_random_smoke_test, u8s8f32) {
+    convolution_random_test_u8s8f32 test;
+    ASSERT_NO_FATAL_FAILURE(test.run_random(GetParam()));
 }
 
-TEST_P(convolution_asymm_random_test_u8s8f32, random) {
-    ASSERT_NO_FATAL_FAILURE(run_random(convert_random_test_params(GetParam())));
+TEST_P(convolution_random_smoke_test, u8s8f32_scale) {
+    convolution_scale_random_test_u8s8f32 test;
+    ASSERT_NO_FATAL_FAILURE(test.run_random(GetParam()));
 }
 
 INSTANTIATE_TEST_CASE_P(
-    basic_asymm,
-    convolution_asymm_random_test_s8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input features
-        testing::Values(16, 32),                                                           // output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0)),                                        // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::bfyx, format::b_fs_yx_fsv32)                               // input format
+    basic,
+    convolution_random_smoke_test,
+    testing::ValuesIn(
+        params_generator()
+        .smoke_test_params(format::b_fs_yx_fsv4)
+        .smoke_test_params(format::bfyx)
+        .smoke_test_params(format::b_fs_yx_fsv32)
+        .smoke_test_params(format::b_fs_yx_fsv32, true, true)
+        .smoke_test_params(format::b_fs_yx_fsv32, false, true)
+        .smoke_test_params(format::b_fs_yx_fsv32, true, false)
+        .smoke_test_params(format::b_fs_yx_fsv16)
     ),
-    to_string_convolution_random_params<convolution_random_test_params>);
+    to_string_convolution_all_params
+);
+
+class convolution_random_all_test : public testing::TestWithParam<convolution_random_test_all_params> {};
+
+TEST_P(convolution_random_all_test, u8s8f32) {
+    convolution_random_test_u8s8f32 test;
+    ASSERT_NO_FATAL_FAILURE(test.run_random(GetParam()));
+}
+
+TEST_P(convolution_random_all_test, s8s8f32) {
+    convolution_random_test_s8s8f32 test;
+    ASSERT_NO_FATAL_FAILURE(test.run_random(GetParam()));
+}
+
+TEST_P(convolution_random_all_test, u8s8f32_scale) {
+    convolution_scale_random_test_u8s8f32 test;
+    ASSERT_NO_FATAL_FAILURE(test.run_random(GetParam()));
+}
+
+TEST_P(convolution_random_all_test, s8s8f32_scale) {
+    convolution_scale_random_test_s8s8f32 test;
+    ASSERT_NO_FATAL_FAILURE(test.run_random(GetParam()));
+}
 
 INSTANTIATE_TEST_CASE_P(
-    basic_asymm,
-    convolution_asymm_random_test_u8s8f32,
-    testing::Combine(
-        testing::Values(1, 2),                                                             // batch
-        testing::Values(3, 32),                                                            // input features
-        testing::Values(16, 32),                                                           // output features
-        testing::Values(std::pair<size_t, size_t>(7, 7), std::pair<size_t, size_t>(8, 8)), // input x, y
-        testing::Values(std::pair<size_t, size_t>(1, 1), std::pair<size_t, size_t>(3, 3)), // filter x, y
-        testing::Values(std::pair<int, int>(1, 1), std::pair<int, int>(2, 2)),             // strides x, y
-        testing::Values(std::pair<int, int>(0, 0)),                                        // offsets x, y
-        testing::Values(std::pair<int, int>(1, 1)),                                        // dilation x, y
-        testing::Values(false, true),                                                      // bias
-        testing::Values(format::bfyx, format::b_fs_yx_fsv32)                               // input format
+    DISABLED_basic,
+    convolution_random_all_test,
+    testing::ValuesIn(
+        params_generator()
+        .all_test_params(format::bfyx)
+        .all_test_params(format::bfyx, true, true)
+        .all_test_params(format::bfyx, false, true)
+        .all_test_params(format::bfyx, true, false)
+        .all_test_params(format::b_fs_yx_fsv4)
+        // byxf_af32 - depthwise broken for batch > 1
+        // .smoke_test_params(format::byxf_af32)
+        .all_test_params(format::b_fs_yx_fsv32)
+        .all_test_params(format::b_fs_yx_fsv32, true, true)
+        .all_test_params(format::b_fs_yx_fsv32, false, true)
+        .all_test_params(format::b_fs_yx_fsv32, true, false)
+        .all_test_params(format::b_fs_yx_fsv16)
+        .add(convolution_random_test_all_params{
+            1, 89, 3, { 1, 1 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 1, 1 }, true, 1, format::b_fs_yx_fsv4, false, false })
     ),
-    to_string_convolution_random_params<convolution_random_test_params>);
+    to_string_convolution_all_params
+);
 
 class convolution_test : public tests::generic_test
 {
index 522559e..3a1210e 100644 (file)
@@ -20,6 +20,7 @@
 #include "api/memory.hpp"
 #include <api/input_layout.hpp>
 #include "api/deconvolution.hpp"
+#include "api/crop.hpp"
 #include <api/data.hpp>
 #include <api/topology.hpp>
 #include <api/network.hpp>
@@ -1548,6 +1549,89 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_pad1) {
 
 }
 
+TEST(deconvolution_f16_gpu, basic_k9x9_s2x2_pad4x4) {
+    //  Filter : 1x32x9x9
+    //  Input  : 1x32x16x16
+    //  Stride : 2x2
+    //  Pad    : 4x4
+
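+    // Compares a plain f16-weights deconvolution ("deconv") against a network built from f32 weights
+    // with optimize_data enabled, whose output is reordered back to f16 ("out").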
+    // A dedicated engine is used here rather than the shared get_test_engine().
+    engine engine;
+
+    VVVVF<FLOAT16> input_rnd = generate_random_4d<FLOAT16>(1, 32, 16, 16, -2, 2);
+    VF<FLOAT16> input_rnd_vec = flatten_4d<FLOAT16>(format::bfyx, input_rnd);
+    VVVVF<FLOAT16> filter_rnd = generate_random_4d<FLOAT16>(1, 32, 9, 9, -1, 1);
+    VF<FLOAT16> filter_rnd_vec = flatten_4d<FLOAT16>(format::bfyx, filter_rnd);
+    VF<FLOAT16> bias_rnd = generate_random_1d<FLOAT16>(1, -1, 1);
+    VF<float> filter_rnd_f32_vec, bias_f32_rnd;
+
+    for (unsigned int i = 0; i < filter_rnd_vec.size(); i++)
+        filter_rnd_f32_vec.push_back(float(filter_rnd_vec[i]));
+
+    for (unsigned int i = 0; i < bias_rnd.size(); i++)
+        bias_f32_rnd.push_back(float(bias_rnd[i]));
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 32, 16, 16 } });
+    auto weights = memory::allocate(engine, { data_types::f16, format::oiyx, { 1, 32, 9, 9 } });
+    auto biases = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 1, 1 } });
+    auto weights_f32 = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 32, 9, 9 } });
+    auto biases_f32 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    set_values(input, input_rnd_vec);
+    set_values(weights, filter_rnd_vec);
+    set_values(biases, bias_rnd);
+    set_values(weights_f32, filter_rnd_f32_vec);
+    set_values(biases_f32, bias_f32_rnd);
+
+    topology topology_ref(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -4, -4 }, tensor{ 1, 1, 32, 32 })
+    );
+
+    network network_ref(engine, topology_ref);
+    network_ref.set_input_data("input", input);
+
+    auto outputs_ref = network_ref.execute();
+    EXPECT_EQ(outputs_ref.size(), size_t(1));
+    EXPECT_EQ(outputs_ref.begin()->first, "deconv");
+    auto output_ref_prim = outputs_ref.begin()->second.get_memory();
+    auto output_ref_ptr = output_ref_prim.pointer<FLOAT16>();
+
+    std::vector<FLOAT16> output_vec_ref;
+    for (unsigned int i = 0; i < output_ref_prim.get_layout().count(); i++)
+    {
+        output_vec_ref.push_back(output_ref_ptr[i]);
+    }
+
+    topology topology_act(
+        input_layout("input_act", input.get_layout()),
+        data("weights_f32", weights_f32),
+        data("biases_f32", biases_f32),
+        deconvolution("deconv_act", "input_act", { "weights_f32" }, { "biases_f32" }, { 1, 1, 2, 2 }, { 0, 0, -4, -4 }),
+        reorder("out", "deconv_act", format::bfyx, data_types::f16)
+    );
+
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network_act(engine, topology_act, options);
+    network_act.set_input_data("input_act", input);
+
+    auto outputs_act = network_act.execute();
+    EXPECT_EQ(outputs_act.size(), size_t(1));
+    EXPECT_EQ(outputs_act.begin()->first, "out");
+    auto output_act_prim = outputs_act.begin()->second.get_memory();
+    auto output_act_ptr = output_act_prim.pointer<FLOAT16>();
+
+    std::vector<float> output_vec;
+    for (unsigned int i = 0; i < output_act_prim.get_layout().count(); i++)
+    {
+        float x = float_round(output_act_ptr[i]), y = float_round(output_vec_ref[i]);
+        EXPECT_NEAR(x, y, 1e-0f);
+    }
+}
+
 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad1) {
     //  Filter : 2x2
     //  Input  : 2x2x1x2
index cf435ea..4cc7997 100644 (file)
 #include <api/memory.hpp>
 #include <api/depth_to_space.hpp>
 #include <api/topology.hpp>
+#include <api/reshape.hpp>
 #include <api/network.hpp>
+#include "api/permute.hpp"
+#include "api/reorder.hpp"
 
 #include <cstddef>
 #include <tests/test_utils/test_utils.h>
@@ -211,42 +214,65 @@ TEST(depth_to_space_fp32_gpu, d1411_bs2) {
     }
 }
 
-TEST(depth_to_space_fp32_gpu, d1421_bs2) {
-    //  Input  : 1x4x2x1
+TEST(depth_to_space_fp32_gpu, d112960540_bs2) {
+    //  Input  : 1x12x960x540
     //  Block size : 2
-    //  Output : 1x1x4x2
-    //  Input values in fp32
+    //  Output : 1x3x1920x1080
+    //  Input values in fp16
 
     engine engine;
 
-    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 2 } });
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 12, 960, 540 } });
     size_t block_size = 2;
 
-    set_values(input1, {
-        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
-    });
+    auto random_input = generate_random_4d<FLOAT16>(1, 12, 540, 960, -1, 1);
+    auto input_rnd_vec = flatten_4d<FLOAT16>(format::bfyx, random_input);
+    set_values(input1, input_rnd_vec);
 
-    topology topology;
-    topology.add(input_layout("Input0", input1.get_layout()));
-    topology.add(
+    topology topology_act;
+    topology_act.add(input_layout("Input0", input1.get_layout()));
+    topology_act.add(
         depth_to_space("depth_to_space", "Input0", block_size)
     );
 
-    network network(engine, topology);
+    network network_act(engine, topology_act);
 
-    network.set_input_data("Input0", input1);
+    network_act.set_input_data("Input0", input1);
 
-    auto outputs = network.execute();
+    auto outputs = network_act.execute();
 
     auto output = outputs.at("depth_to_space").get_memory();
-    auto output_ptr = output.pointer<float>();
+    auto output_ptr = output.pointer<FLOAT16>();
 
-    std::vector<float> expected_results = {
-        0.f, 2.f, 4.f, 6.f, 1.f, 3.f, 5.f, 7.f
-    };
+    std::vector<uint16_t> perm = { 0,4,5,2,1,3 };
 
-    for (size_t i = 0; i < expected_results.size(); ++i) {
-        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    topology topology_ref;
+    topology_ref.add(input_layout("Input0", input1.get_layout()));
+    topology_ref.add(reorder("reorder1", "Input0", { data_types::f16, format::bfwzyx, tensor{ batch(1), feature(12), spatial(1, 1, 960, 540) }
+        }));
+    topology_ref.add(
+        reshape("reshape", "reorder1", tensor{ batch(1), feature(2), spatial(960, 540, 3, 2) })
+    );
+    topology_ref.add(
+        permute("perm", "reshape", perm)
+    );
+    topology_ref.add(
+        reshape("reshape2", "perm", tensor(1, 3, 2 * 960, 2 * 540))
+    );
+
+    build_options build_opt;
+
+    build_opt.set_option(build_option::optimize_data(true));
+    network network_ref(engine, topology_ref, build_opt);
+    network_ref.set_input_data("Input0", input1);
+
+    auto outputs_ref = network_ref.execute();
+
+    auto output_ref = outputs_ref.at("reshape2").get_memory();
+    auto output_ptr_ref = output_ref.pointer<FLOAT16>();
+
+    for (size_t i = 0; i < output.get_layout().count(); ++i) {
+        EXPECT_EQ(output_ptr_ref[i], output_ptr[i]);
     }
 }
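The reference topology above reproduces depth_to_space through a reorder/reshape/permute/reshape chain. As a rough stand-alone illustration of that equivalence (not tied to the exact cldnn permute order used in the test; CRD-style channel ordering on a plain row-major bfyx buffer, all names hypothetical):

// Minimal sketch: output[c][y][x] = input[c*b*b + (y%b)*b + (x%b)][y/b][x/b]
#include <cstddef>
#include <vector>

std::vector<float> depth_to_space_ref(const std::vector<float>& in,
                                      std::size_t C, std::size_t H, std::size_t W, std::size_t b) {
    // in  : 1 x (C*b*b) x H x W, row-major
    // out : 1 x C x (H*b) x (W*b)
    std::vector<float> out(C * H * b * W * b);
    for (std::size_t c = 0; c < C; ++c)
        for (std::size_t y = 0; y < H * b; ++y)
            for (std::size_t x = 0; x < W * b; ++x) {
                std::size_t in_c = c * b * b + (y % b) * b + (x % b);
                out[(c * H * b + y) * W * b + x] = in[(in_c * H + y / b) * W + x / b];
            }
    return out;
}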
 
index 927bf53..9b750a1 100644 (file)
@@ -3316,6 +3316,7 @@ static std::vector<data_types> types = {data_types::f32, data_types::f16};
 static std::vector<std::vector<tensor>> inputs = {
         {{1, 2, 3, 4}, {1, 2, 3, 4}},
         {{1, 16, 8, 2}, {1, 16, 8, 2}},
+        {{1, 128, 16, 8}, {1, 1, 16, 8}},
         {{1, 32, 2, 2}, {1, 32, 2, 2}},
         {{8, 32, 4, 5}, {8, 32, 4, 5}},
         {{1, 2, 3, 4}, {1, 2, 1, 1}},
index 0171607..d6ff3ab 100644 (file)
@@ -26,6 +26,7 @@
 #include <api/engine.hpp>
 #include "test_utils/test_utils.h"
 #include <api/data.hpp>
+#include <api/depth_to_space.hpp>
 
 #include <api_extension/fused_conv_eltwise.hpp>
 
@@ -76,6 +77,77 @@ TEST(fused_conv_eltwise, basic_0)
     EXPECT_EQ(out_layout.size.spatial[1], 5);
 }
 
+TEST(fused_conv_eltwise, basic_image2d)
+{
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 4, 128, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 3, 256, 4 } });
+    auto weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 12, 4, 1, 1 } });
+
+    auto input_data1 = generate_random_4d<FLOAT16>(1, 4, 2, 128, -1, 1);
+    auto input_data1_bfyx = flatten_4d(format::bfyx, input_data1);
+    set_values(input, input_data1_bfyx);
+
+    auto input_data2 = generate_random_4d<FLOAT16>(1, 3, 4, 256, -1, 1);
+    auto input_data2_bfyx = flatten_4d(format::bfyx, input_data2);
+    set_values(input2, input_data2_bfyx);
+
+    auto weights_data = generate_random_4d<FLOAT16>(12, 4, 1, 1, -1, 1);
+    auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data);
+    set_values(weights, weights_data_bfyx);
+
+    topology topology_act(
+        input_layout("input", input.get_layout()),
+        input_layout("input2", input2.get_layout()),
+        data("weights", weights),
+        convolution("conv", "input", { "weights" }),
+        depth_to_space("depth_to_space", "conv", 2),
+        eltwise("eltwise", "input2", "depth_to_space", eltwise_mode::sum)
+    );
+
+    build_options opt_act;
+    opt_act.set_option(build_option::optimize_data(true));
+    network network_act(engine, topology_act, opt_act);
+    network_act.set_input_data("input", input);
+    network_act.set_input_data("input2", input2);
+
+    auto outputs_act = network_act.execute();
+    EXPECT_EQ(outputs_act.size(), size_t(1));
+    EXPECT_EQ(outputs_act.begin()->first, "eltwise");
+
+    auto output_act = outputs_act.begin()->second.get_memory();
+    auto&& out_act_layout = output_act.get_layout();
+    auto out_act_ptr = output_act.pointer<uint8_t>();
+
+    topology topology_ref(
+        input_layout("input", input.get_layout()),
+        input_layout("input2", input2.get_layout()),
+        data("weights", weights),
+        convolution("conv", "input", { "weights" }),
+        depth_to_space("depth_to_space", "conv", 2),
+        eltwise("eltwise", "input2", "depth_to_space", eltwise_mode::sum),
+        reorder("out", "eltwise", format::image_2d_rgba, data_types::u8));
+
+    build_options opt_ref;
+    opt_ref.set_option(build_option::optimize_data(false));
+    network network_ref(engine, topology_ref, opt_ref);
+    network_ref.set_input_data("input", input);
+    network_ref.set_input_data("input2", input2);
+
+    auto outputs_ref = network_ref.execute();
+    EXPECT_EQ(outputs_ref.size(), size_t(1));
+    EXPECT_EQ(outputs_ref.begin()->first, "out");
+
+    auto output_ref = outputs_ref.begin()->second.get_memory();
+    auto&& out_ref_layout = output_ref.get_layout();
+    auto out_ref_ptr = output_ref.pointer<uint8_t>();
+
+    for (int i = 0;i < 3 * 256 * 4;i++) {
+        EXPECT_EQ(out_act_ptr[i], out_ref_ptr[i]);
+    }
+}
+
 TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
 {
     const auto& engine = get_test_engine();
index e5cd5db..3ee79aa 100644 (file)
@@ -87,7 +87,8 @@ template<typename T>
 class BaseFusingTest : public ::testing::TestWithParam<T> {
 public:
     cldnn::engine engine;
-    cldnn::topology topology;
+    cldnn::topology topology_fused;
+    cldnn::topology topology_non_fused;
     cldnn::build_options bo_fused;
     cldnn::build_options bo_not_fused;
 
@@ -110,7 +111,13 @@ public:
             size_t count = 0;
             for (auto& pi : net.get_primitives_info()) {
                 if (pi.type_id == "reorder") {
-                    count++;
+                    auto exec_prims = net.get_executed_primitives();
+                    auto it = std::find_if(exec_prims.begin(), exec_prims.end(), [&](const std::pair<primitive_id, event>& e) -> bool {
+                        return e.first == pi.original_id;
+                    });
+                    // We count executed reorders only
+                    if (it != exec_prims.end())
+                        count++;
                 }
             }
             return count;
@@ -238,6 +245,12 @@ public:
     layout get_single_element_layout(T& p) {
         return layout{ p.default_type, p.default_format, tensor{1, 1, 1, 1} };
     }
+
+    template <class... Args>
+    void create_topologies(Args const&... args) {
+        topology_fused.add(args...);
+        topology_non_fused.add(args...);
+    }
 };
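create_topologies() above fans the same primitive list out to both members, so the fused and non-fused networks are always built from identical graphs and differ only in their build options. A minimal stand-alone analogue of that pattern (hypothetical stub types, C++17 fold expression, illustrative only):

#include <string>
#include <vector>

struct topology_stub {
    std::vector<std::string> prims;
    template <class... Args>
    void add(Args const&... args) { (prims.push_back(args), ...); }  // append each argument in order
};

struct fusing_fixture_stub {
    topology_stub topology_fused, topology_non_fused;
    template <class... Args>
    void create_topologies(Args const&... args) {
        topology_fused.add(args...);      // same primitives for the fused build
        topology_non_fused.add(args...);  // ... and for the reference build
    }
};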
 
 class WeightsPrimitiveFusingTest : public ::BaseFusingTest<bc_test_params> {
@@ -245,8 +258,8 @@ public:
 
     void execute(bc_test_params& p) {
         auto input_prim = get_mem(get_input_layout(p));
-        network network_not_fused(this->engine, this->topology, bo_not_fused);
-        network network_fused(this->engine, this->topology, bo_fused);
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
         network_fused.set_input_data("input", input_prim);
         network_not_fused.set_input_data("input", input_prim);
 
@@ -269,8 +282,8 @@ public:
 
     void execute(resample_test_params& p) {
         auto input_prim = get_mem(get_input_layout(p));
-        network network_not_fused(this->engine, this->topology, bo_not_fused);
-        network network_fused(this->engine, this->topology, bo_fused);
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
         network_fused.set_input_data("input", input_prim);
         network_not_fused.set_input_data("input", input_prim);
 
@@ -293,8 +306,8 @@ public:
         auto input0_prim = get_mem(get_input_layout(p, 0));
         auto input1_prim = get_mem(get_input_layout(p, 1));
 
-        network network_not_fused(this->engine, this->topology, bo_not_fused);
-        network network_fused(this->engine, this->topology, bo_fused);
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
         network_fused.set_input_data("input0", input0_prim);
         network_not_fused.set_input_data("input0", input0_prim);
         network_fused.set_input_data("input1", input1_prim);
@@ -357,6 +370,7 @@ public:
 #define CASE_CONV_U8S8_4 {1, 17, 4, 5}, {1, 17, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 17, data_types::u8, format::bfyx, data_types::i8, format::goiyx, data_types::f32, format::bfyx
 #define CASE_CONV_U8S8_5 {1, 16, 5, 5}, {1, 32, 5, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
 #define CASE_CONV_U8S8_6 {1, 17, 4, 5}, {1, 17, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 17, data_types::u8, format::bfyx, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+#define CASE_CONV_U8S8_7 {1, 64, 7, 7}, {1, 32, 7, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
 
 #define CASE_CONV_S8S8_1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
 #define CASE_CONV_S8S8_2 {1, 15, 5, 5}, {1, 30, 3, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
@@ -364,6 +378,7 @@ public:
 #define CASE_CONV_S8S8_4 {1, 17, 4, 5}, {1, 17, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 17, data_types::i8, format::bfyx, data_types::i8, format::goiyx, data_types::f32, format::bfyx
 #define CASE_CONV_S8S8_5 {1, 16, 5, 5}, {1, 32, 5, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
 #define CASE_CONV_S8S8_6 {1, 17, 4, 5}, {1, 17, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 17, data_types::i8, format::bfyx, data_types::i8, format::goiyx, data_types::f32, format::bfyx
+#define CASE_CONV_S8S8_7 {1, 64, 7, 7}, {1, 32, 7, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
 
 #define CASE_CONV3D_U8S8_1 {1, 15, 5, 4, 5}, {1, 30, 3, 2, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
 #define CASE_CONV3D_U8S8_2 {1, 15, 5, 5, 5}, {1, 30, 3, 3, 3}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfzyx, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
@@ -406,7 +421,7 @@ public:
 class conv_fp32_activation : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_activation, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
@@ -433,7 +448,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_activation, ::testing::ValuesIn(s
 class conv_fp32_scale : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_scale, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
@@ -464,7 +479,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale,
 class conv_fp32_prelu_eltwise : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_prelu_eltwise, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("slope_data", get_mem(get_per_channel_layout(p))),
@@ -481,7 +496,7 @@ TEST_P(conv_fp32_prelu_eltwise, basic) {
 
 TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("slope_data", get_mem(get_per_channel_layout(p))),
@@ -502,7 +517,7 @@ TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
 TEST_P(conv_fp32_prelu_eltwise, vector_ops_mixed_types) {
     auto p = GetParam();
     auto slope_type = p.default_type == data_types::f32 ? data_types::f16 : data_types::f32;
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("slope_data", get_mem(layout{ slope_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} })),
@@ -537,7 +552,7 @@ class conv_fp32_eltwise_b_fs_zyx_fsv16 : public WeightsPrimitiveFusingTest {};
 
 TEST_P(conv_fp32_eltwise_b_fs_zyx_fsv16, vector_ops) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("eltwise_data", get_mem(get_output_layout(p))),
@@ -556,7 +571,7 @@ TEST_P(conv_fp32_eltwise_b_fs_zyx_fsv16, vector_ops) {
 class conv_fp32_swish : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_swish, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
@@ -587,11 +602,11 @@ TEST_P(conv_fp32_eltwise_b_fs_zyx_fsv16, splitted_vector_ops) {
 
     std::vector<std::string> weights_idx;
     for (size_t w = 0; w < p.groups; w++) {
-        topology.add(data("weights" + std::to_string(w), get_mem(get_weights_layout(p, p.groups))));
+        create_topologies(data("weights" + std::to_string(w), get_mem(get_weights_layout(p, p.groups))));
         weights_idx.push_back(("weights" + std::to_string(w)));
     }
 
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("eltwise_data", get_mem(get_output_layout(p))),
                  convolution("conv_prim", "input", weights_idx, {}, 1, p.stride, p.pad, p.dilation),
                  eltwise("eltwise", "conv_prim", "eltwise_data", eltwise_mode::sum),
@@ -626,7 +641,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_eltwise_b_fs_zyx_fsv16,
 class conv_fp32_quantize_u8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_quantize_u8, DISABLED_basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -650,7 +665,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_quantize_u8,
 class conv_fp32_scale_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_scale_quantize_i8, DISABLED_basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -678,7 +693,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_quantize_i8,
 class conv_fp32_scale_activation_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_scale_activation_quantize_i8, DISABLED_basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -705,7 +720,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8,
 class conv_fp32_scale_activation_quantize_i8_eltwise_fp32 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_scale_activation_quantize_i8_eltwise_fp32, DISABLED_basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -733,7 +748,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8_eltw
 class conv_fp32_scale_activation_quantize_i8_activation : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_scale_activation_quantize_i8_activation, DISABLED_basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -762,7 +777,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8_acti
 class conv_fp32_scale_activation_quantize_i8_eltwise_fp32_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_fp32_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, DISABLED_basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -800,7 +815,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale_activation_quantize_i8_eltw
 class conv_bin_activation : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_bin_activation, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
                  activation("activation", "bin_conv_prim", activation_func::relu),
@@ -819,7 +834,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_bin_activation,
 class conv_bin_scale_activation : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_bin_scale_activation, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
                  binary_convolution("bin_conv_prim", "input", {"weights"}, p.stride, p.pad, p.dilation, p.out_shape, p.groups),
@@ -841,7 +856,7 @@ class conv_bin_quantize_bin : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_bin_quantize_bin, channel_wise_quantize) {
     auto p = GetParam();
     auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random);
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  data("in_lo", in_thresh),
                  data("in_hi", in_thresh),
@@ -858,7 +873,7 @@ TEST_P(conv_bin_quantize_bin, channel_wise_quantize) {
 TEST_P(conv_bin_quantize_bin, blob_wise_quantize) {
     auto p = GetParam();
     auto in_thresh = get_mem(get_single_element_layout(p), min_random, max_random);
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  data("in_lo", in_thresh),
                  data("in_hi", in_thresh),
@@ -885,7 +900,7 @@ TEST_P(conv_bin_scale_conv_dw, dw_kernel_3x3_stride2) {
     auto dw_weights_layout = layout{p.default_type, format::goiyx, dw_tensor};
 
     auto dw_stride = tensor{1, 1, 2, 2};
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)),
@@ -904,7 +919,7 @@ TEST_P(conv_bin_scale_conv_dw, dw_kernel_3x3_stride1) {
     auto dw_weights_layout = layout{p.default_type, format::goiyx, dw_tensor};
 
     auto dw_stride = tensor{1, 1, 1, 1};
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)),
@@ -931,7 +946,7 @@ TEST_P(conv_bin_scale_conv_dw_prelu, dw_kernel_3x3_stride2) {
 
     auto dw_stride = tensor{1, 1, 2, 2};
     auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random);
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)),
@@ -953,7 +968,7 @@ TEST_P(conv_bin_scale_conv_dw_prelu, dw_kernel_3x3_stride1) {
 
     auto dw_stride = tensor{1, 1, 1, 1};
     auto in_thresh = get_mem(get_per_channel_layout(p), min_random, max_random);
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p), -127, 127)),
                  data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1e-1f)),
@@ -981,7 +996,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_bin_scale_conv_dw_prelu,
 class conv_int8_scale : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
@@ -1018,7 +1033,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale,
 class conv_int8_scale_shift_swish : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale_shift_swish, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
@@ -1060,7 +1075,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale_shift_swish,
 class conv_int8_byxf_af32 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_byxf_af32, per_channel_coeffs) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count()/255)),
@@ -1078,7 +1093,7 @@ TEST_P(conv_int8_byxf_af32, per_channel_coeffs) {
 
 TEST_P(conv_int8_byxf_af32, per_element_coeffs) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("eltwise_data", get_mem(get_output_layout(p))),
@@ -1111,7 +1126,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_byxf_af32,
 class conv_int8_prelu_eltwise : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_prelu_eltwise, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("slope_data", get_mem(get_per_channel_layout(p))),
@@ -1126,16 +1141,43 @@ TEST_P(conv_int8_prelu_eltwise, basic) {
     execute(p);
 }
 
+TEST_P(conv_int8_prelu_eltwise, fsv16) {
+    auto p = GetParam();
+    create_topologies(input_layout("input", get_input_layout(p)),
+                 data("weights", get_mem(get_weights_layout(p))),
+                 data("bias", get_mem(get_bias_layout(p))),
+                 data("slope_data", get_mem(get_per_channel_layout(p))),
+                 data("eltwise_data", get_mem(get_output_layout(p))),
+                 convolution("conv_prim", "input", { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
+                 activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
+                 eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::sum),
+                 reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
+    );
+
+    if (p.default_format.dimension() == 4) {
+        implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" };
+        bo_fused.set_option(build_option::force_implementations({ {"conv_prim", conv_impl} }));
+    } else {
+        // TODO Add 5D int8 optimized convolution implementations
+        return;
+    }
+
+    tolerance = 1e-5f;
+    execute(p);
+}
+
 INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_prelu_eltwise,
                         ::testing::ValuesIn(std::vector<bc_test_params>{
                                 bc_test_params{CASE_CONV_U8S8_1, 2, 4},
                                 bc_test_params{CASE_CONV_U8S8_2, 2, 4},
                                 bc_test_params{CASE_CONV_U8S8_3, 2, 4},
                                 bc_test_params{CASE_CONV_U8S8_4, 2, 4},
+                                bc_test_params{CASE_CONV_U8S8_7, 2, 4},
                                 bc_test_params{CASE_CONV_S8S8_1, 2, 4},
                                 bc_test_params{CASE_CONV_S8S8_2, 2, 4},
                                 bc_test_params{CASE_CONV_S8S8_3, 2, 4},
                                 bc_test_params{CASE_CONV_S8S8_4, 2, 4},
+                                bc_test_params{CASE_CONV_S8S8_7, 2, 4},
 
                                 bc_test_params{CASE_CONV3D_U8S8_1, 2, 4},
                                 bc_test_params{CASE_CONV3D_U8S8_2, 2, 4},
@@ -1150,7 +1192,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_prelu_eltwise,
 class conv_int8_quantize_u8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_quantize_u8, per_channel) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1168,7 +1210,7 @@ TEST_P(conv_int8_quantize_u8, per_channel) {
 
 TEST_P(conv_int8_quantize_u8, per_tensor) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_single_element_layout(p), -10)),
@@ -1208,7 +1250,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_quantize_u8,
 class conv_int8_scale_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1252,7 +1294,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale_quantize_i8,
 class conv_int8_scale_activation_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale_activation_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1295,7 +1337,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale_activation_quantize_i8,
 class conv_int8_scale_activation_quantize_i8_eltwise_fp32 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale_activation_quantize_i8_eltwise_fp32, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1339,7 +1381,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale_activation_quantize_i8_eltw
 class conv_int8_scale_activation_quantize_i8_activation : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale_activation_quantize_i8_activation, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1384,7 +1426,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale_activation_quantize_i8_acti
 class conv_int8_scale_activation_quantize_i8_eltwise_fp32_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1433,7 +1475,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale_activation_quantize_i8_eltw
 class conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec : public WeightsPrimitiveFusingTest {};
 TEST_P(conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec, vector_ops) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1465,7 +1507,7 @@ TEST_P(conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec, vector_op
 
 TEST_P(conv_int8_scale_prelu_quantize_i8_eltwise_fp32_quantize_i8_vec, vector_ops_mixed_types) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(get_weights_layout(p))),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1509,7 +1551,7 @@ TEST_P(conv_int8_asymmetric_weights, basic) {
     auto weights_format = (p.weights_format == format::goiyx) ? format::bfyx : format::bfzyx;
     auto weights_layout = (p.groups > 1) ? get_weights_layout(p, 1, weights_format) :
                                            get_weights_layout(p);
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(weights_layout)),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("w_zp", get_mem(get_weights_zp_layout(p), 1, 127)),
@@ -1520,16 +1562,17 @@ TEST_P(conv_int8_asymmetric_weights, basic) {
     tolerance = 1.f;
 
     auto input_prim = get_mem(get_input_layout(p));
-    network network_not_fused(this->engine, this->topology, bo_not_fused);
-    network network_fused(this->engine, this->topology, bo_fused);
+    network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+    network network_fused(this->engine, this->topology_fused, bo_fused);
     network_fused.set_input_data("input", input_prim);
     network_not_fused.set_input_data("input", input_prim);
 
     ASSERT_FALSE(network_fused.get_primitives_info().empty());
     ASSERT_FALSE(network_not_fused.get_primitives_info().empty());
 
+    // Search for both conv_prim and reorder_bfyx, because in the fused topology the convolution is merged with the last reorder
     auto find_conv = [](primitive_info& p) -> bool {
-        if (p.original_id == "conv_prim")
+        if (p.original_id == "conv_prim" || p.original_id == "reorder_bfyx")
             return true;
         return false;
     };
@@ -1575,7 +1618,7 @@ TEST_P(conv_int8_asymmetric_data, basic) {
     auto weights_format = (p.weights_format == format::goiyx) ? format::bfyx : format::bfzyx;
     auto weights_layout = (p.groups > 1) ? get_weights_layout(p, 1, weights_format) :
                           get_weights_layout(p);
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(weights_layout)),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("a_zp", get_mem(get_activations_zp_layout(p), 1, 127)),
@@ -1586,16 +1629,17 @@ TEST_P(conv_int8_asymmetric_data, basic) {
     tolerance = 1.f;
 
     auto input_prim = get_mem(get_input_layout(p));
-    network network_not_fused(this->engine, this->topology, bo_not_fused);
-    network network_fused(this->engine, this->topology, bo_fused);
+    network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+    network network_fused(this->engine, this->topology_fused, bo_fused);
     network_fused.set_input_data("input", input_prim);
     network_not_fused.set_input_data("input", input_prim);
 
     ASSERT_FALSE(network_fused.get_primitives_info().empty());
     ASSERT_FALSE(network_not_fused.get_primitives_info().empty());
 
+    // Search for both conv_prim and reorder_bfyx, because in the fused topology the convolution is merged with the last reorder
     auto find_conv = [](primitive_info& p) -> bool {
-        if (p.original_id == "conv_prim")
+        if (p.original_id == "conv_prim" || p.original_id == "reorder_bfyx")
             return true;
         return false;
     };
@@ -1641,7 +1685,7 @@ TEST_P(conv_int8_asymmetric_data_and_weights, basic) {
     auto weights_format = (p.weights_format == format::goiyx) ? format::bfyx : format::bfzyx;
     auto weights_layout = (p.groups > 1) ? get_weights_layout(p, 1, weights_format) :
                           get_weights_layout(p);
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                  data("weights", get_mem(weights_layout)),
                  data("bias", get_mem(get_bias_layout(p))),
                  data("a_zp", get_mem(get_activations_zp_layout(p), 1, 127)),
@@ -1654,16 +1698,17 @@ TEST_P(conv_int8_asymmetric_data_and_weights, basic) {
     tolerance = 1.f;
 
     auto input_prim = get_mem(get_input_layout(p));
-    network network_not_fused(this->engine, this->topology, bo_not_fused);
-    network network_fused(this->engine, this->topology, bo_fused);
+    network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+    network network_fused(this->engine, this->topology_fused, bo_fused);
     network_fused.set_input_data("input", input_prim);
     network_not_fused.set_input_data("input", input_prim);
 
     ASSERT_FALSE(network_fused.get_primitives_info().empty());
     ASSERT_FALSE(network_not_fused.get_primitives_info().empty());
 
+    // Search for both conv_prim and reorder_bfyx, because in the fused topology the convolution is merged with the last reorder
     auto find_conv = [](primitive_info& p) -> bool {
-        if (p.original_id == "conv_prim")
+        if (p.original_id == "conv_prim" || p.original_id == "reorder_bfyx")
             return true;
         return false;
     };
@@ -1709,7 +1754,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_asymmetric_data_and_weights,
 class fc_fp32_activation : public WeightsPrimitiveFusingTest {};
 TEST_P(fc_fp32_activation, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
                 data("weights", get_mem(get_weights_layout(p))),
                 data("bias", get_mem(get_bias_layout(p))),
                 fully_connected("fc_prim", "input", "weights", "bias"),
@@ -1730,7 +1775,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, fc_fp32_activation, ::testing::ValuesIn(std
 class fc_int8_scale : public WeightsPrimitiveFusingTest {};
 TEST_P(fc_int8_scale, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
         data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / p.kernel.count())),
@@ -1753,7 +1798,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, fc_int8_scale,
 class fc_int8_quantize_u8 : public WeightsPrimitiveFusingTest {};
 TEST_P(fc_int8_quantize_u8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
         data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1779,7 +1824,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu_fc, fc_int8_quantize_u8,
 class fc_int8_scale_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(fc_int8_scale_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
         data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1808,7 +1853,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, fc_int8_scale_quantize_i8,
 class fc_int8_scale_activation_quantize_i8 : public WeightsPrimitiveFusingTest {};
 TEST_P(fc_int8_scale_activation_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
         data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1837,7 +1882,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, fc_int8_scale_activation_quantize_i8,
 class gemm_int8_3in_quantize_i8 : public GemmFusingTest {};
 TEST_P(gemm_int8_3in_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input0", get_input_layout(p, 0)),
+    create_topologies(input_layout("input0", get_input_layout(p, 0)),
         input_layout("input1", get_input_layout(p, 1)),
         input_layout("input2", get_input_layout(p, 2)),
         data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
@@ -1863,7 +1908,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, gemm_int8_3in_quantize_i8,
 class gemm_int8_2in_quantize_u8 : public GemmFusingTest {};
 TEST_P(gemm_int8_2in_quantize_u8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input0", get_input_layout(p, 0)),
+    create_topologies(input_layout("input0", get_input_layout(p, 0)),
         input_layout("input1", get_input_layout(p, 1)),
         data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
         data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
@@ -1888,7 +1933,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, gemm_int8_2in_quantize_u8,
 class gemm_int8_2in_act_scale_quantize_i8 : public GemmFusingTest {};
 TEST_P(gemm_int8_2in_act_scale_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input0", get_input_layout(p, 0)),
+    create_topologies(input_layout("input0", get_input_layout(p, 0)),
         input_layout("input1", get_input_layout(p, 1)),
         data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
         data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
@@ -1939,7 +1984,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, gemm_int8_2in_act_scale_quantize_i8,
 class resample_quantize : public ResamplePrimitiveFusingTest {};
 TEST_P(resample_quantize, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
         data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
         data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
         data("out_lo", get_mem(get_single_element_layout(p), -127)),
@@ -1980,7 +2025,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, resample_quantize,
 class resample_scale_activation : public ResamplePrimitiveFusingTest {};
 TEST_P(resample_scale_activation, basic) {
     auto p = GetParam();
-    topology.add(input_layout("input", get_input_layout(p)),
+    create_topologies(input_layout("input", get_input_layout(p)),
         data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
         resample("resample_prim", "input", p.out_shape, p.in_shape.feature[0], p.type),
         scale("scale", "resample_prim", "scale_data"),
@@ -2041,16 +2086,24 @@ struct mvn_test_params {
 #define CASE_MVN_3D_F16_2   {2, 16, 8, 8, 8}, data_types::f16, format::bfzyx, true, true, data_types::f16, format::bfzyx
 #define CASE_MVN_I8_1       {1, 16, 8, 8},    data_types::i8, format::bfyx, false, true, data_types::f32, format::bfyx
 #define CASE_MVN_I8_2       {2, 16, 8, 8},    data_types::i8, format::bfyx, true, true, data_types::f32, format::bfyx
+#define CASE_MVN_I8_3       {1, 16, 8, 8},    data_types::i8, format::b_fs_yx_fsv16, false, true, data_types::f32, format::bfyx
+#define CASE_MVN_I8_4       {2, 16, 8, 8},    data_types::i8, format::b_fs_yx_fsv16, true, true, data_types::f32, format::bfyx
 #define CASE_MVN_3D_I8_1    {1, 16, 8, 8, 8}, data_types::i8, format::bfzyx, false, true, data_types::f32, format::bfzyx
 #define CASE_MVN_3D_I8_2    {2, 16, 8, 8, 8}, data_types::i8, format::bfzyx, true, true, data_types::f32, format::bfzyx
+#define CASE_MVN_U8_1       {1, 16, 8, 8},    data_types::u8, format::bfyx, false, true, data_types::f32, format::bfyx
+#define CASE_MVN_U8_2       {2, 16, 8, 8},    data_types::u8, format::bfyx, true, true, data_types::f32, format::bfyx
+#define CASE_MVN_U8_3       {1, 16, 8, 8},    data_types::u8, format::b_fs_yx_fsv16, false, true, data_types::f32, format::bfyx
+#define CASE_MVN_U8_4       {2, 16, 8, 8},    data_types::u8, format::b_fs_yx_fsv16, true, true, data_types::f32, format::bfyx
+#define CASE_MVN_3D_U8_1    {1, 16, 8, 8, 8}, data_types::u8, format::bfzyx, false, true, data_types::f32, format::bfzyx
+#define CASE_MVN_3D_U8_2    {2, 16, 8, 8, 8}, data_types::u8, format::bfzyx, true, true, data_types::f32, format::bfzyx
 
 class MVNFusingTest : public ::BaseFusingTest<mvn_test_params> {
 public:
     void execute(mvn_test_params& p) {
         auto input_prim = get_mem(get_input_layout(p));
 
-        network network_not_fused(this->engine, this->topology, bo_not_fused);
-        network network_fused(this->engine, this->topology, bo_fused);
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
 
         network_fused.set_input_data("input", input_prim);
         network_not_fused.set_input_data("input", input_prim);
@@ -2070,7 +2123,7 @@ public:
 class mvn_activation : public MVNFusingTest {};
 TEST_P(mvn_activation, basic) {
     auto p = GetParam();
-    topology.add(
+    create_topologies(
         input_layout("input", get_input_layout(p)),
         mvn("mvn", "input", false, p.normalize_variance),
         activation("act", "mvn", activation_func::hyperbolic_tan),
@@ -2093,14 +2146,22 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_activation,
                         mvn_test_params{ CASE_MVN_3D_F16_2, 2, 3 },
                         mvn_test_params{ CASE_MVN_I8_1, 2, 3 },
                         mvn_test_params{ CASE_MVN_I8_2, 2, 3 },
+                        mvn_test_params{ CASE_MVN_I8_3, 2, 3 },
+                        mvn_test_params{ CASE_MVN_I8_4, 2, 3 },
                         mvn_test_params{ CASE_MVN_3D_I8_1, 2, 3 },
                         mvn_test_params{ CASE_MVN_3D_I8_2, 2, 3 },
+                        mvn_test_params{ CASE_MVN_U8_1, 2, 3 },
+                        mvn_test_params{ CASE_MVN_U8_2, 2, 3 },
+                        mvn_test_params{ CASE_MVN_U8_3, 2, 3 },
+                        mvn_test_params{ CASE_MVN_U8_4, 2, 3 },
+                        mvn_test_params{ CASE_MVN_3D_U8_1, 2, 3 },
+                        mvn_test_params{ CASE_MVN_3D_U8_2, 2, 3 },
 }), );
 
 class mvn_scale_quantize_i8 : public MVNFusingTest {};
 TEST_P(mvn_scale_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(
+    create_topologies(
         input_layout("input", get_input_layout(p)),
         mvn("mvn", "input", false, p.normalize_variance),
         data("scale_data", get_mem(get_per_channel_layout(p))),
@@ -2130,14 +2191,22 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_quantize_i8,
         // mvn_test_params{ CASE_MVN_3D_F16_2, 2, 4 },
         mvn_test_params{ CASE_MVN_I8_1, 2, 4 },
         mvn_test_params{ CASE_MVN_I8_2, 2, 4 },
+        mvn_test_params{ CASE_MVN_I8_3, 2, 4 },
+        mvn_test_params{ CASE_MVN_I8_4, 2, 4 },
         mvn_test_params{ CASE_MVN_3D_I8_1, 2, 4 },
         mvn_test_params{ CASE_MVN_3D_I8_2, 2, 4 },
+        mvn_test_params{ CASE_MVN_U8_1, 2, 4 },
+        mvn_test_params{ CASE_MVN_U8_2, 2, 4 },
+        mvn_test_params{ CASE_MVN_U8_3, 2, 4 },
+        mvn_test_params{ CASE_MVN_U8_4, 2, 4 },
+        mvn_test_params{ CASE_MVN_3D_U8_1, 2, 4 },
+        mvn_test_params{ CASE_MVN_3D_U8_2, 2, 4 },
 }), );
 
 class mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8 : public MVNFusingTest {};
 TEST_P(mvn_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) {
     auto p = GetParam();
-    topology.add(
+    create_topologies(
         input_layout("input", get_input_layout(p)),
         mvn("mvn", "input", false, p.normalize_variance),
         data("scale_data", get_mem(get_per_channel_layout(p))),
@@ -2175,8 +2244,16 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, mvn_scale_activation_quantize_i8_eltwise_fp
         // mvn_test_params{ CASE_MVN_3D_F16_2, 2, 7 },
         mvn_test_params{ CASE_MVN_I8_1, 2, 7 },
         mvn_test_params{ CASE_MVN_I8_2, 2, 7 },
+        mvn_test_params{ CASE_MVN_I8_3, 2, 7 },
+        mvn_test_params{ CASE_MVN_I8_4, 2, 7 },
         mvn_test_params{ CASE_MVN_3D_I8_1, 2, 7 },
         mvn_test_params{ CASE_MVN_3D_I8_2, 2, 7 },
+        mvn_test_params{ CASE_MVN_U8_1, 2, 7 },
+        mvn_test_params{ CASE_MVN_U8_2, 2, 7 },
+        mvn_test_params{ CASE_MVN_U8_3, 2, 7 },
+        mvn_test_params{ CASE_MVN_U8_4, 2, 7 },
+        mvn_test_params{ CASE_MVN_3D_U8_1, 2, 7 },
+        mvn_test_params{ CASE_MVN_3D_U8_2, 2, 7 },
 }), );
 
 
@@ -2216,8 +2293,8 @@ public:
     void execute(pooling_test_params& p) {
         auto input_prim = get_mem(get_input_layout(p));
 
-        network network_not_fused(this->engine, this->topology, bo_not_fused);
-        network network_fused(this->engine, this->topology, bo_fused);
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
 
         network_fused.set_input_data("input", input_prim);
         network_not_fused.set_input_data("input", input_prim);
@@ -2237,7 +2314,7 @@ public:
 class pooling_activation : public PoolingFusingTest {};
 TEST_P(pooling_activation, basic) {
     auto p = GetParam();
-    topology.add(
+    create_topologies(
         input_layout("input", get_input_layout(p)),
         pooling("pooling", "input", p.mode, p.kernel_size, p.stride, p.offset),
         activation("act", "pooling", activation_func::relu),
@@ -2265,7 +2342,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, pooling_activation,
 class pooling_scale : public PoolingFusingTest {};
 TEST_P(pooling_scale, basic) {
     auto p = GetParam();
-    topology.add(
+    create_topologies(
         input_layout("input", get_input_layout(p)),
         data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel_size.count())),
         pooling("pooling", "input", p.mode, p.kernel_size, p.stride, p.offset),
index d5f73b9..35c9ba8 100644 (file)
@@ -36,18 +36,13 @@ class mvn_gpu_test : public ::testing::TestWithParam<cldnn::format>
 };
 
 template <typename T>
-void mvn_compute_mean_accross_channels_bfyx(cldnn::memory &output, bool normalize_variance)
-{
-    using namespace tests;
-
-    const auto output_desc = generic_test::get_linear_memory_desc(output.get_layout());
+void mvn_compute_mean_accross_channels(cldnn::memory &output, bool normalize_variance) {
+    auto output_size = output.get_layout().size;
 
-    auto output_sizes = output.get_layout().size.sizes();
-
-    uint32_t batch_size = output_sizes[0];
-    uint32_t feature_size = output_sizes[1];
-    uint32_t y_size = output_sizes[3];
-    uint32_t x_size = output_sizes[2];
+    uint32_t batch_size = output_size.batch[0];
+    uint32_t feature_size = output_size.feature[0];
+    uint32_t y_size = output_size.spatial[1];
+    uint32_t x_size = output_size.spatial[0];
 
     auto buff = output.pointer<T>();
 
@@ -63,7 +58,8 @@ void mvn_compute_mean_accross_channels_bfyx(cldnn::memory &output, bool normaliz
             {
                 for (uint32_t x = 0; x < x_size; ++x)
                 {
-                    size_t data_index = generic_test::get_linear_index(output.get_layout(), b, f, y, x, output_desc);
+                    auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    size_t data_index = output.get_layout().get_linear_offset(index_tensor);
                     float data = static_cast<float>(buff[data_index]);
                     sum += data;
                     if (normalize_variance)
@@ -73,30 +69,25 @@ void mvn_compute_mean_accross_channels_bfyx(cldnn::memory &output, bool normaliz
         }
         sum /= feature_size * y_size * x_size;
         T result_sum = static_cast<T>(sum);
-        EXPECT_NEAR(result_sum, 0.f, err_margin);
+        EXPECT_NEAR(result_sum, 0.f, err_margin) << " at b=" << b;
 
         if (normalize_variance)
         {
             variance /= feature_size * y_size * x_size;
             T result_variance = static_cast<T>(variance);
-            EXPECT_NEAR(result_variance, 1.f, err_margin);
+            EXPECT_NEAR(result_variance, 1.f, err_margin) << " at b=" << b;
         }
     }
 }
 
 template <typename T>
-void mvn_compute_mean_within_channels_bfyx(cldnn::memory &output, bool normalize_variance)
-{
-    using namespace tests;
+void mvn_compute_mean_within_channels(cldnn::memory &output, bool normalize_variance) {
+    auto output_size = output.get_layout().size;
 
-    const auto output_desc = generic_test::get_linear_memory_desc(output.get_layout());
-
-    auto output_sizes = output.get_layout().size.sizes();
-
-    uint32_t batch_size = output_sizes[0];
-    uint32_t feature_size = output_sizes[1];
-    uint32_t y_size = output_sizes[3];
-    uint32_t x_size = output_sizes[2];
+    uint32_t batch_size = output_size.batch[0];
+    uint32_t feature_size = output_size.feature[0];
+    uint32_t y_size = output_size.spatial[1];
+    uint32_t x_size = output_size.spatial[0];
 
     auto buff = output.pointer<T>();
 
@@ -112,7 +103,8 @@ void mvn_compute_mean_within_channels_bfyx(cldnn::memory &output, bool normalize
             {
                 for (uint32_t x = 0; x < x_size; ++x)
                 {
-                    size_t data_index = generic_test::get_linear_index(output.get_layout(), b, f, y, x, output_desc);
+                    auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
+                    size_t data_index = output.get_layout().get_linear_offset(index_tensor);
                     float data = static_cast<float>(buff[data_index]);
                     sum += data;
                     if (normalize_variance)
@@ -121,13 +113,13 @@ void mvn_compute_mean_within_channels_bfyx(cldnn::memory &output, bool normalize
             }
             sum /= y_size * x_size;
             T result_sum = static_cast<T>(sum);
-            EXPECT_NEAR(result_sum, 0.f, err_margin);
+            EXPECT_NEAR(result_sum, 0.f, err_margin) << " at b=" << b << ", f=" << f;
 
             if (normalize_variance)
             {
                 variance /= y_size * x_size;
                 T result_variance = static_cast<T>(variance);
-                EXPECT_NEAR(result_variance, 1.f, err_margin);
+                EXPECT_NEAR(result_variance, 1.f, err_margin) << " at b=" << b << ", f=" << f;
             }
         }
     }
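For context, the property these helpers check is the usual mean-variance normalization: over each normalization slice (a whole batch item in the across-channels variant, a single (b, f) plane in the within-channels variant) the output should satisfy, up to an implementation-defined epsilon,

    y_i = \frac{x_i - \mu}{\sqrt{\sigma^2 + \varepsilon}}, \qquad \mu = \frac{1}{N}\sum_i x_i, \qquad \sigma^2 = \frac{1}{N}\sum_i (x_i - \mu)^2,

so recomputing the mean over the output gives ≈ 0 and, when normalize_variance is set, recomputing the variance gives ≈ 1, both within err_margin.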
@@ -158,7 +150,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_accross_channels_bfyx<float>(output, false);
+    mvn_compute_mean_accross_channels<float>(output, false);
 }
 
 TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16)
@@ -186,7 +178,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_accross_channels_bfyx<FLOAT16>(output, false);
+    mvn_compute_mean_accross_channels<FLOAT16>(output, false);
 }
 
 TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance)
@@ -214,7 +206,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_accross_channels_bfyx<float>(output, true);
+    mvn_compute_mean_accross_channels<float>(output, true);
 }
 
 TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16)
@@ -242,7 +234,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_accross_channels_bfyx<FLOAT16>(output, true);
+    mvn_compute_mean_accross_channels<FLOAT16>(output, true);
 }
 
 TEST(mvn_gpu_test, mvn_test_within_channels_bfyx)
@@ -270,7 +262,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_within_channels_bfyx<float>(output, false);
+    mvn_compute_mean_within_channels<float>(output, false);
 }
 
 TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16)
@@ -298,7 +290,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_within_channels_bfyx<FLOAT16>(output, false);
+    mvn_compute_mean_within_channels<FLOAT16>(output, false);
 }
 
 TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance)
@@ -326,7 +318,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_within_channels_bfyx<float>(output, true);
+    mvn_compute_mean_within_channels<float>(output, true);
 }
 
 TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance_fp16)
@@ -354,5 +346,147 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance_fp16)
     EXPECT_EQ(outputs.begin()->first, "mvn");
 
     auto output = outputs.begin()->second.get_memory();
-    mvn_compute_mean_within_channels_bfyx<FLOAT16>(output, true);
+    mvn_compute_mean_within_channels<FLOAT16>(output, true);
 }
+
+struct mvn_basic_test_params {
+    format::type input_format;
+    data_types input_type;
+    tensor input_size;
+    bool across_channels;
+    bool normalize_variance;
+    padding output_pad;
+};
+
+struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
+    template <typename T>
+    void fill_data(memory& mem, const tests::VVVVF<T>& data) {
+        auto size = mem.get_layout().size;
+        auto ptr = mem.pointer<T>();
+        for (size_t bi = 0; bi < static_cast<size_t>(size.batch[0]); ++bi) {
+            for (size_t fi = 0; fi < static_cast<size_t>(size.feature[0]); ++fi) {
+                for (size_t yi = 0; yi < static_cast<size_t>(size.spatial[1]); ++yi) {
+                    for (size_t xi = 0; xi < static_cast<size_t>(size.spatial[0]); ++xi) {
+                        auto tensor_addr = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
+                        auto offset = mem.get_layout().get_linear_offset(tensor_addr);
+                        ptr[offset] = data[bi][fi][xi][yi];
+                    }
+                }
+            }
+        }
+    }
+
+    template <typename T>
+    void fill_random_data(memory& mem, int min, int max, int k = 8) {
+        auto size = mem.get_layout().size;
+        auto input_data = tests::generate_random_4d<T>(size.batch[0], size.feature[0], size.spatial[0], size.spatial[1], min, max, k);
+        fill_data(mem, input_data);
+    }
+
+    void check_result(memory& output, bool across_channels, bool normalize_variance) {
+        if (output.get_layout().data_type == data_types::f32) {
+            if (across_channels) {
+                mvn_compute_mean_accross_channels<float>(output, normalize_variance);
+            } else {
+                mvn_compute_mean_within_channels<float>(output, normalize_variance);
+            }
+        } else if (output.get_layout().data_type == data_types::f16) {
+            if (across_channels) {
+                mvn_compute_mean_accross_channels<FLOAT16>(output, normalize_variance);
+            } else {
+                mvn_compute_mean_within_channels<FLOAT16>(output, normalize_variance);
+            }
+        }
+    }
+
+    void execute(const mvn_basic_test_params& params, const engine& eng) {
+        auto& size = params.input_size;
+        auto& output_pad = params.output_pad;
+
+        auto input = memory::allocate(eng, { params.input_type, params.input_format, size });
+
+        switch (params.input_type) {
+        case data_types::f32:
+            fill_random_data<float>(input, -127, 127);
+            break;
+        case data_types::f16:
+            fill_random_data<FLOAT16>(input, -127, 127);
+            break;
+        case data_types::i8:
+            fill_random_data<int8_t>(input, -127, 127);
+            break;
+        case data_types::u8:
+            fill_random_data<uint8_t>(input, -127, 127);
+            break;
+        default:
+            break;
+        }
+
+        topology topo;
+        topo.add(input_layout("input", input.get_layout()));
+        auto prim = mvn("mvn", "input", params.across_channels, params.normalize_variance);
+        prim.output_padding = output_pad;
+        topo.add(prim);
+
+        network net(eng, topo);
+
+        net.set_input_data("input", input);
+
+        auto outputs = net.execute();
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "mvn");
+
+        auto output = outputs.begin()->second.get_memory();
+        check_result(output, params.across_channels, params.normalize_variance);
+    }
+};
+
+TEST_P(mvn_random_test, random) {
+    auto eng = tests::get_test_engine();
+    this->execute(GetParam(), eng);
+}
+
+struct mvn_test_case_generator : std::vector<mvn_basic_test_params> {
+    mvn_test_case_generator& add(mvn_basic_test_params params) {
+        push_back(params);
+        return *this;
+    }
+
+    mvn_test_case_generator& smoke_tests(format::type fmt, data_types in_dt) {
+        push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, false, false, padding() });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, false, true, padding() });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, true, false, padding() });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, true, true, padding() });
+        return *this;
+    }
+
+    mvn_test_case_generator& extended_tests(format::type fmt, data_types in_dt) {
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, false, padding() });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, true, padding() });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, false, padding() });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, true, padding() });
+        // output padding
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, false, padding({0, 0, 1, 1}) });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, true, padding({0, 0, 1, 1}) });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, false, padding({0, 0, 1, 1}) });
+        push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, true, padding({0, 0, 1, 1}) });
+
+        return *this;
+    }
+};
+
+INSTANTIATE_TEST_CASE_P(smoke,
+                        mvn_random_test,
+                        testing::ValuesIn(
+                            mvn_test_case_generator()
+                            .smoke_tests(format::b_fs_yx_fsv16, data_types::i8)
+                            .smoke_tests(format::b_fs_yx_fsv16, data_types::u8)
+                        ), );
+
+INSTANTIATE_TEST_CASE_P(extended,
+                        mvn_random_test,
+                        testing::ValuesIn(
+                            mvn_test_case_generator()
+                            .extended_tests(format::b_fs_yx_fsv16, data_types::i8)
+                            .extended_tests(format::b_fs_yx_fsv16, data_types::u8)
+                        ), );
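The parameterized mvn_random_test added above follows a pattern these cldnn tests reuse in several places: a plain struct of case parameters, a generator type deriving from std::vector with chainable methods that append cases, and an INSTANTIATE_TEST_CASE_P fed through testing::ValuesIn. A minimal self-contained sketch of the same pattern, using hypothetical demo_* names and only GoogleTest (no cldnn dependencies):

    #include <gtest/gtest.h>
    #include <vector>

    struct demo_params { int size; bool normalize; };

    // Generator derives from std::vector so chained calls build the case list inline.
    struct demo_param_generator : std::vector<demo_params> {
        demo_param_generator& smoke(int size) {
            push_back(demo_params{ size, false });
            push_back(demo_params{ size, true });
            return *this;
        }
    };

    struct demo_random_test : ::testing::TestWithParam<demo_params> {};

    TEST_P(demo_random_test, runs) {
        auto p = GetParam();          // one invocation per generated case
        EXPECT_GT(p.size, 0);
    }

    INSTANTIATE_TEST_CASE_P(smoke, demo_random_test,
                            testing::ValuesIn(demo_param_generator().smoke(8).smoke(16)));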
index 5ab274c..6926224 100644 (file)
@@ -1151,35 +1151,6 @@ TEST(reorder_gpu_opt, remove_redundant_activation_fuse)
     EXPECT_FLOAT_EQ(out_ptr[1], -0.02f);
 }
 
-TEST(reorder_gpu_opt, basic_do_not_remove_redundant_due_it_is_output)
-{
-    engine eng;
-
-    memory in = memory::allocate(eng, { data_types::f32, format::yxfb, tensor{ 1, 2, 2, 1 } });
-    memory weights = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } });
-    topology tpl{
-        input_layout("in", in.get_layout()),
-        convolution("conv", "in", { "weights" }),
-        data("weights", weights),
-        reorder("r1", "conv", format::bfyx, data_types::f32) //reoder is output - do not optimize
-    };
-
-    build_options opts;
-    opts.set_option(build_option::optimize_data(true));
-
-    network net(eng, tpl, opts);
-    net.set_input_data("in", in);
-    auto outputs = net.execute();
-    auto executed_primitives = net.get_executed_primitives();
-
-    //all pirmitives in this test needs to be executed
-    EXPECT_TRUE(executed_primitives.count("conv") == 1);
-    EXPECT_TRUE(executed_primitives.count("in") == 1);
-    EXPECT_TRUE(executed_primitives.count("r1") == 1);
-    ASSERT_TRUE(outputs.count("r1") == 1);
-    EXPECT_TRUE(outputs.at("r1").get_memory().get_layout().format == format::bfyx);
-}
-
 TEST(reorder_gpu_opt, basic_remove_redundant_output_due_to_implicit_reorders)
 {
     engine eng;
@@ -1915,6 +1886,98 @@ TEST(reorder_gpu, any_format) {
     }
 }
 
+TEST(reorder_image2d_rgba_to_bfyx_gpu, basic)
+{
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::u8, format::image_2d_rgba, { 1, 3, 2, 2 } });
+    layout output_layout(data_types::f16, format::bfyx, { 1, 3, 2, 2 });
+
+    set_values<unsigned char>(input, {
+        1, 0, 5, 7,
+        2, 111, 123, 8,
+        124, 125, 50, 9,
+        251, 252, 253, 210
+        });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        reorder("reorder", "input", output_layout));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "reorder");
+
+    auto output = outputs.begin()->second.get_memory();
+
+    float answers[12] = {
+        1.0f,  2.0f,
+        124.0f,  251.0f,
+
+        0.0f,  111.0f,
+        125.0f,  252.0f,
+
+        5.0f,  123.0f,
+        50.0f, 253.0f,
+    };
+
+    auto output_ptr = output.pointer<FLOAT16>();
+    for (int i = 0; i < 12; i++)
+    {
+        EXPECT_NEAR(FLOAT16(answers[i] / 255.f), output_ptr[i], 1e-3f);
+    }
+
+}
+
+TEST(reorder_bfyx_to_image2d_rgba_gpu, basic)
+{
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 3, 2, 2 } });
+    layout output_layout(data_types::u8, format::image_2d_rgba, { 1, 3, 2, 2 });
+
+    set_values<FLOAT16>(input, {
+        FLOAT16(1.0f / 255.f),  FLOAT16(2.0f / 255.f),
+        FLOAT16(124.0f / 255.f),  FLOAT16(251.0f / 255.f),
+
+        FLOAT16(0.0f / 255.f),  FLOAT16(111.0f / 255.f),
+        FLOAT16(125.0f / 255.f),  FLOAT16(252.0f / 255.f),
+
+        FLOAT16(5.0f / 255.f),  FLOAT16(123.0f / 255.f),
+        FLOAT16(50.0f / 255.f), FLOAT16(253.0f / 255.f),
+        });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        reorder("reorder", "input", output_layout));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "reorder");
+
+    auto output = outputs.begin()->second.get_memory();
+
+    unsigned char answers[16] = {
+        1, 0, 5, 0,
+        2, 111, 123, 0,
+        124, 125, 50, 0,
+        251, 252, 253, 0
+    };
+
+    auto output_ptr = output.pointer<unsigned char>();
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+
+}
+
 using namespace cldnn;
 
 class reorder_test : public tests::generic_test
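The two reorder tests added above exercise the u8 image_2d_rgba <-> fp16 bfyx conversion: the RGBA bytes are deinterleaved into planar channels, the alpha component is dropped, and values are rescaled by 1/255. A small sketch of that deinterleave on a plain row-major RGBA buffer (hypothetical helper, not a cldnn API):

    #include <cstdint>
    #include <vector>

    // Converts an interleaved H x W RGBA byte image into planar CHW floats in [0, 1],
    // discarding the alpha channel, which is how the expected values in the test are laid out.
    std::vector<float> rgba_to_planar_chw(const std::vector<uint8_t>& rgba, int height, int width) {
        std::vector<float> planar(3 * height * width);
        for (int y = 0; y < height; ++y)
            for (int x = 0; x < width; ++x)
                for (int c = 0; c < 3; ++c)  // c == 3 (alpha) is dropped
                    planar[(c * height + y) * width + x] = rgba[(y * width + x) * 4 + c] / 255.f;
        return planar;
    }

For the 2x2 input above, channel 0 of the result is {1, 2, 124, 251} / 255, matching the first four expected FP16 outputs.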
index 4fd9c0a..8e0b382 100644 (file)
@@ -523,3 +523,165 @@ TEST(resample_gpu, bilinear_asymmetric) {
         }
     }
 }
+
+struct resample_random_test_params {
+    data_types input_type;
+    tensor input_size;
+    tensor output_size;
+    uint32_t num_filter;
+    resample_type operation_type;
+    format::type in_format;
+    format::type out_format;
+};
+
+struct resample_random_test : testing::TestWithParam<resample_random_test_params>{
+    template <typename T>
+    void fill_random_typed(memory& mem, int min, int max) {
+        auto size = mem.get_layout().size;
+        size_t b = size.batch[0];
+        size_t f = size.feature[0];
+        size_t x = size.spatial[0];
+        size_t y = size.spatial[1];
+
+        auto data = generate_random_4d<T>(b, f, y, x, min, max);
+        auto ptr = mem.pointer<T>();
+        for (size_t bi = 0; bi < b; ++bi) {
+            for (size_t fi = 0; fi < f; ++fi) {
+                for (size_t yi = 0; yi < y; ++yi) {
+                    for (size_t xi = 0; xi < x; ++xi) {
+                        auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
+                        auto offset = mem.get_layout().get_linear_offset(coords);
+                        ptr[offset] = data[bi][fi][yi][xi];
+                    }
+                }
+            }
+        }
+    }
+
+    void fill_random(memory& mem) {
+        auto dt = mem.get_layout().data_type;
+        switch (dt) {
+        case data_types::f32:
+            fill_random_typed<float>(mem, -127, 127);
+            break;
+        case data_types::f16:
+            fill_random_typed<FLOAT16>(mem, -127, 127);
+            break;
+        case data_types::i8:
+            fill_random_typed<int8_t>(mem, -127, 127);
+            break;
+        case data_types::u8:
+            fill_random_typed<uint8_t>(mem, 0, 255);
+            break;
+        default:
+            break;
+        }
+    }
+
+    template <typename T>
+    void compare_nearest_typed(const memory& input, const memory& output) {
+        auto output_lay = output.get_layout();
+        size_t b = output_lay.size.batch[0];
+        size_t f = output_lay.size.feature[0];
+        size_t x = output_lay.size.spatial[0];
+        size_t y = output_lay.size.spatial[1];
+        float x_ratio = static_cast<float>(input.get_layout().size.spatial[0]) / static_cast<float>(x);
+        float y_ratio = static_cast<float>(input.get_layout().size.spatial[1]) / static_cast<float>(y);
+
+        auto in_ptr = input.pointer<T>();
+        auto out_ptr = output.pointer<T>();
+        for (size_t bi = 0; bi < b; ++bi) {
+            for (size_t fi = 0; fi < f; ++fi) {
+                for (size_t yi = 0; yi < y; ++yi) {
+                    for (size_t xi = 0; xi < x; ++xi) {
+                        auto in_xi = static_cast<size_t>(floor(x_ratio * xi));
+                        auto in_yi = static_cast<size_t>(floor(y_ratio * yi));
+                        auto in_coords = tensor(batch(bi), feature(fi), spatial(in_xi, in_yi, 0, 0));
+                        auto in_offset = input.get_layout().get_linear_offset(in_coords);
+                        auto in_val = in_ptr[in_offset];
+                        auto out_coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0));
+                        auto out_offset = output.get_layout().get_linear_offset(out_coords);
+                        auto out_val = out_ptr[out_offset];
+                        EXPECT_EQ(in_val, out_val) << " at bi=" << bi << ", fi=" << fi << ", xi=" << xi << ", yi=" << yi;
+                    }
+                }
+            }
+        }
+    }
+
+    void compare(const memory& input, const memory& output, resample_type operation) {
+        auto dt = output.get_layout().data_type;
+        if (operation == resample_type::nearest) {
+            if (dt == data_types::f32) {
+                compare_nearest_typed<float>(input, output);
+            } else if (dt == data_types::f16) {
+                compare_nearest_typed<FLOAT16>(input, output);
+            } else if (dt == data_types::i8) {
+                compare_nearest_typed<int8_t>(input, output);
+            } else if (dt == data_types::u8) {
+                compare_nearest_typed<uint8_t>(input, output);
+            } else {
+                FAIL() << "Not supported data type: " << static_cast<size_t>(dt);
+            }
+        } else {
+            FAIL() << "Not supported resample_type: " << static_cast<int32_t>(operation);
+        }
+    }
+
+    void execute(const resample_random_test_params& params) {
+        auto eng = get_test_engine();
+
+        auto in_layout = layout(params.input_type, params.in_format, params.input_size);
+
+        auto topo = topology(
+            input_layout("in", in_layout),
+            resample("resample", "in", params.output_size, params.num_filter, params.operation_type)
+        );
+
+        auto build_opts = build_options(
+            build_option::force_implementations({ {"resample", {params.out_format, ""}} })
+        );
+        auto net = network(eng, topo, build_opts);
+
+        auto in_mem = memory::allocate(eng, in_layout);
+        fill_random(in_mem);
+        net.set_input_data("in", in_mem);
+
+        auto result = net.execute();
+        auto output = result.at("resample").get_memory();
+
+        compare(in_mem, output, params.operation_type);
+    }
+};
+
+TEST_P(resample_random_test, random) {
+    execute(GetParam());
+}
+
+struct resample_random_test_param_generator : std::vector<resample_random_test_params> {
+    resample_random_test_param_generator& add(resample_random_test_params params) {
+        push_back(params);
+        return *this;
+    }
+
+    resample_random_test_param_generator& smoke_params(data_types type, format::type input_format, format::type output_format) {
+        push_back(resample_random_test_params{ type, {1, 17, 5, 9}, {1, 17, 15, 18}, 1, resample_type::nearest, input_format, output_format });
+        push_back(resample_random_test_params{ type, {2, 17, 5, 9}, {2, 17, 15, 18}, 1, resample_type::nearest, input_format, output_format });
+        push_back(resample_random_test_params{ type, {1, 7, 10, 17}, {1, 7, 21, 35}, 1, resample_type::nearest, input_format, output_format });
+        push_back(resample_random_test_params{ type, {2, 7, 10, 17}, {2, 7, 21, 35}, 1, resample_type::nearest, input_format, output_format });
+        return *this;
+    }
+
+};
+
+INSTANTIATE_TEST_CASE_P(smoke,
+                        resample_random_test,
+                        testing::ValuesIn(
+                            resample_random_test_param_generator()
+                            .smoke_params(data_types::i8, format::byxf_af32, format::byxf_af32)
+                            .smoke_params(data_types::u8, format::byxf_af32, format::byxf_af32)
+                            .smoke_params(data_types::i8, format::b_fs_yx_fsv4, format::b_fs_yx_fsv4)
+                            .smoke_params(data_types::u8, format::b_fs_yx_fsv4, format::b_fs_yx_fsv4)
+                            .smoke_params(data_types::i8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+                            .smoke_params(data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16)
+                        ), );
index d199df4..985162c 100644 (file)
@@ -614,3 +614,113 @@ TEST(strided_slice_gpu_f32, test_2x2x2x1x1_2) {
         EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
     }
 }
+
+TEST(strided_slice_gpu_f32, test_2x2x2x2_full_negative_stride) {
+    // Input (BFYX): 2x2x2x2
+    // Begin (BFYX): 0x0x0x0
+    // End (BFYX): 2x2x2x2
+    // Stride (BFYX): -1x1x1x1
+    // Output (BFYX): 2x2x2x2
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    });
+    set_values(begin, {
+            0, 0, 0, 0
+    });
+    set_values(end, {
+            2, 2, 2, 2
+    });
+    set_values(strides, {
+            -1, -1, 1, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            12.f, 13.f, 14.f, 15.f, 8.f, 9.f, 10.f, 11.f, 4.f, 5.f, 6.f, 7.f, 0.f, 1.f, 2.f, 3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    ASSERT_EQ(output_ptr.size(), answers.size());
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu_f32, test_2x2x2x1x1_2_negative_all) {
+    // Input (BFZYX): 2x2x2x1x1
+    // Output (BFZYX): 2x1x1x1x1
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    });
+    set_values(begin, {
+            0, 0, 0
+    });
+    set_values(end, {
+            2, 2, 2
+    });
+    set_values(strides, {
+            1, 2, 2
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            0.0f, 4.0f
+    };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
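The full-range negative-stride case above expects the batch and feature axes to be walked from their last index down to zero while y and x keep their forward order, which is exactly how the answers vector (12..15, 8..11, 4..7, 0..3) is produced. A hedged reference sketch on a dense bfyx buffer (hypothetical helper, independent of the plugin):

    #include <cstddef>
    #include <vector>

    // Applies stride -1 over b and f and stride +1 over y and x across the full ranges.
    std::vector<float> slice_bf_reversed(const std::vector<float>& in,
                                         size_t B, size_t F, size_t Y, size_t X) {
        std::vector<float> out;
        out.reserve(in.size());
        for (size_t b = B; b-- > 0;)
            for (size_t f = F; f-- > 0;)
                for (size_t y = 0; y < Y; ++y)
                    for (size_t x = 0; x < X; ++x)
                        out.push_back(in[((b * F + f) * Y + y) * X + x]);
        return out;
    }

With the 0..15 input and B = F = Y = X = 2 this reproduces the expected output above.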
index fd5214c..d7be15a 100644 (file)
@@ -185,27 +185,35 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(ref_convolution_bwd_data_t<f32, f32, f32, f32>),
 #ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_convolution_bwd_weights_t<f32, f32, f32, f32>),
+#endif
     /* conv (bfloat16) */
     INSTANCE(_jit_uni_dw_convolution_fwd_t<avx512_core, bf16, bf16>),
     INSTANCE(_jit_uni_dw_convolution_fwd_t<avx512_core, bf16, f32>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(_jit_uni_dw_convolution_bwd_data_t<avx512_core, bf16, bf16>),
     INSTANCE(_jit_uni_dw_convolution_bwd_data_t<avx512_core, bf16, f32>),
     INSTANCE(_jit_uni_dw_convolution_bwd_weights_t<avx512_core, bf16, bf16>),
     INSTANCE(_jit_uni_dw_convolution_bwd_weights_t<avx512_core, bf16, f32>),
+#endif
     INSTANCE(jit_avx512_core_bf16_1x1_convolution_fwd_t<f32>),
     INSTANCE(jit_avx512_core_bf16_1x1_convolution_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_avx512_core_bf16_1x1_convolution_bwd_data_t<f32>),
     INSTANCE(jit_avx512_core_bf16_1x1_convolution_bwd_data_t<bf16>),
     INSTANCE(jit_avx512_core_bf16_1x1_convolution_bwd_weights_t<f32>),
     INSTANCE(jit_avx512_core_bf16_1x1_convolution_bwd_weights_t<bf16>),
+#endif
     INSTANCE(jit_avx512_core_bf16_convolution_fwd_t<f32>),
     INSTANCE(jit_avx512_core_bf16_convolution_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_avx512_core_bf16_convolution_bwd_data_t<f32>),
     INSTANCE(jit_avx512_core_bf16_convolution_bwd_data_t<bf16>),
     INSTANCE(jit_avx512_core_bf16_convolution_bwd_weights_t<bf16>),
     INSTANCE(jit_avx512_core_bf16_convolution_bwd_weights_t<f32>),
+#endif
     INSTANCE(gemm_bf16_convolution_fwd_t<f32>),
     INSTANCE(gemm_bf16_convolution_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(gemm_bf16_convolution_bwd_data_t<f32>),
     INSTANCE(gemm_bf16_convolution_bwd_data_t<bf16>),
     INSTANCE(gemm_bf16_convolution_bwd_weights_t<f32>),
@@ -314,8 +322,8 @@ static const pd_create_f cpu_impl_list[] = {
 #endif
     /* eltwise */
     INSTANCE(jit_uni_eltwise_fwd_t<avx512_common, f32>),
-#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_uni_eltwise_fwd_t<avx512_common, bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_uni_eltwise_bwd_t<avx512_common, f32>),
     INSTANCE(jit_uni_eltwise_bwd_t<avx512_common, bf16>),
 #endif
@@ -329,8 +337,8 @@ static const pd_create_f cpu_impl_list[] = {
 #endif
 
     INSTANCE(ref_eltwise_fwd_t<f32>),
-#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_eltwise_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_eltwise_bwd_t<f32>),
     INSTANCE(ref_eltwise_bwd_t<bf16>),
 #endif
@@ -358,8 +366,10 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(ref_softmax_fwd_t<f32>),
 #ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_softmax_bwd_t<f32>),
+#endif
     /* pool */
     INSTANCE(jit_uni_pooling_fwd_t<avx512_common, bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_uni_pooling_bwd_t<avx512_common, bf16>),
 #endif
     INSTANCE(jit_uni_pooling_fwd_t<avx512_common, f32>),
@@ -373,14 +383,17 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(jit_uni_pooling_fwd_t<sse42, f32>),
 #ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_uni_pooling_bwd_t<sse42, f32>),
+#endif
     INSTANCE(nchw_pooling_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(nchw_pooling_bwd_t<bf16>),
 #endif
     INSTANCE(nchw_pooling_fwd_t<f32>),
 #ifdef ENABLE_UNUSED_PRIM
     INSTANCE(nchw_pooling_bwd_t<f32>),
-
+#endif
     INSTANCE(nhwc_pooling_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(nhwc_pooling_bwd_t<bf16>),
 #endif
     INSTANCE(nhwc_pooling_fwd_t<f32>),
@@ -389,8 +402,8 @@ static const pd_create_f cpu_impl_list[] = {
 #endif
 
     INSTANCE(ref_pooling_fwd_t<f32, f32>),
-#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_pooling_fwd_t<bf16, bf16, f32>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_pooling_bwd_t<f32, f32>),
     INSTANCE(ref_pooling_bwd_t<bf16, bf16>),
 #endif
@@ -414,7 +427,9 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(jit_avx512_common_lrn_fwd_t<f32>),
 #ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_avx512_common_lrn_bwd_t<f32>),
+#endif
     INSTANCE(jit_avx512_common_lrn_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(jit_avx512_common_lrn_bwd_t<bf16>),
 #endif
     INSTANCE(jit_uni_lrn_fwd_t<avx2>),
@@ -425,7 +440,9 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(ref_lrn_fwd_t<f32>),
 #ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_lrn_bwd_t<f32>),
+#endif
     INSTANCE(ref_lrn_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_lrn_bwd_t<bf16>),
 #endif
     /* batch normalization */
@@ -475,9 +492,11 @@ static const pd_create_f cpu_impl_list[] = {
 #ifdef ENABLE_UNUSED_PRIM
     INSTANCE(ref_inner_product_bwd_data_t<f32, f32, f32, f32>),
     INSTANCE(ref_inner_product_bwd_weights_t<f32>),
+#endif
     /* inner product (bfloat16) */
     INSTANCE(gemm_bf16_inner_product_fwd_t<f32>),
     INSTANCE(gemm_bf16_inner_product_fwd_t<bf16>),
+#ifdef ENABLE_UNUSED_PRIM
     INSTANCE(gemm_bf16_inner_product_bwd_data_t<f32>),
     INSTANCE(gemm_bf16_inner_product_bwd_data_t<bf16>),
     INSTANCE(gemm_bf16_inner_product_bwd_weights_t<f32>),
index 4652f0c..6ab2965 100644 (file)
@@ -173,8 +173,8 @@ void _jit_avx512_core_bf16_convolution_fwd_t<dst_type>::execute_forward_2d()
         balance211(work_amount, nthr, ithr, start, end);
         auto par_conv = jit_conv_call_s();
 
-        size_t src_h_stride = src_d.blk_off(0, 0, 1);
-        size_t dst_h_stride = dst_d.blk_off(0, 0, 1);
+        size_t src_h_stride = src_d.blk_off(0, 0, 1) - src_d.off_l(0);
+        size_t dst_h_stride = dst_d.blk_off(0, 0, 1) - dst_d.off_l(0);
         size_t wht_h_stride = wht_blk_off(weights_d, 0, 0, 0, 1);
 
         int n{0}, g{0}, occ{0}, oh_s{0}, owb{0};
@@ -279,9 +279,9 @@ void _jit_avx512_core_bf16_convolution_fwd_t<dst_type>::execute_forward_3d()
         balance211(work_amount, nthr, ithr, start, end);
         auto par_conv = jit_conv_call_s();
 
-        size_t src_d_stride = src_d.blk_off(0, 0, 1);
-        size_t src_h_stride = src_d.blk_off(0, 0, 0, 1);
-        size_t dst_h_stride = dst_d.blk_off(0, 0, 0, 1);
+        size_t src_d_stride = src_d.blk_off(0, 0, 1) - src_d.off_l(0);
+        size_t src_h_stride = src_d.blk_off(0, 0, 0, 1) - src_d.off_l(0);
+        size_t dst_h_stride = dst_d.blk_off(0, 0, 0, 1) - dst_d.off_l(0);
         size_t wht_d_stride = wht_blk_off(weights_d, 0, 0, 0, 1);
         size_t wht_h_stride = wht_blk_off(weights_d, 0, 0, 0, 0, 1);
 
@@ -447,7 +447,7 @@ void _jit_avx512_core_bf16_convolution_bwd_data_t<diff_src_type>
             }
             assert(kd_len >= 0);
 
-            auto diff_src_w = diff_src + 
+            auto diff_src_w = diff_src +
                 diff_src_d.blk_off(n, g_icb, id_s);
             auto diff_dst_w = diff_dst + diff_dst_d.blk_off(n, g_ocb, od_s);
             auto wht_w = weights + wht_blk_off(weights_d, g, 0, icb, kd_lo);
@@ -538,8 +538,8 @@ void _jit_avx512_core_bf16_convolution_bwd_data_t<diff_src_type>
         balance211(work_amount, nthr, ithr, start, end);
 
         auto par_conv = jit_conv_call_s();
-        size_t diff_src_h_stride = diff_src_d.blk_off(0, 0, 1);
-        size_t diff_dst_h_stride = diff_dst_d.blk_off(0, 0, 1);
+        size_t diff_src_h_stride = diff_src_d.blk_off(0, 0, 1) - diff_src_d.off_l(0);
+        size_t diff_dst_h_stride = diff_dst_d.blk_off(0, 0, 1) - diff_dst_d.off_l(0);
         size_t wht_h_stride = wht_blk_off(weights_d, 0, 0, 0, 1);
 
         bool is_fast_path = jcp.dilate_h == 0 && jcp.stride_h == 1;
@@ -562,9 +562,9 @@ void _jit_avx512_core_bf16_convolution_bwd_data_t<diff_src_type>
             int work_rem = end - start;
             int ih_e = ih_s + work_rem > jcp.ih ? jcp.ih : ih_s + work_rem;
 
-            auto diff_src_w = diff_src + 
+            auto diff_src_w = diff_src +
                 diff_src_d.blk_off(n, g_icb);
-            auto diff_dst_w = diff_dst + 
+            auto diff_dst_w = diff_dst +
                 diff_dst_d.blk_off(n, g_ocb);
             auto wht_w = weights + wht_blk_off(weights_d, g, 0, icb);
 
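The stride changes above subtract off_l(0) from blk_off(...), apparently because blk_off returns an absolute element offset that already contains the descriptor's base/padding offset; the difference of two such offsets is then a pure row (or depth) stride. A minimal sketch of that idea on a hypothetical padded 2-D layout (illustration only, not the mkl-dnn memory-descriptor API):

    #include <cstddef>

    struct padded_2d {
        size_t base;       // offset of logical element (0, 0) inside the padded buffer
        size_t row_pitch;  // elements per padded row
        size_t offset(size_t y, size_t x) const { return base + y * row_pitch + x; }
    };

    // Distance between two consecutive rows: the base offset cancels out.
    size_t row_stride(const padded_2d& d) {
        return d.offset(1, 0) - d.offset(0, 0);  // == row_pitch regardless of base
    }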
diff --git a/inference-engine/thirdparty/movidius/XLink/pc/Win/include/win_synchapi.h b/inference-engine/thirdparty/movidius/XLink/pc/Win/include/win_synchapi.h
new file mode 100644 (file)
index 0000000..32584a3
--- /dev/null
@@ -0,0 +1,35 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef WIN_SYNCHAPI
+#define WIN_SYNCHAPI
+
+#include "win_pthread.h"
+#include "synchapi.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _pthread_condattr_t pthread_condattr_t;
+
+typedef struct
+{
+    CONDITION_VARIABLE _cv;
+}
+pthread_cond_t;
+
+int pthread_cond_init(pthread_cond_t* __cond, const pthread_condattr_t* __cond_attr);
+int pthread_cond_destroy(pthread_cond_t* __cond);
+
+int pthread_cond_timedwait(pthread_cond_t* __cond,
+    pthread_mutex_t* __mutex,
+    const struct timespec* __abstime);
+int pthread_cond_broadcast(pthread_cond_t* __cond);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* WIN_SYNCHAPI */
diff --git a/inference-engine/thirdparty/movidius/XLink/pc/Win/src/win_synchapi.c b/inference-engine/thirdparty/movidius/XLink/pc/Win/src/win_synchapi.c
new file mode 100644 (file)
index 0000000..26bd365
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "win_synchapi.h"
+
+int pthread_cond_init(pthread_cond_t* __cond, const pthread_condattr_t* __cond_attr)
+{
+    if (__cond == NULL) {
+        return ERROR_INVALID_HANDLE;
+    }
+
+    (void)__cond_attr;
+    InitializeConditionVariable(&__cond->_cv);
+    return 0;
+}
+
+int pthread_cond_destroy(pthread_cond_t* __cond)
+{
+    (void)__cond;
+    return 0;
+}
+
+int pthread_cond_timedwait(pthread_cond_t* __cond,
+    pthread_mutex_t* __mutex,
+    const struct timespec* __abstime)
+{
+    if (__cond == NULL) {
+        return ERROR_INVALID_HANDLE;
+    }
+
+    long long msec = INFINITE;
+    if (__abstime != NULL) {
+        msec = __abstime->tv_sec * 1000 + __abstime->tv_nsec / 1000000;
+    }
+
+    return SleepConditionVariableCS(&__cond->_cv, __mutex, (DWORD)msec);
+}
+
+int pthread_cond_broadcast(pthread_cond_t *__cond)
+{
+    if (__cond == NULL) {
+        return ERROR_INVALID_HANDLE;
+    }
+
+    WakeConditionVariable(&__cond->_cv);
+    return 0;
+}
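A hedged usage sketch for this shim (hypothetical wait helper, not part of the commit). Note that this pthread_cond_timedwait passes tv_sec/tv_nsec straight to SleepConditionVariableCS, so the timespec behaves as a relative timeout rather than a POSIX absolute deadline; the watchdog change below builds its Windows wait time the same way. The sketch assumes the mutex and condition variable were initialized at startup with pthread_mutex_init and pthread_cond_init from these XLink wrappers:

    #include "win_pthread.h"
    #include "win_synchapi.h"

    static pthread_mutex_t g_lock;   // assumed initialized at startup
    static pthread_cond_t  g_cond;   // assumed initialized at startup
    static int g_ready = 0;

    // Waits up to `ms` milliseconds for another thread to set g_ready and broadcast g_cond.
    int wait_ready_for_ms(long long ms)
    {
        struct timespec t;
        t.tv_sec  = (long)(ms / 1000);              // relative seconds (see note above)
        t.tv_nsec = (long)((ms % 1000) * 1000000);  // relative nanoseconds
        pthread_mutex_lock(&g_lock);
        if (!g_ready)
            pthread_cond_timedwait(&g_cond, &g_lock, &t);  // returns on broadcast or after ~ms
        int result = g_ready;
        pthread_mutex_unlock(&g_lock);
        return result;
    }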
index 8f439e6..0c5e91b 100644 (file)
@@ -13,7 +13,7 @@ namespace Watchdog {
  */
 class IDevice {
  public:
-    using time_point = std::chrono::high_resolution_clock::time_point;
+    using time_point = std::chrono::steady_clock::time_point;
 
     virtual ~IDevice() = default;
 
index 10faaf8..6e022c6 100644 (file)
@@ -12,6 +12,7 @@
 #include <watchdogPrivate.hpp>
 #include <algorithm>
 #include <memory>
+#include <string>
 #include <ncCommPrivate.h>
 #include <mvnc.h>
 #include <ncPrivateTypes.h>
 #include "XLinkPrivateDefines.h"
 #include "XLinkErrorUtils.h"
 
+#if defined(_WIN32)
+#include "win_synchapi.h"
+#endif // defined(_WIN32)
+
 namespace {
 
 using namespace std;
@@ -34,7 +39,7 @@ using namespace Watchdog;
  */
 class XLinkDevice : public IDevice {
     _devicePrivate_t privateDevice;
-    using time_point = std::chrono::high_resolution_clock::time_point;
+    using time_point = std::chrono::steady_clock::time_point;
     time_point lastPongTime = time_point::min();
     time_point lastPingTime = time_point::min();
     enum : int { deviceHangTimeout = 12000};
@@ -162,13 +167,6 @@ struct wd_context_opaque {
 };
 
 class WatchdogImpl {
-    enum : uint8_t {
-        STATE_IDLE = 0,
-        INITIATE_THREAD_STOP = 1,
-        THREAD_EXITED = 2,
-        WAKE_UP_THREAD = 3,
-    };
-
     using wd_context_as_tuple = std::tuple<std::shared_ptr<IDevice>, bool*, void*>;
 
     using Devices = std::list<wd_context_as_tuple>;
@@ -176,35 +174,82 @@ class WatchdogImpl {
     std::mutex devicesListAcc;
     std::atomic<int> generation = {0};
     std::atomic_bool threadRunning;
-    volatile std::uint8_t notificationReason = STATE_IDLE;
-    std::condition_variable wakeUpPingThread;
 
+    pthread_mutex_t routineLock;
+    pthread_cond_t  wakeUpPingThread;
     std::thread poolThread;
 
-    WatchdogImpl() = default;
     WatchdogImpl(const WatchdogImpl&) = delete;
     WatchdogImpl(WatchdogImpl&&) = delete;
     WatchdogImpl& operator = (const WatchdogImpl&) = delete;
     WatchdogImpl& operator = (WatchdogImpl&&) = delete;
- public:
+
+private:
+
+    WatchdogImpl() {
+        int rc = pthread_mutex_init(&routineLock, NULL);
+        if (rc != 0) {
+            throw std::runtime_error("failed to initialize \"routineLock\" mutex. rc: " + std::to_string(rc));
+        }
+
+#if !(defined(__APPLE__) || defined(_WIN32))
+        pthread_condattr_t attr;
+        rc = pthread_condattr_init(&attr);
+        if (rc != 0) {
+            throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc));
+        }
+
+        rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+        if (rc != 0) {
+            throw std::runtime_error("failed to set condition variable clock. rc: " + std::to_string(rc));
+        }
+#endif // !(defined(__APPLE__) || defined(_WIN32))
+
+        rc = pthread_cond_init(&wakeUpPingThread, NULL);
+        if (rc != 0) {
+            throw std::runtime_error("failed to initialize \"wakeUpPingThread\" condition variable. rc: " + std::to_string(rc));
+        }
+    }
+
+public:
 
     static WatchdogImpl &instance() {
         static WatchdogImpl watchdog;
         return watchdog;
     }
 
+
     ~WatchdogImpl() {
         mvLog(MVLOG_INFO, "watchdog terminated\n");
+        try
         {
-            auto __lock = lock();
+            lockRoutineMutex();
             for (auto &item : watchedDevices) {
                 *std::get<1>(item) = true;
                 mvLog(MVLOG_WARN, "[%p] device, stop watching due to watchdog termination\n", std::get<2>(item));
             }
-            notificationReason = THREAD_EXITED;
+            unlockRoutineMutex();
+        } catch (const std::exception & ex) {
+            mvLog(MVLOG_ERROR, "error %s", ex.what());
+        } catch (...) {
+            mvLog(MVLOG_ERROR, "unknown error");
         }
 
-        wakeUpPingThread.notify_one();
+        threadRunning = false;
+        int rc = pthread_cond_broadcast(&wakeUpPingThread);
+        if (rc != 0) {
+            mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
+        }
+
+        rc = pthread_mutex_destroy(&routineLock);
+        if (rc != 0) {
+            mvLog(MVLOG_WARN, "failed to destroy the \"routineLock\". rc=%d", rc);
+        }
+
+        rc = pthread_cond_destroy(&wakeUpPingThread);
+        if (rc != 0) {
+            mvLog(MVLOG_WARN, "failed to destroy the \"wakeUpPingThread\". rc=%d", rc);
+        }
 
         if (poolThread.joinable()) {
             poolThread.join();
@@ -213,7 +258,7 @@ class WatchdogImpl {
 
 public:
     void *register_device(std::shared_ptr<IDevice> device) {
-        auto __locker = lock();
+        lockRoutineMutex();
         std::unique_ptr<wd_context_opaque> ctx (new wd_context_opaque);
 
         // rare case of exact pointer address collision
@@ -240,8 +285,10 @@ public:
             });
         } else {
             // wake up thread
-            notificationReason = WAKE_UP_THREAD;
-            wakeUpPingThread.notify_one();
+            int rc = pthread_cond_broadcast(&wakeUpPingThread);
+            if (rc != 0) {
+                mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
+            }
         }
 
         ctx->handleCached = device->getHandle();
@@ -249,6 +296,7 @@ public:
 
         ctx->actual = std::get<0>(watchedDevices.back()).get();
 
+        unlockRoutineMutex();
         return ctx.release();
     }
 
@@ -262,11 +310,12 @@ public:
         if (ptr == nullptr) {
             return false;
         }
-        auto __locker = lock();
+        lockRoutineMutex();
 
         // thread already removed
         if (ptr->destroyed) {
             delete ptr;
+            unlockRoutineMutex();
             return true;
         }
 
@@ -282,16 +331,28 @@ public:
         }
 
         // wake up thread since we might select removed device as nex to be ping, and there is no more devices available
-        notificationReason = WAKE_UP_THREAD;
-        __locker.unlock();
-        wakeUpPingThread.notify_one();
+        unlockRoutineMutex();
+        int rc = pthread_cond_broadcast(&wakeUpPingThread);
+        if (rc != 0) {
+            mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc);
+        }
 
         return bFound;
     }
 
  private:
-    std::unique_lock<std::mutex> lock() {
-        return std::unique_lock<std::mutex>(devicesListAcc);
+    void lockRoutineMutex() {
+        int rc = pthread_mutex_lock(&routineLock);
+        if (rc != 0) {
+            throw std::runtime_error("failed to lock \"routineLock\" mutex. rc: " + std::to_string(rc));
+        }
+    }
+
+    void unlockRoutineMutex() {
+        int rc = pthread_mutex_unlock(&routineLock);
+        if (rc != 0) {
+            throw std::runtime_error("failed to unlock \"routineLock\" mutex. rc: " + std::to_string(rc));
+        }
     }
 
     void watchdog_routine() noexcept {
@@ -299,14 +360,16 @@ public:
             mvLog(MVLOG_INFO, "thread started\n");
 
             milliseconds sleepInterval;
-            auto __locker = lock();
+            struct timespec timeToWait = {0, 0};
+            lockRoutineMutex();
+
             do {
                 for (auto deviceIt = watchedDevices.begin(); deviceIt != watchedDevices.end(); ) {
                     auto &device = std::get<0>(*deviceIt);
-                    auto isReady = device->dueIn(high_resolution_clock::now()).count() == 0;
+                    auto isReady = device->dueIn(steady_clock::now()).count() == 0;
                     if (isReady) {
                         auto now = high_resolution_clock::now();
-                        device->keepAlive(high_resolution_clock::now());
+                        device->keepAlive(steady_clock::now());
                         mvLog(MVLOG_DEBUG, "ping completed in %ld ms\n", duration_cast<std::chrono::milliseconds>(high_resolution_clock ::now()-now).count());
                     }
                     if (device->isTimeout()) {
@@ -319,7 +382,7 @@ public:
                         ++deviceIt;
                     }
                 }
-                auto currentTime = high_resolution_clock::now();
+                auto currentTime = steady_clock::now();
                 auto minInterval = std::min_element(watchedDevices.begin(),
                                                     watchedDevices.end(),
                                                     [&currentTime] (const Devices::value_type & device1, const Devices::value_type & device2) {
@@ -336,26 +399,39 @@ public:
                 sleepInterval = std::get<0>(*minInterval)->dueIn(currentTime);
                 mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count());
 
-                notificationReason = STATE_IDLE;
+                auto sec = std::chrono::duration_cast<std::chrono::seconds>(sleepInterval);
 
-                wakeUpPingThread.wait_until(__locker, currentTime + sleepInterval, [this, currentTime]() {
-                    mvLog(MVLOG_DEBUG,
-                          "waiting for %ld ms\n",
-                          duration_cast<std::chrono::milliseconds>(high_resolution_clock::now() - currentTime).count());
-                    return notificationReason != STATE_IDLE;
-                });
+#if (defined(__APPLE__) || defined(_WIN32))
+                timeToWait.tv_sec = sec.count();
+                timeToWait.tv_nsec =
+                    std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+                    std::chrono::nanoseconds(sec).count();
+#else
+                clock_gettime(CLOCK_MONOTONIC, &timeToWait);
+                timeToWait.tv_sec += sec.count();
+                timeToWait.tv_nsec +=
+                    std::chrono::duration_cast<std::chrono::nanoseconds>(sleepInterval).count() -
+                    std::chrono::nanoseconds(sec).count();
+#endif // (defined(__APPLE__) || defined(_WIN32))
+
+#if defined(__APPLE__)
+                pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait);
+#else
+                pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait);
+#endif // defined(__APPLE__)
 
-                mvLog(MVLOG_DEBUG, "waiting completed in  %ld ms\n",
-                      duration_cast<std::chrono::milliseconds>(high_resolution_clock ::now() - currentTime).count());
-            } while (notificationReason != THREAD_EXITED);
 
+                mvLog(MVLOG_DEBUG, "waiting completed in  %ld ms\n",
+                      duration_cast<std::chrono::milliseconds>(steady_clock::now() - currentTime).count());
+            } while (threadRunning);
         } catch (const std::exception & ex) {
-            mvLog(MVLOG_ERROR, "error %s\n", ex.what());
+            mvLog(MVLOG_ERROR, "error %s", ex.what());
         } catch (...) {
-            mvLog(MVLOG_ERROR, "error\n");
+            mvLog(MVLOG_ERROR, "unknown error");
         }
+
+        unlockRoutineMutex();
         mvLog(MVLOG_INFO, "thread ended\n");
-        threadRunning = false;
     }
 };
 
@@ -440,25 +516,33 @@ WD_API wd_error_t watchdog_register_device(wd_context * ctx, devicePrivate_t *de
 }
 
 WD_API wd_error_t watchdog_unregister_device(wd_context *ctx) {
-    if (ctx == nullptr || ctx->opaque == nullptr) {
-        return WD_NOTINITIALIZED;
-    } else {
-        if (ctx->opaque != WD_OPAQUE_MAGIC) {
-            auto watchee = reinterpret_cast<wd_context_opaque*>(ctx->opaque);
-            // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor
-            if (watchee->magic == WD_OPAQUE_MAGIC) {
-                if (!WatchdogImpl::instance().remove_device(ctx->opaque)) {
-                    mvLog(MVLOG_WARN, "cannot remove device\n");
-                    return WD_FAIL;
+    try {
+        if (ctx == nullptr || ctx->opaque == nullptr) {
+            return WD_NOTINITIALIZED;
+        } else {
+            if (ctx->opaque != WD_OPAQUE_MAGIC) {
+                auto watchee = reinterpret_cast<wd_context_opaque *>(ctx->opaque);
+                // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor
+                if (watchee->magic == WD_OPAQUE_MAGIC) {
+                    if (!WatchdogImpl::instance().remove_device(ctx->opaque)) {
+                        mvLog(MVLOG_WARN, "cannot remove device\n");
+                        return WD_FAIL;
+                    }
                 }
             }
         }
-    }
 
-    if (ctx != nullptr) {
-        // opaque pointer deleted
-        ctx->opaque = nullptr;
+        if (ctx != nullptr) {
+            // opaque pointer deleted
+            ctx->opaque = nullptr;
+        }
+
+        return WD_ERRNO;
+    } catch (const std::exception & ex) {
+        mvLog(MVLOG_ERROR, "error %s", ex.what());
+    } catch (...) {
+        mvLog(MVLOG_ERROR, "unknown error");
     }
 
-    return WD_ERRNO;
+    return WD_FAIL;
 }
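On the non-Windows path the routine above arms pthread_cond_timedwait with an absolute CLOCK_MONOTONIC deadline, matching the clock selected via pthread_condattr_setclock in the constructor. A hedged standalone sketch of that conversion (hypothetical helper); the explicit carry step keeps tv_nsec inside the [0, 1e9) range that pthread_cond_timedwait accepts:

    #include <time.h>
    #include <chrono>

    struct timespec deadline_after(std::chrono::milliseconds interval)
    {
        struct timespec t;
        clock_gettime(CLOCK_MONOTONIC, &t);  // same clock the condition variable was configured with
        auto sec = std::chrono::duration_cast<std::chrono::seconds>(interval);
        t.tv_sec  += sec.count();
        t.tv_nsec += static_cast<long>(
            std::chrono::duration_cast<std::chrono::nanoseconds>(interval - sec).count());
        if (t.tv_nsec >= 1000000000L) {      // normalize the nanosecond carry
            t.tv_sec  += 1;
            t.tv_nsec -= 1000000000L;
        }
        return t;
    }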
index edec297..4bfed66 100644 (file)
@@ -195,4 +195,4 @@ Below are fragments of sample output for CPU and FPGA devices:
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
 * [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
-* [Model Downloader](./tools/downloader/README.md)
\ No newline at end of file
+* [Model Downloader](./tools/downloader/README.md)
index 25168ba..55247b6 100644 (file)
@@ -48,5 +48,5 @@ add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
 # install
 
 install(TARGETS compile_tool
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH}
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH}
         COMPONENT core)
index 798b3b9..072db1b 100644 (file)
@@ -49,5 +49,5 @@ add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
 # install
 
 install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH}
+        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH}
         COMPONENT myriad)
\ No newline at end of file
index e2dc3b9..bb8ba29 100644 (file)
@@ -54,6 +54,6 @@ if(ENABLE_MYRIAD)
     add_perfcheck_target(myriad_perfcheck myriadPlugin)
 
     install(TARGETS myriad_perfcheck
-            RUNTIME DESTINATION ${IE_CPACK_LIBRARY_PATH}
+            RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH}
             COMPONENT myriad)
 endif()
diff --git a/model-optimizer/automation/create_package.py b/model-optimizer/automation/create_package.py
new file mode 100644 (file)
index 0000000..2112e5a
--- /dev/null
@@ -0,0 +1,19 @@
+import argparse
+import os
+from shutil import rmtree
+
+from utils import Automation
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--build_number", type=str, help="Build number to be added to package version", default="0", )
+args = parser.parse_args()
+
+auto = Automation()
+base_dir = os.path.dirname(__file__)
+bom_path = os.path.join(base_dir, "package_BOM.txt")
+bom = auto.parse_bom(bom_path=bom_path)
+dir_to_tar = auto.copy_files_from_bom(root_path=os.path.join(os.path.dirname(__file__), ".."), bom=bom)
+auto.add_version_txt(dst_path=dir_to_tar, build_number=args.build_number)
+
+auto.make_tarfile(out_file_name="mo_for_tf_{0}.tar.gz".format(args.build_number), source_dir=dir_to_tar)
+rmtree(dir_to_tar)
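For example, invoking this script as python model-optimizer/automation/create_package.py --build_number 42 would, by its own logic, copy the files listed in package_BOM.txt into a staging directory, add a version file carrying the build number, pack the result into mo_for_tf_42.tar.gz, and remove the staging directory again (assuming the Automation helpers imported from utils behave as their names suggest).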
diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt
new file mode 100644 (file)
index 0000000..7369c75
--- /dev/null
@@ -0,0 +1,963 @@
+extensions/__init__.py
+extensions/analysis/__init__.py
+extensions/analysis/boolean_input.py
+extensions/analysis/inputs.py
+extensions/analysis/json_print.py
+extensions/analysis/nodes.py
+extensions/analysis/tf_od_api.py
+extensions/analysis/tf_retinanet.py
+extensions/analysis/tf_yolo.py
+extensions/back/__init__.py
+extensions/back/ActivationsNormalizer.py
+extensions/back/AvgPool.py
+extensions/back/blob_normalizer.py
+extensions/back/compress_quantized_weights.py
+extensions/back/ConvolutionNormalizer.py
+extensions/back/CropToStridedSlice.py
+extensions/back/CutMemory.py
+extensions/back/disable_unsupported_ND_operations.py
+extensions/back/DumpFakeQuantStat.py
+extensions/back/ElementwiseOpsToEltwiseOps.py
+extensions/back/EnableConstantStridedSlice.py
+extensions/back/ForceStrictPrecision.py
+extensions/back/fuse_sub_div_min.py
+extensions/back/FuseTransposesSequence.py
+extensions/back/GatherNormalizer.py
+extensions/back/GroupedConvWeightsNormalize.py
+extensions/back/I64ToI32.py
+extensions/back/insert_compatibility_l2normalization.py
+extensions/back/InterpolateToInterpOrResample.py
+extensions/back/kaldi_remove_memory_output.py
+extensions/back/LeakyReLUMutation.py
+extensions/back/LeakyReluToReluWithNegativeSlope.py
+extensions/back/LRNToNorm.py
+extensions/back/LSTMCellNormalizer.py
+extensions/back/MatMulNormalizer.py
+extensions/back/MaxPool.py
+extensions/back/NonMaximumSuppressionNormalize.py
+extensions/back/NormalizeToNormalizeL2.py
+extensions/back/OneHotNormalizer.py
+extensions/back/op_versioning.py
+extensions/back/OptimizeTransposeReshapeSequence.py
+extensions/back/PackBinaryWeights.py
+extensions/back/PadToV7.py
+extensions/back/ParameterToPlaceholder.py
+extensions/back/pass_separator.py
+extensions/back/priorbox_mutation.py
+extensions/back/ProposalMutation.py
+extensions/back/ReduceToPooling.py
+extensions/back/ReduceTransposeDimensions.py
+extensions/back/remove_last_softmax_pattern.py
+extensions/back/RemoveUselessConvert.py
+extensions/back/Reshape0DToSqueeze.py
+extensions/back/ReshapeMutation.py
+extensions/back/ResultNormalizer.py
+extensions/back/ReverseInputChannels.py
+extensions/back/RNNSequenceTypeRename.py
+extensions/back/ScalarConstNormalize.py
+extensions/back/SelectBroadcast.py
+extensions/back/ShapeOfToShape.py
+extensions/back/ShuffleChannelPatternOptimization.py
+extensions/back/ShufflenetReLUReorder.py
+extensions/back/SpecialNodesFinalization.py
+extensions/back/split_normalizer.py
+extensions/back/StridedSliceMasksNormalizer.py
+extensions/back/TileNormalizer.py
+extensions/back/TopKNormalizer.py
+extensions/back/TransposeReduceFusing.py
+extensions/back/TransposeToPermute.py
+extensions/back/UselessConcatRemoval.py
+extensions/front/__init__.py
+extensions/front/ArgMaxSqueeze.py
+extensions/front/ATenToEmbeddingBag.py
+extensions/front/AttributedGatherNormalizer.py
+extensions/front/AttributedPadToPad.py
+extensions/front/binary_quantize_normalization.py
+extensions/front/caffe/__init__.py
+extensions/front/caffe/accum_ext.py
+extensions/front/caffe/argmax_ext.py
+extensions/front/caffe/ArgMaxFlatten.py
+extensions/front/caffe/axpy.py
+extensions/front/caffe/binarization.py
+extensions/front/caffe/binary_conv_ext.py
+extensions/front/caffe/bn.py
+extensions/front/caffe/conv_ext.py
+extensions/front/caffe/correlation_ext.py
+extensions/front/caffe/ctcgreedydecoder_ext.py
+extensions/front/caffe/CustomLayersMapping.xml.example
+extensions/front/caffe/data_augmentation_ext.py
+extensions/front/caffe/detection_output.py
+extensions/front/caffe/elementwise_ext.py
+extensions/front/caffe/eltwise_add_normalize.py
+extensions/front/caffe/elu.py
+extensions/front/caffe/flatten_ext.py
+extensions/front/caffe/grn_ext.py
+extensions/front/caffe/inner_product_ext.py
+extensions/front/caffe/input_ext.py
+extensions/front/caffe/interp_ext.py
+extensions/front/caffe/lrn_ext.py
+extensions/front/caffe/mvn_ext.py
+extensions/front/caffe/normalize_ext.py
+extensions/front/caffe/permute_ext.py
+extensions/front/caffe/pooling_ext.py
+extensions/front/caffe/power_file_ext.py
+extensions/front/caffe/prelu_ext.py
+extensions/front/caffe/priorbox_clustered_ext.py
+extensions/front/caffe/priorbox_ext.py
+extensions/front/caffe/proposal_ext.py
+extensions/front/caffe/proposal_python_ext.py
+extensions/front/caffe/psroipooling_ext.py
+extensions/front/caffe/regionyolo_ext.py
+extensions/front/caffe/relu6.py
+extensions/front/caffe/relu_ext.py
+extensions/front/caffe/reorgyolo_ext.py
+extensions/front/caffe/resample_ext.py
+extensions/front/caffe/reshape.py
+extensions/front/caffe/ShuffleChannel.py
+extensions/front/caffe/shufflechannel_ext.py
+extensions/front/caffe/sigmoid.py
+extensions/front/caffe/simplernms_ext.py
+extensions/front/caffe/slice_to_split.py
+extensions/front/caffe/softmax_ext.py
+extensions/front/caffe/spatial_transformer_ext.py
+extensions/front/caffe/split_to_identity.py
+extensions/front/caffe/tanh.py
+extensions/front/ChangeCastOutputType.py
+extensions/front/ChangePlaceholderTypes.py
+extensions/front/create_tensor_nodes.py
+extensions/front/disable_weights_quantize_value_propagation.py
+extensions/front/div.py
+extensions/front/eltwise_n.py
+extensions/front/ExpandDimsToUnsqueeze.py
+extensions/front/FillToBroadcast.py
+extensions/front/flatten_to_reshape.py
+extensions/front/freeze_placeholder_value.py
+extensions/front/GeLUMerger_Erf.py
+extensions/front/global_pooling_to_reduce.py
+extensions/front/image_scaler.py
+extensions/front/input_cut.py
+extensions/front/instance_normalization.py
+extensions/front/InterpolateNormalizer.py
+extensions/front/kaldi/__init__.py
+extensions/front/kaldi/add_permute_after_convolution.py
+extensions/front/kaldi/add_reshape_around_convolution.py
+extensions/front/kaldi/add_reshape_around_pooling.py
+extensions/front/kaldi/apply_counts.py
+extensions/front/kaldi/logsoftmax_component_ext.py
+extensions/front/kaldi/memory_offset_adjustment.py
+extensions/front/kaldi/replace_eltwise_nin1.py
+extensions/front/kaldi/replace_lstm_node_pattern.py
+extensions/front/kaldi/replace_lstm_nonlinearity.py
+extensions/front/kaldi/set_ports.py
+extensions/front/kaldi/sigmoid_ext.py
+extensions/front/kaldi/split_memoryoffsets.py
+extensions/front/kaldi/tanh_component_ext.py
+extensions/front/Log1p.py
+extensions/front/LogSoftmax.py
+extensions/front/LRNReplacer.py
+extensions/front/MatMul_normalizer.py
+extensions/front/MoveEmbeddedInputsToInputs.py
+extensions/front/mxnet/__init__.py
+extensions/front/mxnet/activation.py
+extensions/front/mxnet/adaptive_avg_pooling_ext.py
+extensions/front/mxnet/add_input_data_to_prior_boxes.py
+extensions/front/mxnet/arange_ext.py
+extensions/front/mxnet/arange_replacer.py
+extensions/front/mxnet/block_grad_ext.py
+extensions/front/mxnet/box_nms_ext.py
+extensions/front/mxnet/cast_ext.py
+extensions/front/mxnet/check_softmax_node_inputs.py
+extensions/front/mxnet/clip_ext.py
+extensions/front/mxnet/conv_ext.py
+extensions/front/mxnet/copy_ext.py
+extensions/front/mxnet/crop_ext.py
+extensions/front/mxnet/custom.py
+extensions/front/mxnet/custom_rpn_proposal.py
+extensions/front/mxnet/deformable_conv_ext.py
+extensions/front/mxnet/deformable_psroi_pooling_ext.py
+extensions/front/mxnet/dropout_ext.py
+extensions/front/mxnet/elementwise_ext.py
+extensions/front/mxnet/eltwise_scalar_replacers.py
+extensions/front/mxnet/exp_ext.py
+extensions/front/mxnet/expand_dims_ext.py
+extensions/front/mxnet/flatten_ext.py
+extensions/front/mxnet/fully_connected_ext.py
+extensions/front/mxnet/gather.py
+extensions/front/mxnet/gather_ext.py
+extensions/front/mxnet/instance_norm_ext.py
+extensions/front/mxnet/leaky_relu.py
+extensions/front/mxnet/lrn_ext.py
+extensions/front/mxnet/max_ext.py
+extensions/front/mxnet/multibox_detection_ext.py
+extensions/front/mxnet/mx_reshape_reverse.py
+extensions/front/mxnet/mx_reshape_to_reshape.py
+extensions/front/mxnet/MXRepeatReplacer.py
+extensions/front/mxnet/null_ext.py
+extensions/front/mxnet/pad_ext.py
+extensions/front/mxnet/pooling_ext.py
+extensions/front/mxnet/proposal_ext.py
+extensions/front/mxnet/psroi_pooling_ext.py
+extensions/front/mxnet/repeat_ext.py
+extensions/front/mxnet/reshape_ext.py
+extensions/front/mxnet/RNN_ext.py
+extensions/front/mxnet/rnn_param_concat.py
+extensions/front/mxnet/roi_pooling_ext.py
+extensions/front/mxnet/shape_array_ext.py
+extensions/front/mxnet/sigmoid.py
+extensions/front/mxnet/slice_channel_ext.py
+extensions/front/mxnet/slice_ext.py
+extensions/front/mxnet/slice_like_ext.py
+extensions/front/mxnet/slice_replacers.py
+extensions/front/mxnet/softmax.py
+extensions/front/mxnet/softmax_activation_ext.py
+extensions/front/mxnet/softmax_ext.py
+extensions/front/mxnet/softmax_output_ext.py
+extensions/front/mxnet/squeeze_ext.py
+extensions/front/mxnet/ssd_anchor_reshape.py
+extensions/front/mxnet/ssd_detection_output_replacer.py
+extensions/front/mxnet/ssd_pattern_flatten_softmax_activation.py
+extensions/front/mxnet/ssd_pattern_remove_flatten.py
+extensions/front/mxnet/ssd_pattern_remove_reshape.py
+extensions/front/mxnet/ssd_pattern_remove_transpose.py
+extensions/front/mxnet/ssd_reorder_detection_out_inputs.py
+extensions/front/mxnet/stack_ext.py
+extensions/front/mxnet/swapaxis_ext.py
+extensions/front/mxnet/tile_ext.py
+extensions/front/mxnet/tile_replacer.py
+extensions/front/mxnet/transpose_ext.py
+extensions/front/mxnet/up_sampling_ext.py
+extensions/front/mxnet/where_ext.py
+extensions/front/mxnet/yolo_v3_mobilenet1_voc.json
+extensions/front/mxnet/zeros_ext.py
+extensions/front/no_op_eraser.py
+extensions/front/onnx/__init__.py
+extensions/front/onnx/activation_ext.py
+extensions/front/onnx/affine_ext.py
+extensions/front/onnx/argmax_ext.py
+extensions/front/onnx/aten_ext.py
+extensions/front/onnx/cast_ext.py
+extensions/front/onnx/clip_ext.py
+extensions/front/onnx/const_ext.py
+extensions/front/onnx/constant_fill_ext.py
+extensions/front/onnx/constant_of_shape_ext.py
+extensions/front/onnx/constant_of_shape_to_broadcast.py
+extensions/front/onnx/conv_ext.py
+extensions/front/onnx/crop_ext.py
+extensions/front/onnx/deformable_conv_ext.py
+extensions/front/onnx/detection_output.py
+extensions/front/onnx/detectionoutput_ext.py
+extensions/front/onnx/dropout_ext.py
+extensions/front/onnx/elementwise_ext.py
+extensions/front/onnx/expand_ext.py
+extensions/front/onnx/flatten_ext.py
+extensions/front/onnx/flattenONNX_to_reshape.py
+extensions/front/onnx/gather_ext.py
+extensions/front/onnx/gemm_ext.py
+extensions/front/onnx/group_norm_ext.py
+extensions/front/onnx/gru_ext.py
+extensions/front/onnx/hard_sigmoid_ext.py
+extensions/front/onnx/image_scaler_ext.py
+extensions/front/onnx/instance_normalization_ext.py
+extensions/front/onnx/lp_normalization_ext.py
+extensions/front/onnx/lrn_ext.py
+extensions/front/onnx/lstm_ext.py
+extensions/front/onnx/mask_rcnn.json
+extensions/front/onnx/mask_rcnn_conversion.py
+extensions/front/onnx/matmul_ext.py
+extensions/front/onnx/mean_variance_normalization_ext.py
+extensions/front/onnx/non_max_suppression_ext.py
+extensions/front/onnx/non_max_suppression_normalize.py
+extensions/front/onnx/non_zero_ext.py
+extensions/front/onnx/normalize_ext.py
+extensions/front/onnx/normalize_l2_normalize.py
+extensions/front/onnx/one_hot_ext.py
+extensions/front/onnx/one_hot_normalize.py
+extensions/front/onnx/pad_ext.py
+extensions/front/onnx/parameter_ext.py
+extensions/front/onnx/person_detection_crossroad.json
+extensions/front/onnx/person_detection_crossroad_conversion.py
+extensions/front/onnx/pooling_ext.py
+extensions/front/onnx/priorbox_clustered_ext.py
+extensions/front/onnx/priorbox_ext.py
+extensions/front/onnx/priorgridgenerator_ext.py
+extensions/front/onnx/proposal_ext.py
+extensions/front/onnx/quantize_ext.py
+extensions/front/onnx/reduce_max_ext.py
+extensions/front/onnx/reduce_mean_ext.py
+extensions/front/onnx/reduce_min_ext.py
+extensions/front/onnx/reduce_prod_ext.py
+extensions/front/onnx/reduce_sum_ext.py
+extensions/front/onnx/remove_filtering_boxes_by_size.py
+extensions/front/onnx/resize_ext.py
+extensions/front/onnx/resize_to_interpolate.py
+extensions/front/onnx/reverse_sequence_ext.py
+extensions/front/onnx/rnn_ext.py
+extensions/front/onnx/roialign_ext.py
+extensions/front/onnx/roifeatureextractor_ext.py
+extensions/front/onnx/scatter_ext.py
+extensions/front/onnx/shape_ext.py
+extensions/front/onnx/slice_ext.py
+extensions/front/onnx/softmax_ext.py
+extensions/front/onnx/split_ext.py
+extensions/front/onnx/squeeze_ext.py
+extensions/front/onnx/top_k_ext.py
+extensions/front/onnx/topkrois_ext.py
+extensions/front/onnx/transpose_ext.py
+extensions/front/onnx/unsqueeze_ext.py
+extensions/front/onnx/upsample_ext.py
+extensions/front/output_cut.py
+extensions/front/override_batch.py
+extensions/front/Pack.py
+extensions/front/pass_separator.py
+extensions/front/PowerToEltwises.py
+extensions/front/rank_decomposer.py
+extensions/front/reciprocal.py
+extensions/front/reduce_axis_normalizer.py
+extensions/front/reshape_dim_normalizer.py
+extensions/front/restore_ports.py
+extensions/front/scatter_normalizer.py
+extensions/front/softmax.py
+extensions/front/softsign_replacer.py
+extensions/front/split_normalizer.py
+extensions/front/squared_difference.py
+extensions/front/SqueezeNormalize.py
+extensions/front/standalone_const_eraser.py
+extensions/front/sub.py
+extensions/front/tf/__init__.py
+extensions/front/tf/activation_ext.py
+extensions/front/tf/argmax_ext.py
+extensions/front/tf/assign_elimination.py
+extensions/front/tf/basic_lstm_cell.py
+extensions/front/tf/batch_to_space_ext.py
+extensions/front/tf/BatchMatMul_ext.py
+extensions/front/tf/BatchToSpaceNDToUpsample.py
+extensions/front/tf/BlockLSTM.py
+extensions/front/tf/BlockLSTM_ext.py
+extensions/front/tf/bucketize.py
+extensions/front/tf/bucketize_ext.py
+extensions/front/tf/Cast_ext.py
+extensions/front/tf/concat.py
+extensions/front/tf/concat_ext.py
+extensions/front/tf/const_ext.py
+extensions/front/tf/conv_ext.py
+extensions/front/tf/crop_and_resize_ext.py
+extensions/front/tf/CropAndResizeReplacement.py
+extensions/front/tf/CTCGreedyDecoder.py
+extensions/front/tf/CTCGreedyDecoder_ext.py
+extensions/front/tf/deconv_ext.py
+extensions/front/tf/depth_to_space.py
+extensions/front/tf/elementwise_ext.py
+extensions/front/tf/expand_dims_ext.py
+extensions/front/tf/extract_image_patches_ext.py
+extensions/front/tf/fake_const_ext.py
+extensions/front/tf/FakeQuantWithMinMaxVars.py
+extensions/front/tf/FakeQuantWithMinMaxVars_ext.py
+extensions/front/tf/faster_rcnn_support.json
+extensions/front/tf/faster_rcnn_support_api_v1.10.json
+extensions/front/tf/faster_rcnn_support_api_v1.13.json
+extensions/front/tf/faster_rcnn_support_api_v1.14.json
+extensions/front/tf/faster_rcnn_support_api_v1.15.json
+extensions/front/tf/faster_rcnn_support_api_v1.7.json
+extensions/front/tf/fifo_queue_v2_ext.py
+extensions/front/tf/fifo_replacer.py
+extensions/front/tf/fill_ext.py
+extensions/front/tf/FlattenToReshape.py
+extensions/front/tf/floor_ext.py
+extensions/front/tf/gather_ext.py
+extensions/front/tf/GatherTree_ext.py
+extensions/front/tf/GNMT_DynamicSequenceLengths.py
+extensions/front/tf/identity_ext.py
+extensions/front/tf/InterpolateTransposes.py
+extensions/front/tf/IteratorGetNext_ext.py
+extensions/front/tf/LoopCond_ext.py
+extensions/front/tf/lrn_ext.py
+extensions/front/tf/mask_rcnn_support.json
+extensions/front/tf/mask_rcnn_support_api_v1.11.json
+extensions/front/tf/mask_rcnn_support_api_v1.13.json
+extensions/front/tf/mask_rcnn_support_api_v1.14.json
+extensions/front/tf/mask_rcnn_support_api_v1.15.json
+extensions/front/tf/mask_rcnn_support_api_v1.7.json
+extensions/front/tf/matmul_ext.py
+extensions/front/tf/mvn.py
+extensions/front/tf/mvn_unrolled.py
+extensions/front/tf/nearest_neighbor_upsampling.py
+extensions/front/tf/next_iteration_ext.py
+extensions/front/tf/non_max_suppression_ext.py
+extensions/front/tf/non_max_suppression_normalize.py
+extensions/front/tf/ObjectDetectionAPI.py
+extensions/front/tf/one_hot_ext.py
+extensions/front/tf/pad_ext.py
+extensions/front/tf/pad_tf_to_pad.py
+extensions/front/tf/placeholder_ext.py
+extensions/front/tf/placeholder_with_default_ext.py
+extensions/front/tf/pooling_ext.py
+extensions/front/tf/prelu.py
+extensions/front/tf/reduce_ext.py
+extensions/front/tf/reshape_related_ext.py
+extensions/front/tf/resize_bilinear.py
+extensions/front/tf/resize_nearest_neighbor.py
+extensions/front/tf/retinanet.json
+extensions/front/tf/RetinaNetFilteredDetectionsReplacement.py
+extensions/front/tf/reverse_sequence.py
+extensions/front/tf/reverse_v2.py
+extensions/front/tf/rfcn_support.json
+extensions/front/tf/rfcn_support_api_v1.10.json
+extensions/front/tf/rfcn_support_api_v1.13.json
+extensions/front/tf/rfcn_support_api_v1.14.json
+extensions/front/tf/select_ext.py
+extensions/front/tf/sign_ext.py
+extensions/front/tf/SizeReplacer.py
+extensions/front/tf/slice_ext.py
+extensions/front/tf/softmax_ext.py
+extensions/front/tf/space_to_batch.py
+extensions/front/tf/space_to_batch_ext.py
+extensions/front/tf/space_to_depth_ext.py
+extensions/front/tf/sparse_fill_empty_rows_ext.py
+extensions/front/tf/sparse_segment_mean_ext.py
+extensions/front/tf/sparse_segment_sqrtn_ext.py
+extensions/front/tf/sparse_segment_sum_ext.py
+extensions/front/tf/sparse_to_dense_ext.py
+extensions/front/tf/sparse_weighted_sum.py
+extensions/front/tf/split_ext.py
+extensions/front/tf/SplitConcatPairToInterpolate.py
+extensions/front/tf/ssd_support.json
+extensions/front/tf/ssd_support_api_v1.14.json
+extensions/front/tf/ssd_support_api_v1.15.json
+extensions/front/tf/ssd_toolbox_detection_output.json
+extensions/front/tf/ssd_toolbox_multihead_detection_output.json
+extensions/front/tf/ssd_v2_support.json
+extensions/front/tf/SSDToolboxDetectionOutput.py
+extensions/front/tf/swap_deconv_inputs.py
+extensions/front/tf/swish.py
+extensions/front/tf/SwitchMergeOptimization.py
+extensions/front/tf/TensorArrayExtractors.py
+extensions/front/tf/TensorArrayGatherV3.py
+extensions/front/tf/tensorflow_custom_operations_config_update.py
+extensions/front/tf/tile_ext.py
+extensions/front/tf/topk_ext.py
+extensions/front/tf/transpose_ext.py
+extensions/front/tf/transposed_mvn_unrolled.py
+extensions/front/tf/unique_ext.py
+extensions/front/tf/UnpackPackReverseInputChannels.py
+extensions/front/tf/variable_ext.py
+extensions/front/tf/variables_values_freezing.py
+extensions/front/tf/yolo_v1.json
+extensions/front/tf/yolo_v1_tiny.json
+extensions/front/tf/yolo_v2.json
+extensions/front/tf/yolo_v2_tiny.json
+extensions/front/tf/yolo_v2_tiny_voc.json
+extensions/front/tf/yolo_v2_voc.json
+extensions/front/tf/yolo_v3.json
+extensions/front/tf/yolo_v3_tiny.json
+extensions/front/tf/yolo_v3_voc.json
+extensions/front/TopKNormalize.py
+extensions/front/transformations_config.py
+extensions/front/TransposeOrderNormalizer.py
+extensions/front/user_data_repack.py
+extensions/front/YOLO.py
+extensions/load/__init__.py
+extensions/load/caffe/__init__.py
+extensions/load/caffe/loader.py
+extensions/load/kaldi/__init__.py
+extensions/load/kaldi/loader.py
+extensions/load/loader.py
+extensions/load/mxnet/__init__.py
+extensions/load/mxnet/loader.py
+extensions/load/onnx/__init__.py
+extensions/load/onnx/loader.py
+extensions/load/tf/__init__.py
+extensions/load/tf/loader.py
+extensions/middle/__init__.py
+extensions/middle/AddFakeQuantizeFuse.py
+extensions/middle/AddIsCyclicAttribute.py
+extensions/middle/AddMeanScaleValues.py
+extensions/middle/AnchorToPriorBox.py
+extensions/middle/ApplyNHWCtoNCHWpermutation.py
+extensions/middle/ApplyPermutations.py
+extensions/middle/ArgMaxToTopK.py
+extensions/middle/AttributedTileNormalizer.py
+extensions/middle/BiasAddBroadcasting.py
+extensions/middle/BinarizeWeightsM1P1.py
+extensions/middle/BlockLSTMtoLSTMSequence.py
+extensions/middle/CheckForCycle.py
+extensions/middle/ConcatOptimization.py
+extensions/middle/ConstSwitchResolver.py
+extensions/middle/ConvertGroupedStridedSlice.py
+extensions/middle/ConvertLayoutDependentOperations.py
+extensions/middle/ConvertMultiInputConv.py
+extensions/middle/ConvToBinaryConv.py
+extensions/middle/CustomSubgraphCall.py
+extensions/middle/CutInputHavingZeroDimFromConcat.py
+extensions/middle/DecomposeBias.py
+extensions/middle/DecomposeBidirectionalRNNSequence.py
+extensions/middle/Deconvolution3rdInputNormalization.py
+extensions/middle/DeleteControlFlowEdges.py
+extensions/middle/DeleteNotExecutable.py
+extensions/middle/DepthToSpace.py
+extensions/middle/DilatedConvolution.py
+extensions/middle/EltwiseChecker.py
+extensions/middle/EltwiseInputNormalization.py
+extensions/middle/EltwiseInputReshape.py
+extensions/middle/EmbeddingBagResolver.py
+extensions/middle/FakeSplitOutputs.py
+extensions/middle/FusedBatchNormNonConstant.py
+extensions/middle/FusedBatchNormTraining.py
+extensions/middle/FuseReshapesSequence.py
+extensions/middle/fusings.py
+extensions/middle/GatherNdNormalizer.py
+extensions/middle/GroupNorm.py
+extensions/middle/GRURNNSequenceToTensorIterator.py
+extensions/middle/InputCut.py
+extensions/middle/InsertLayoutPropagationTransposes.py
+extensions/middle/InsertSelect.py
+extensions/middle/InterpolateSequenceToInterpolate.py
+extensions/middle/L2NormToNorm.py
+extensions/middle/LayoutChangeForConstantShapePaths.py
+extensions/middle/LeakyReluPattern.py
+extensions/middle/LSTMRNNSequenceToTensorIterator.py
+extensions/middle/MinimumMiddleReplacer.py
+extensions/middle/MulAddToSS.py
+extensions/middle/MulFakeQuantizeFuse.py
+extensions/middle/MXNetRNNSequenceNormalize.py
+extensions/middle/MXNetSplitMultiLayers.py
+extensions/middle/MXTileReplacer.py
+extensions/middle/NasNet.py
+extensions/middle/ONNXRNNSequenceNormalize.py
+extensions/middle/PartialInfer.py
+extensions/middle/pass_separator.py
+extensions/middle/permute_tensor_iterator.py
+extensions/middle/preprocessing.py
+extensions/middle/quantize_fuses.py
+extensions/middle/ReluQuantizeFuse.py
+extensions/middle/RemoveDuplicationMemory.py
+extensions/middle/RemoveIdentity.py
+extensions/middle/RemoveRedundantReshapeAfterCropAndResize.py
+extensions/middle/RemoveRedundantReshapes.py
+extensions/middle/RemoveUselessConcatSplit.py
+extensions/middle/RemoveUselessCrops.py
+extensions/middle/RemoveUselessPad.py
+extensions/middle/ReplaceMemoryOffsetWithSplice.py
+extensions/middle/ReplacePNorm.py
+extensions/middle/ReplaceSpliceNodePattern.py
+extensions/middle/reverse_tensor_iterator.py
+extensions/middle/ReverseTransposeNormalization.py
+extensions/middle/ReverseV2ToReverseSequence.py
+extensions/middle/RNNSequenceNormalizeToIE.py
+extensions/middle/ScaleInput.py
+extensions/middle/SharedWeightsDuplication.py
+extensions/middle/SliceConverter.py
+extensions/middle/space_to_depth.py
+extensions/middle/sparse_reshape.py
+extensions/middle/ssd_anchors_to_const.py
+extensions/middle/SwapAxesMiddleReplacer.py
+extensions/middle/TensorIterator_utils.py
+extensions/middle/TensorIteratorBackEdge.py
+extensions/middle/TensorIteratorCondition.py
+extensions/middle/TensorIteratorConditionChecker.py
+extensions/middle/TensorIteratorInput.py
+extensions/middle/TensorIteratorLSTMToLSTMSequence.py
+extensions/middle/TensorIteratorMerge.py
+extensions/middle/TensorIteratorOutput.py
+extensions/middle/TF_lstm_cell_to_generic.py
+extensions/middle/UnsqueezeTileReshapeBlockToInterpolate.py
+extensions/middle/UpsampleToResample.py
+extensions/middle/UselessMerge.py
+extensions/middle/UselessSplitEraser.py
+extensions/middle/UselessStridedSlice.py
+extensions/middle/wights_permute_normalizer.py
+extensions/ops/__init__.py
+extensions/ops/accum.py
+extensions/ops/activation_ops.py
+extensions/ops/adaptive_avg_pooling.py
+extensions/ops/argmax.py
+extensions/ops/assert_op.py
+extensions/ops/aten.py
+extensions/ops/axpy.py
+extensions/ops/binarization.py
+extensions/ops/BlockLSTM.py
+extensions/ops/bn.py
+extensions/ops/box_nms.py
+extensions/ops/bucketize.py
+extensions/ops/Cast.py
+extensions/ops/constant_fill.py
+extensions/ops/copyop.py
+extensions/ops/correlation.py
+extensions/ops/ctc_greedy_decoder.py
+extensions/ops/data_augmentation.py
+extensions/ops/depth_to_space.py
+extensions/ops/DetectionOutput.py
+extensions/ops/detectionoutput_onnx.py
+extensions/ops/elementwise.py
+extensions/ops/embedding_bag.py
+extensions/ops/Enter.py
+extensions/ops/Exit.py
+extensions/ops/exp.py
+extensions/ops/fakequantize.py
+extensions/ops/gather.py
+extensions/ops/GatherNd.py
+extensions/ops/GatherTree.py
+extensions/ops/gelu.py
+extensions/ops/grn.py
+extensions/ops/GRU.py
+extensions/ops/GRUCell.py
+extensions/ops/hard_sigmoid.py
+extensions/ops/identity.py
+extensions/ops/instance_normalization.py
+extensions/ops/interp.py
+extensions/ops/interpolate.py
+extensions/ops/Log.py
+extensions/ops/LSTM.py
+extensions/ops/lstm_cell.py
+extensions/ops/lstm_sequence.py
+extensions/ops/MatMul.py
+extensions/ops/merge.py
+extensions/ops/mvn.py
+extensions/ops/mxrepeat.py
+extensions/ops/mxreshape.py
+extensions/ops/mxslice.py
+extensions/ops/NextIteration.py
+extensions/ops/non_max_suppression.py
+extensions/ops/non_zero.py
+extensions/ops/normalize.py
+extensions/ops/normalize_l2.py
+extensions/ops/one_hot.py
+extensions/ops/pack.py
+extensions/ops/parameter.py
+extensions/ops/pnorm.py
+extensions/ops/power_file.py
+extensions/ops/prediction_heatmap.py
+extensions/ops/prelu.py
+extensions/ops/priorbox.py
+extensions/ops/priorbox_clustered.py
+extensions/ops/priorgridgenerator_onnx.py
+extensions/ops/proposal.py
+extensions/ops/proposal_onnx.py
+extensions/ops/proposal_python_example.py
+extensions/ops/psroipooling.py
+extensions/ops/range.py
+extensions/ops/rank.py
+extensions/ops/ReduceOps.py
+extensions/ops/regionyolo.py
+extensions/ops/reorgyolo.py
+extensions/ops/resample.py
+extensions/ops/resize.py
+extensions/ops/resize_factor_utils.py
+extensions/ops/Reverse.py
+extensions/ops/reverse_sequence.py
+extensions/ops/RNN.py
+extensions/ops/RNNCell.py
+extensions/ops/roialign.py
+extensions/ops/roifeatureextractor_onnx.py
+extensions/ops/scatter.py
+extensions/ops/select.py
+extensions/ops/shufflechannel.py
+extensions/ops/simplernms.py
+extensions/ops/size.py
+extensions/ops/space_to_depth.py
+extensions/ops/sparse_fill_empty_rows.py
+extensions/ops/sparse_reshape.py
+extensions/ops/sparse_segment_mean.py
+extensions/ops/sparse_segment_sqrtn.py
+extensions/ops/sparse_segment_sum.py
+extensions/ops/sparse_to_dense.py
+extensions/ops/sparse_weighted_sum.py
+extensions/ops/spatial_transformer.py
+extensions/ops/splice.py
+extensions/ops/split.py
+extensions/ops/stop_gradient.py
+extensions/ops/swapaxis.py
+extensions/ops/switch.py
+extensions/ops/tensor_iterator.py
+extensions/ops/TensorArray.py
+extensions/ops/TensorArrayGather.py
+extensions/ops/TensorArrayRead.py
+extensions/ops/TensorArrayScatter.py
+extensions/ops/TensorArraySize.py
+extensions/ops/TensorArrayWrite.py
+extensions/ops/TensorIterator_ops.py
+extensions/ops/topk.py
+extensions/ops/topkrois_onnx.py
+extensions/ops/transpose.py
+extensions/ops/unique.py
+extensions/ops/upsample.py
+install_prerequisites/install_prerequisites.bat
+install_prerequisites/install_prerequisites.sh
+install_prerequisites/install_prerequisites_caffe.bat
+install_prerequisites/install_prerequisites_caffe.sh
+install_prerequisites/install_prerequisites_kaldi.bat
+install_prerequisites/install_prerequisites_kaldi.sh
+install_prerequisites/install_prerequisites_mxnet.bat
+install_prerequisites/install_prerequisites_mxnet.sh
+install_prerequisites/install_prerequisites_onnx.bat
+install_prerequisites/install_prerequisites_onnx.sh
+install_prerequisites/install_prerequisites_tf.bat
+install_prerequisites/install_prerequisites_tf.sh
+install_prerequisites/protobuf-3.6.1-py3.4-win-amd64.egg
+install_prerequisites/protobuf-3.6.1-py3.5-win-amd64.egg
+install_prerequisites/protobuf-3.6.1-py3.6-win-amd64.egg
+install_prerequisites/protobuf-3.6.1-py3.7-win-amd64.egg
+mo.py
+mo/__init__.py
+mo/back/__init__.py
+mo/back/ie_ir_ver_2/__init__.py
+mo/back/ie_ir_ver_2/emitter.py
+mo/back/replacement.py
+mo/front/__init__.py
+mo/front/caffe/__init__.py
+mo/front/caffe/collect_attributes.py
+mo/front/caffe/custom_layers_mapping.py
+mo/front/caffe/extractor.py
+mo/front/caffe/extractors/__init__.py
+mo/front/caffe/extractors/batchnorm.py
+mo/front/caffe/extractors/concat.py
+mo/front/caffe/extractors/crop.py
+mo/front/caffe/extractors/native_caffe.py
+mo/front/caffe/extractors/roipooling.py
+mo/front/caffe/extractors/scale.py
+mo/front/caffe/extractors/slice.py
+mo/front/caffe/extractors/tile.py
+mo/front/caffe/extractors/utils.py
+mo/front/caffe/loader.py
+mo/front/caffe/proto/__init__.py
+mo/front/caffe/proto/caffe_pb2.py
+mo/front/caffe/proto/generate_caffe_pb2.py
+mo/front/caffe/proto/mo_caffe.proto
+mo/front/caffe/python_layer_extractor.py
+mo/front/caffe/register_custom_ops.py
+mo/front/common/__init__.py
+mo/front/common/custom_replacement_registry.py
+mo/front/common/extractors/utils.py
+mo/front/common/find_unsupported_ops.py
+mo/front/common/layout.py
+mo/front/common/partial_infer/__init__.py
+mo/front/common/partial_infer/batch_norm.py
+mo/front/common/partial_infer/caffe_fallback.py
+mo/front/common/partial_infer/concat.py
+mo/front/common/partial_infer/crop.py
+mo/front/common/partial_infer/elemental.py
+mo/front/common/partial_infer/eltwise.py
+mo/front/common/partial_infer/multi_box_detection.py
+mo/front/common/partial_infer/multi_box_prior.py
+mo/front/common/partial_infer/random_uniform.py
+mo/front/common/partial_infer/reshape.py
+mo/front/common/partial_infer/roipooling.py
+mo/front/common/partial_infer/slice.py
+mo/front/common/partial_infer/utils.py
+mo/front/common/register_custom_ops.py
+mo/front/common/replacement.py
+mo/front/common/weights.py
+mo/front/extractor.py
+mo/front/kaldi/__init__.py
+mo/front/kaldi/extractor.py
+mo/front/kaldi/extractors/__init__.py
+mo/front/kaldi/extractors/add_ext.py
+mo/front/kaldi/extractors/add_shift_ext.py
+mo/front/kaldi/extractors/affine_component_ext.py
+mo/front/kaldi/extractors/affine_component_preconditioned_online_ext.py
+mo/front/kaldi/extractors/affine_transform_ext.py
+mo/front/kaldi/extractors/backproptruncation_ext.py
+mo/front/kaldi/extractors/batchnorm_component_ext.py
+mo/front/kaldi/extractors/clip_ext.py
+mo/front/kaldi/extractors/concat_ext.py
+mo/front/kaldi/extractors/convolutional_1d_component_ext.py
+mo/front/kaldi/extractors/convolutional_component_ext.py
+mo/front/kaldi/extractors/copy_ext.py
+mo/front/kaldi/extractors/crop_ext.py
+mo/front/kaldi/extractors/elementwise_component_ext.py
+mo/front/kaldi/extractors/fixed_affine_component_ext.py
+mo/front/kaldi/extractors/linear_component_ext.py
+mo/front/kaldi/extractors/lstm_nonlinearity_ext.py
+mo/front/kaldi/extractors/lstm_projected_streams_ext.py
+mo/front/kaldi/extractors/max_pooling_ext.py
+mo/front/kaldi/extractors/memoryoffset_ext.py
+mo/front/kaldi/extractors/naturalgradient_affine_component_ext.py
+mo/front/kaldi/extractors/noop_ext.py
+mo/front/kaldi/extractors/normalize_component_ext.py
+mo/front/kaldi/extractors/pnorm_component_ext.py
+mo/front/kaldi/extractors/rectified_linear_component_ext.py
+mo/front/kaldi/extractors/rescale_ext.py
+mo/front/kaldi/extractors/scale_component_ext.py
+mo/front/kaldi/extractors/slice_ext.py
+mo/front/kaldi/extractors/softmax_ext.py
+mo/front/kaldi/extractors/splice_component_ext.py
+mo/front/kaldi/loader/__init__.py
+mo/front/kaldi/loader/loader.py
+mo/front/kaldi/loader/utils.py
+mo/front/kaldi/register_custom_ops.py
+mo/front/kaldi/utils.py
+mo/front/mxnet/__init__.py
+mo/front/mxnet/extractor.py
+mo/front/mxnet/extractors/__init__.py
+mo/front/mxnet/extractors/add_n.py
+mo/front/mxnet/extractors/batchnorm.py
+mo/front/mxnet/extractors/concat.py
+mo/front/mxnet/extractors/l2_normalization.py
+mo/front/mxnet/extractors/multibox_prior.py
+mo/front/mxnet/extractors/relu.py
+mo/front/mxnet/extractors/scaleshift.py
+mo/front/mxnet/extractors/slice_axis.py
+mo/front/mxnet/extractors/utils.py
+mo/front/mxnet/loader.py
+mo/front/mxnet/nd_to_params.py
+mo/front/mxnet/register_custom_ops.py
+mo/front/onnx/__init__.py
+mo/front/onnx/extractor.py
+mo/front/onnx/extractors/__init__.py
+mo/front/onnx/extractors/concat.py
+mo/front/onnx/extractors/eltwise.py
+mo/front/onnx/extractors/fused_bn.py
+mo/front/onnx/extractors/reshape.py
+mo/front/onnx/extractors/utils.py
+mo/front/onnx/loader.py
+mo/front/onnx/register_custom_ops.py
+mo/front/subgraph_matcher.py
+mo/front/tf/__init__.py
+mo/front/tf/common.py
+mo/front/tf/custom_subgraph_call.py
+mo/front/tf/extractor.py
+mo/front/tf/extractors/__init__.py
+mo/front/tf/extractors/concat.py
+mo/front/tf/extractors/fused_bn.py
+mo/front/tf/extractors/identity.py
+mo/front/tf/extractors/native_tf.py
+mo/front/tf/extractors/pack.py
+mo/front/tf/extractors/random_uniform.py
+mo/front/tf/extractors/strided_slice.py
+mo/front/tf/extractors/utils.py
+mo/front/tf/graph_utils.py
+mo/front/tf/loader.py
+mo/front/tf/partial_infer/__init__.py
+mo/front/tf/partial_infer/tf.py
+mo/front/tf/register_custom_ops.py
+mo/front/tf/replacement.py
+mo/graph/__init__.py
+mo/graph/connection.py
+mo/graph/graph.py
+mo/graph/perm_inputs.py
+mo/graph/port.py
+mo/main.py
+mo/middle/__init__.py
+mo/middle/passes/__init__.py
+mo/middle/passes/conv.py
+mo/middle/passes/convert_data_type.py
+mo/middle/passes/debug.py
+mo/middle/passes/eliminate.py
+mo/middle/passes/fusing/__init__.py
+mo/middle/passes/fusing/decomposition.py
+mo/middle/passes/fusing/fuse_grouped_conv.py
+mo/middle/passes/fusing/fuse_linear_ops.py
+mo/middle/passes/fusing/fuse_linear_seq.py
+mo/middle/passes/fusing/helpers.py
+mo/middle/passes/fusing/mark_unfused_nodes.py
+mo/middle/passes/fusing/resnet_optimization.py
+mo/middle/passes/infer.py
+mo/middle/passes/leaky_relu.py
+mo/middle/passes/mean_scale_values.py
+mo/middle/passes/tensor_names.py
+mo/middle/pattern_match.py
+mo/middle/replacement.py
+mo/ops/__init__.py
+mo/ops/activation.py
+mo/ops/broadcast.py
+mo/ops/clamp.py
+mo/ops/concat.py
+mo/ops/const.py
+mo/ops/constant_of_shape.py
+mo/ops/convolution.py
+mo/ops/crop.py
+mo/ops/deconvolution.py
+mo/ops/deformable_convolution.py
+mo/ops/eltwise.py
+mo/ops/eltwise_n.py
+mo/ops/eltwise_ninputs_in_1.py
+mo/ops/expand_dims.py
+mo/ops/fill.py
+mo/ops/flatten.py
+mo/ops/group_norm.py
+mo/ops/lrn.py
+mo/ops/lstmnonlinearity.py
+mo/ops/memory.py
+mo/ops/memoryoffset.py
+mo/ops/op.py
+mo/ops/pad.py
+mo/ops/permute.py
+mo/ops/pooling.py
+mo/ops/power.py
+mo/ops/reshape.py
+mo/ops/result.py
+mo/ops/roipooling.py
+mo/ops/scale_shift.py
+mo/ops/shape.py
+mo/ops/slice.py
+mo/ops/softmax.py
+mo/ops/space_to_batch.py
+mo/ops/squeeze.py
+mo/ops/strided_slice.py
+mo/ops/tile.py
+mo/ops/unsqueeze.py
+mo/pipeline/__init__.py
+mo/pipeline/common.py
+mo/pipeline/unified.py
+mo/utils/__init__.py
+mo/utils/class_registration.py
+mo/utils/cli_parser.py
+mo/utils/custom_replacement_config.py
+mo/utils/dsu.py
+mo/utils/error.py
+mo/utils/find_inputs.py
+mo/utils/graph.py
+mo/utils/guess_framework.py
+mo/utils/import_extensions.py
+mo/utils/ir_engine/__init__.py
+mo/utils/ir_engine/compare_graphs.py
+mo/utils/ir_engine/ir_engine.py
+mo/utils/ir_reader/__init__.py
+mo/utils/ir_reader/extender.py
+mo/utils/ir_reader/extenders/binary_convolution_extender.py
+mo/utils/ir_reader/extenders/conv_extender.py
+mo/utils/ir_reader/extenders/convert_extender.py
+mo/utils/ir_reader/extenders/deconvolution_extender.py
+mo/utils/ir_reader/extenders/deformable_convolution_extender.py
+mo/utils/ir_reader/extenders/experimental_extender.py
+mo/utils/ir_reader/extenders/fakequantize_extender.py
+mo/utils/ir_reader/extenders/GRUCell_extender.py
+mo/utils/ir_reader/extenders/interpolate_extender.py
+mo/utils/ir_reader/extenders/LSTMCell_extender.py
+mo/utils/ir_reader/extenders/non_zero_extender.py
+mo/utils/ir_reader/extenders/pad_extender.py
+mo/utils/ir_reader/extenders/parameter_extender.py
+mo/utils/ir_reader/extenders/pooling_extender.py
+mo/utils/ir_reader/extenders/priorbox_clustered_extender.py
+mo/utils/ir_reader/extenders/priorbox_extender.py
+mo/utils/ir_reader/extenders/reorg_yolo_extender.py
+mo/utils/ir_reader/extenders/RNNCell_extender.py
+mo/utils/ir_reader/extenders/strided_slice_extender.py
+mo/utils/ir_reader/extenders/tensoriterator_extender.py
+mo/utils/ir_reader/extenders/topk_extender.py
+mo/utils/ir_reader/extenders/variadic_split_extender.py
+mo/utils/ir_reader/layer_to_class.py
+mo/utils/ir_reader/restore_graph.py
+mo/utils/logger.py
+mo/utils/model_analysis.py
+mo/utils/pipeline_config.py
+mo/utils/replacement_pattern.py
+mo/utils/shape.py
+mo/utils/simple_proto_parser.py
+mo/utils/str_to.py
+mo/utils/summarize_graph.py
+mo/utils/tensorboard_util.py
+mo/utils/unsupported_ops.py
+mo/utils/utils.py
+mo/utils/version.py
+mo/utils/versions_checker.py
+mo_caffe.py
+mo_kaldi.py
+mo_mxnet.py
+mo_onnx.py
+mo_tf.py
+requirements.txt
+requirements_caffe.txt
+requirements_kaldi.txt
+requirements_mxnet.txt
+requirements_onnx.txt
+requirements_tf.txt
diff --git a/model-optimizer/automation/utils.py b/model-optimizer/automation/utils.py
new file mode 100644 (file)
index 0000000..9045282
--- /dev/null
+++ b/model-optimizer/automation/utils.py
@@ -0,0 +1,47 @@
+import os
+import subprocess
+import tarfile
+from datetime import datetime
+from shutil import copy, copytree, rmtree
+
+
+
+class Automation:
+    @staticmethod
+    def parse_bom(bom_path):
+        files = []
+        for file in open(bom_path):
+            files.append(file)
+        return files
+
+    @staticmethod
+    def copy_files_from_bom(root_path, bom):
+        target_dir = os.path.join(os.path.dirname(__file__), "ModelOptimizerForTensorflow")
+        if os.path.exists(target_dir):
+            rmtree(target_dir)
+        os.makedirs(target_dir)
+        for file in bom:
+            src = os.path.join(root_path, file.strip('\n'))
+            dst = os.path.join(target_dir, file.strip('\n'))
+            if not os.path.exists(os.path.dirname(dst)):
+                os.makedirs(os.path.dirname(dst))
+            if os.path.isdir(src):
+                copytree(src, dst)
+            else:
+                copy(src, dst)
+        return target_dir
+
+    @staticmethod
+    def add_version_txt(dst_path, build_number):
+        timestamp = datetime.now().strftime("%I:%M%p %B %d, %Y")
+        with open(os.path.join(dst_path, "version.txt"), 'w') as f:
+            f.write(timestamp + '\n')
+            f.write(build_number + '\n')
+
+    @staticmethod
+    def make_tarfile(out_file_name, source_dir):
+        archive_path = os.path.join(os.path.dirname(__file__), out_file_name)
+        if os.path.exists(archive_path):
+            os.remove(archive_path)
+        with tarfile.open(archive_path, "w:gz") as tar:
+            tar.add(source_dir, arcname=os.path.basename(source_dir))
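The new automation/utils.py above wraps the Model Optimizer packaging steps: read the bill of materials, stage the listed files, stamp a version.txt, and archive the result. A minimal usage sketch follows, assuming the module is importable as automation.utils; the BOM path and build number shown are placeholders for illustration, not values taken from this commit:

    from automation.utils import Automation

    # Read the BOM: one repository-relative path per line (path below is illustrative).
    bom = Automation.parse_bom("automation/package_BOM.txt")
    # Stage every listed file under ModelOptimizerForTensorflow/ next to utils.py.
    target_dir = Automation.copy_files_from_bom("/path/to/model-optimizer", bom)
    # Record a timestamp and a build number inside the staged tree.
    Automation.add_version_txt(target_dir, "12345")
    # Pack the staged tree into a gzipped tarball.
    Automation.make_tarfile("ModelOptimizerForTensorflow.tar.gz", target_dir)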
index 7c9ce93..b4e6de4 100644 (file)
  limitations under the License.
 """
 
+import numpy as np
+
 from mo.graph.graph import Graph
 from mo.utils.model_analysis import AnalyzeAction
-import numpy as np
 
 
 class TrainingPhaseAnalysis(AnalyzeAction):
index 86809e2..6079918 100644 (file)
@@ -13,9 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import logging as log
-
 import json
+import logging as log
 import sys
 
 import numpy as np
index 1ed66cb..976511f 100644 (file)
@@ -16,7 +16,7 @@
 import logging as log
 
 from mo.graph.graph import Graph
-from mo.utils.model_analysis import AnalyzeAction, graph_contains_scope, AnalysisResults
+from mo.utils.model_analysis import AnalyzeAction, graph_contains_scope
 from mo.utils.utils import files_by_pattern, get_mo_root_dir
 
 
index 13b9c0b..d287afb 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
+import unittest
+
 import numpy as np
 
-import unittest
 from extensions.back.CutMemory import CutMemory
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class CutMemoryTest(unittest.TestCase):
index 16cfb5e..3eb526d 100644 (file)
@@ -15,6 +15,7 @@
 """
 
 import logging as log
+
 import numpy as np
 
 from extensions.back.ForceStrictPrecision import ForceStrictPrecision
index 5165092..073b88c 100644 (file)
 """
 
 import numpy as np
+
 from extensions.ops.split import VariadicSplit
-from mo.front.tf.graph_utils import create_op_node_with_second_input
-from mo.front.common.partial_infer.utils import int64_array
 from mo.back.replacement import BackReplacementPattern
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.tf.graph_utils import create_op_node_with_second_input
 from mo.graph.graph import Graph
-from mo.ops.reshape import Reshape
 from mo.ops.const import Const
+from mo.ops.reshape import Reshape
 
 
 class LSTMCellNormalizer(BackReplacementPattern):
index a178c74..6b898af 100644 (file)
 """
 
 import logging as log
-
 import math
+
 import numpy as np
 
-from extensions.middle.FuseReshapesSequence import FuseReshapesSequence
 from extensions.back.FuseTransposesSequence import FuseTransposesSequence
+from extensions.middle.FuseReshapesSequence import FuseReshapesSequence
 from extensions.middle.RemoveRedundantReshapes import RemoveRedundantReshapes
 from mo.back.replacement import BackReplacementPattern
 from mo.front.common.partial_infer.utils import int64_array
index 7ef6255..1017ef4 100644 (file)
@@ -14,6 +14,7 @@
  limitations under the License.
 """
 import logging as log
+
 import numpy as np
 
 from extensions.back.ReshapeMutation import ReshapeMutation
index cd4b9e3..57c233b 100644 (file)
@@ -20,7 +20,7 @@ from generator import generator, generate
 
 from extensions.back.ReduceToPooling import ReduceReplacer, ReduceMerge
 from mo.front.common.partial_infer.utils import int64_array
-from mo.middle.passes.eliminate import shape_inference, eliminate_dead_nodes
+from mo.middle.passes.eliminate import shape_inference
 from mo.middle.passes.eliminate_test import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
 
index d3eda9b..f69c0e3 100644 (file)
@@ -18,8 +18,8 @@ import logging as log
 
 import numpy as np
 
-from mo.back.replacement import BackReplacementPattern
 from extensions.back.OptimizeTransposeReshapeSequence import set_reshape_new_output_shape
+from mo.back.replacement import BackReplacementPattern
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph
 
index 9021030..bf98865 100644 (file)
@@ -14,6 +14,7 @@
  limitations under the License.
 """
 import logging as log
+
 import numpy as np
 
 from extensions.back.ForceStrictPrecision import ForceStrictPrecision
index bfc4330..d03f958 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 
 from mo.back.replacement import BackReplacementPattern
-from mo.graph.graph import Graph, Node
-from mo.middle.pattern_match import for_each_sub_graph_recursively
+from mo.graph.graph import Graph
 
 
 class ShapeOfToShape(BackReplacementPattern):
index 3ec9ad4..3982172 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.back.ShufflenetReLUReorder import ShufflenetReLUReorder
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 # The dictionary with nodes attributes used to build various graphs. A key is the name of the node and the value is the
 # dictionary with node attributes.
index 543bf17..09422d3 100644 (file)
@@ -20,10 +20,8 @@ from copy import copy
 import numpy as np
 
 from extensions.back.pass_separator import BackFinish
-from extensions.ops.split import Split
 from extensions.ops.tensor_iterator import TensorIterator, get_internal_node_by_layer_id
 from mo.back.replacement import BackReplacementPattern
-from mo.front.tf.graph_utils import create_op_node_with_second_input
 from mo.graph.graph import Graph
 from mo.ops.const import Const
 from mo.utils.error import Error
index af9b7e1..c5f5888 100644 (file)
  limitations under the License.
 """
 import unittest
+
 import numpy as np
+
 from extensions.back.SpecialNodesFinalization import CreateConstNodesReplacement
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class CreateConstNodesReplacementTest(unittest.TestCase):
index 81aa824..0c63a78 100644 (file)
@@ -13,7 +13,6 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 
 from extensions.back.ConvolutionNormalizer import DeconvolutionNormalizer
 from extensions.back.CropToStridedSlice import CropToStridedSlice
index b58c264..bb00513 100644 (file)
@@ -20,8 +20,8 @@ import numpy as np
 from extensions.back.TileNormalizer import TileMultipleAxisReplacer, Tile3DReshaper
 from mo.front.common.partial_infer.utils import int64_array
 from mo.ops.tile import Tile
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class TileMultipleAxisReplacerTest(unittest.TestCase):
index 034b9bb..d0dd194 100644 (file)
 """
 
 import unittest
-import numpy as np
 from argparse import Namespace
 
+import numpy as np
+
 from extensions.back.compress_quantized_weights import CompressQuantizeWeights
 from extensions.ops.fakequantize import FakeQuantize
-from mo.ops.const import Const
 from mo.front.common.partial_infer.eltwise import eltwise_infer
 from mo.graph.graph import Node
+from mo.ops.const import Const
 from mo.utils.ir_engine.compare_graphs import compare_graphs
 from mo.utils.unittest.graph import build_graph
 
index 464ca85..87dc7e7 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.back.replacement import BackReplacementPattern
 from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
index bb2d0d2..a09544e 100644 (file)
 """
 
 import numpy as np
-import networkx as nx
 
-from mo.ops.const import Const
-from mo.ops.op import Op
-from mo.graph.graph import Graph
 from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
+from mo.ops.const import Const
 
 
 class CompatibilityL2NormalizationPattern(BackReplacementPattern):
index 8b28cec..d633a59 100644 (file)
@@ -15,6 +15,7 @@
 """
 
 import logging as log
+
 import numpy as np
 
 from mo.front.common.replacement import FrontReplacementSubgraph
index 78c25dc..c285510 100644 (file)
 """
 
 import logging as log
+
 import numpy as np
 
-from mo.front.common.replacement import FrontReplacementPattern
 from mo.front.common.partial_infer.utils import int64_array
+from mo.front.common.replacement import FrontReplacementPattern
 from mo.graph.graph import Graph
 from mo.ops.const import Const
 from mo.ops.unsqueeze import Unsqueeze
index 8b44dd5..765c431 100644 (file)
@@ -16,6 +16,7 @@
 
 import logging as log
 from math import sqrt, fabs
+
 from extensions.ops.gelu import GeLUOP
 from mo.front.common.replacement import FrontReplacementSubgraph
 from mo.graph.graph import Graph
index b2a5c11..46d7dbb 100644 (file)
@@ -16,9 +16,9 @@
 
 import numpy as np
 
+from extensions.ops.elementwise import Mul
 from mo.front.common.replacement import FrontReplacementOp
 from mo.graph.graph import Graph
-from extensions.ops.elementwise import Mul
 from mo.ops.const import Const
 
 
index cd1b8d0..6056184 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.LRNReplacer import LRNReplacer
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index e14ba92..98a4c9f 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.Log1p import Log1p
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder': {'shape': np.array([4, 5, 6]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
 """
 from extensions.ops.Log import LogOp
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Graph, Node
+from mo.graph.graph import Graph, Node, rename_nodes
 from mo.ops.softmax import Softmax
 
 
 class LogSoftmaxFrontReplacer(FrontReplacementOp):
     """
-    Replace LogSoftmax operation by Softmax -> Log.
+    Replace LogSoftmax operation with Softmax -> Log.
     """
     op = "LogSoftmax"
     enabled = True
 
     def replace_op(self, graph: Graph, node: Node):
-        axis = -1
-        if 'axis' in node.pb.attr:
-            axis = node.pb.attr['axis'].i
+        node_name = node.soft_get('name', node.id)
+        assert node.has_valid('axis'), 'The node "{}" does not have mandatory attribute "axis"'.format(node_name)
 
-        log = LogOp(graph, {'name': node.name + '/Log_'}).create_node()
-        softmax = Softmax(graph, {'axis': axis, 'name': node.name + '/SoftMax_'}).create_node()
+        log = LogOp(graph, {}).create_node()
+        softmax = Softmax(graph, {'axis': node.axis, 'name': node_name + '/Softmax'}).create_node()
+        rename_nodes([(node, node_name + '/delete'), (log, node_name)])
 
         # Connect nodes: input -> Softmax -> Log
         node.in_port(0).get_connection().set_destination(softmax.in_port(0))
         log.in_port(0).get_connection().set_source(softmax.out_port(0))
-
-        # The "explicit" version of the return value is: [(out_node.id, 0)])
         return [log.id]
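For context, the LogSoftmax replacement above is exact: log-softmax along an axis equals the logarithm of softmax along the same axis, which is precisely what the emitted Softmax -> Log chain computes. Below is a small, self-contained numpy check of that identity, independent of the Model Optimizer APIs (shapes chosen arbitrarily):

    import numpy as np

    def softmax(x, axis=-1):
        # Subtract the max before exponentiating for numerical stability.
        e = np.exp(x - np.max(x, axis=axis, keepdims=True))
        return e / np.sum(e, axis=axis, keepdims=True)

    x = np.random.randn(3, 5)
    decomposed = np.log(softmax(x, axis=-1))  # Softmax followed by Log, as the replacer emits
    reference = x - np.log(np.sum(np.exp(x), axis=-1, keepdims=True))  # direct log-softmax
    assert np.allclose(decomposed, reference)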
diff --git a/model-optimizer/extensions/front/LogSoftmax_test.py b/model-optimizer/extensions/front/LogSoftmax_test.py
new file mode 100644 (file)
index 0000000..18bea40
--- /dev/null
+++ b/model-optimizer/extensions/front/LogSoftmax_test.py
@@ -0,0 +1,81 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+from extensions.front.LogSoftmax import LogSoftmaxFrontReplacer
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph, regular_op, result, connect
+
+nodes = {
+    **regular_op('input', {'type': 'Parameter'}),
+    **regular_op('logsoftmax', {'type': None, 'op': 'LogSoftmax', 'axis': -2, 'name': 'my_logsoftmax'}),
+    **result('output'),
+}
+edges = [
+    ('input', 'logsoftmax'),
+    ('logsoftmax', 'output'),
+]
+
+
+class LogSoftmaxReplacerTest(unittest.TestCase):
+    def test_1(self):
+        graph = build_graph(nodes, edges)
+
+        graph_ref = build_graph({
+            **regular_op('input', {'type': 'Parameter'}),
+            **regular_op('log', {'op': 'Log', 'type': 'Log'}),
+            **regular_op('softmax', {'op': 'SoftMax', 'type': 'SoftMax', 'axis': -2}),
+            **result('output'),
+        },
+            [
+                ('input', 'softmax'),
+                ('softmax', 'log'),
+                ('log', 'output'),
+            ])
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'front'
+
+        LogSoftmaxFrontReplacer().find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+        self.assertTrue(graph.get_op_nodes(op='Log')[0].name == 'my_logsoftmax')
+
+    def test_2(self):
+        graph = build_graph(nodes, edges)
+
+        graph_ref = build_graph({
+            **regular_op('input', {'type': 'Parameter'}),
+            **regular_op('log', {'op': 'Log', 'type': 'Log'}),
+            **regular_op('softmax', {'op': 'SoftMax', 'type': 'SoftMax', 'axis': -2}),
+            **result('output'),
+        },
+            [
+                ('input', 'softmax'),
+                ('softmax', 'log'),
+                ('log', 'output'),
+            ])
+
+        graph.graph['layout'] = 'NHWC'
+        graph.stage = 'front'
+
+        LogSoftmaxFrontReplacer().find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+        self.assertTrue(graph.get_op_nodes(op='Log')[0].name == 'my_logsoftmax')
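The LogSoftmax_test.py added above builds the reference Softmax -> Log graph for both NCHW and NHWC layouts and additionally checks that the surviving Log node inherits the original LogSoftmax name. Assuming the repository's usual test layout, it can presumably be run in isolation with the stock unittest loader:

    import unittest

    # Module path assumed from the diff header above; adjust if the test package differs.
    suite = unittest.defaultTestLoader.loadTestsFromName('extensions.front.LogSoftmax_test')
    unittest.TextTestRunner(verbosity=2).run(suite)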
index a86bafa..3509f5a 100644 (file)
@@ -14,6 +14,7 @@
  limitations under the License.
 """
 import math
+
 import numpy as np
 
 from extensions.ops.MatMul import MatMul
index 37d4126..663d1ce 100644 (file)
 import unittest
 
 import numpy as np
+from generator import generator, generate
 
 from extensions.front.Pack import Pack
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
-
-from generator import generator, generate
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_0': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index f44672e..bec2383 100644 (file)
@@ -17,8 +17,8 @@
 import logging as log
 
 from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Graph
 from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
 from mo.ops.const import Const
 from mo.utils.error import Error
 
index fe825ef..0692bd1 100644 (file)
@@ -20,7 +20,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementPattern
 from mo.graph.graph import Graph
 from mo.ops.const import Const
-from mo.ops.result import Result
 
 
 class TopKNormalize(FrontReplacementPattern):
index 8dfe8ba..55c4ad7 100644 (file)
 
 import numpy as np
 
+from extensions.ops.elementwise import Add, Mul
 from mo.front.common.replacement import FrontReplacementPattern
 from mo.graph.graph import Graph
 from mo.ops.const import Const
-from extensions.ops.elementwise import Add, Mul
 
 
 class BinaryFakeQuantizeNormalization(FrontReplacementPattern):
index ee1501c..fa0d9cd 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.binary_quantize_normalization import BinaryFakeQuantizeNormalization
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 graph_nodes = {
     '0': {'name': 'input', 'kind': 'op', 'op': 'Parameter'},
index 252f266..817179c 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.accum_ext import AccumFrontExtractor
 from extensions.ops.accum import AccumOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeAccumProtoLayer:
index 2674fdc..b748eae 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.argmax_ext import ArgMaxFrontExtractor
 from extensions.ops.argmax import ArgMaxOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeArgMaxProtoLayer:
index dd9d8de..22ac09f 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from extensions.ops.elementwise import Add
 from mo.front.common.replacement import FrontReplacementOp
 from mo.graph.graph import Node, Graph
-from extensions.ops.elementwise import Add
 from mo.ops.scale_shift import ScaleShiftOp
 
 
index 1958f4a..fd899d8 100644 (file)
@@ -19,9 +19,9 @@ import numpy as np
 
 from extensions.front.caffe.bn import BNToScaleShift
 from mo.graph.graph import Node
+from mo.utils.ir_engine.compare_graphs import compare_graphs
 from mo.utils.unittest.extractors import FakeParam
 from mo.utils.unittest.graph import build_graph_with_edge_attrs, build_graph_with_attrs
-from mo.utils.ir_engine.compare_graphs import compare_graphs
 
 
 class FakeBNProtoLayer:
index 469c1bc..8bb1943 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.correlation_ext import CorrelationFrontExtractor
 from extensions.ops.correlation import CorrelationOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeCorrProtoLayer:
index d882758..25bf849 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.ctcgreedydecoder_ext import CTCGreedyDecoderFrontExtractor
 from extensions.ops.ctc_greedy_decoder import CTCGreedyDecoderOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeCTCGreedyDecoderProtoLayer:
index 0f36668..33b4ef2 100644 (file)
@@ -21,9 +21,9 @@ import numpy as np
 
 from extensions.front.caffe.data_augmentation_ext import DataAugmentationFrontExtractor
 from extensions.ops.data_augmentation import DataAugmentationOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeDAProtoLayer:
index a799d55..eee09bc 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from extensions.ops.activation_ops import Elu
 from mo.front.caffe.collect_attributes import collect_attributes
 from mo.front.extractor import FrontExtractorOp
-from extensions.ops.activation_ops import Elu
 
 
 class ELUFrontExtractor(FrontExtractorOp):
index cc3d64e..dc30e54 100644 (file)
@@ -15,7 +15,6 @@
 """
 
 import unittest
-
 from unittest.mock import patch
 
 from extensions.front.caffe.elu import ELUFrontExtractor
index 8aa502f..0499be2 100644 (file)
@@ -19,10 +19,10 @@ from unittest.mock import patch
 
 from extensions.front.caffe.grn_ext import GRNFrontExtractor
 from extensions.ops.grn import GRNOp
-from mo.utils.unittest.extractors import FakeMultiParam
-from mo.utils.unittest.graph import FakeNode
 from mo.front.common.partial_infer.elemental import copy_shape_infer
 from mo.ops.op import Op
+from mo.utils.unittest.extractors import FakeMultiParam
+from mo.utils.unittest.graph import FakeNode
 
 
 class FakeGRNProtoLayer:
index 40e679e..ed76fcd 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.normalize_ext import NormalizeFrontExtractor
 from extensions.ops.normalize import NormalizeOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeNormalizeProtoLayer:
index fe4366f..be4ace6 100644 (file)
@@ -19,10 +19,10 @@ from unittest.mock import patch
 
 from extensions.front.caffe.power_file_ext import PowerFileFrontExtractor
 from extensions.ops.power_file import PowerFileOp
-from mo.utils.unittest.extractors import FakeMultiParam
-from mo.utils.unittest.graph import FakeNode
 from mo.front.common.partial_infer.elemental import copy_shape_infer
 from mo.ops.op import Op
+from mo.utils.unittest.extractors import FakeMultiParam
+from mo.utils.unittest.graph import FakeNode
 
 
 class FakePowerFileProtoLayer:
index 453a9a0..ce25828 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.prelu_ext import PreluFrontExtractor
 from extensions.ops.prelu import PreluOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakePReLUProtoLayer:
index e8f3e73..f3ba679 100644 (file)
@@ -21,9 +21,9 @@ import numpy as np
 
 from extensions.front.caffe.priorbox_clustered_ext import PriorBoxClusteredFrontExtractor
 from extensions.ops.priorbox_clustered import PriorBoxClusteredOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakePriorBoxClusteredProtoLayer:
index e6af49b..db1d8b0 100644 (file)
@@ -21,9 +21,9 @@ import numpy as np
 
 from extensions.front.caffe.priorbox_ext import PriorBoxFrontExtractor
 from extensions.ops.priorbox import PriorBoxOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam, FakeParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeMultiParamListFields(FakeMultiParam):
index 673df1d..051ea60 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.proposal_ext import ProposalFrontExtractor
 from extensions.ops.proposal import ProposalOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode, FakeAttr
-from mo.ops.op import Op
 
 
 class FakeProposalProtoLayer:
index 2e41950..76c8297 100644 (file)
 """
 
 import unittest
-from unittest.mock import patch
 
 from extensions.front.caffe.proposal_python_ext import ProposalPythonFrontExtractor
 from extensions.ops.proposal import ProposalOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode, FakeAttr
-from mo.ops.op import Op
 
 
 class FakeProposalPythonProtoLayer:
index 00e0224..87db9dc 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.regionyolo_ext import RegionYoloFrontExtractor
 from extensions.ops.regionyolo import RegionYoloOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeRegionYoloProtoLayer:
index f77b997..bf6c29f 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.activation_ops import ReLU6
+from mo.front.extractor import FrontExtractorOp
 
 
 class ReLU6FrontExtractor(FrontExtractorOp):
index bff6b80..3775eb9 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.reorgyolo_ext import ReorgYoloFrontExtractor
 from extensions.ops.reorgyolo import ReorgYoloOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeReorgYoloProtoLayer:
index ade38d0..834e0ca 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.simplernms_ext import SimplerNMSFrontExtractor
 from extensions.ops.simplernms import SimplerNMSOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeSimplerNMSProtoLayer:
index 1cec6b1..8246e07 100644 (file)
@@ -19,9 +19,9 @@ from unittest.mock import patch
 
 from extensions.front.caffe.spatial_transformer_ext import SpatialTransformFrontExtractor
 from extensions.ops.spatial_transformer import SpatialTransformOp
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import FakeNode
-from mo.ops.op import Op
 
 
 class FakeSpatialTransformProtoLayer:
index 0811fad..668eb91 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.activation_ops import Tanh
+from mo.front.extractor import FrontExtractorOp
 
 
 class TanhFrontExtractor(FrontExtractorOp):
index b377072..55800f4 100644 (file)
@@ -15,9 +15,9 @@
 """
 
 
+from extensions.ops.elementwise import Add, Maximum, Mul
 from mo.front.common.replacement import FrontReplacementOp
 from mo.graph.graph import Node, Graph
-from extensions.ops.elementwise import Add, Maximum, Mul
 
 
 class EltwiseNReplacement(FrontReplacementOp):
index 21bcb02..dd0c5fd 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.eltwise_n import EltwiseNReplacement
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index ac94760..bb92472 100644 (file)
@@ -22,10 +22,8 @@ from extensions.ops.range import Range
 from extensions.ops.rank import Rank
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementPattern
-from mo.front.tf.graph_utils import create_op_node_with_second_input
 from mo.graph.graph import Graph
 from mo.ops.const import Const
-from mo.ops.unsqueeze import Unsqueeze
 
 
 class GlobalPoolingToReduce(FrontReplacementPattern):
index 6aadf80..dc18baa 100644 (file)
 
 import numpy as np
 
+from extensions.ops.elementwise import Mul, Add
 from mo.front.common.replacement import FrontReplacementOp
 from mo.graph.graph import Graph
 from mo.ops.const import Const
-from extensions.ops.elementwise import Mul, Add
 
 
 class ImageScaler(FrontReplacementOp):
index 52776cc..8c71426 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.image_scaler import ImageScaler
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index 1794096..8a22b7b 100644 (file)
  limitations under the License.
 """
 
-from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node, Graph
 from extensions.ops.elementwise import Add, Mul
 from extensions.ops.mvn import MVN
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Node, Graph
 
 
 class InstanceNormalization(FrontReplacementOp):
index bf3b26d..26814a2 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import networkx as nx
 
 from extensions.front.instance_normalization import InstanceNormalization
-from mo.utils.unittest.graph import build_graph
 from mo.middle.pattern_match import node_match
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'input': {'kind': 'op', 'op': 'AnyOp'},
index abeda4a..e654a65 100644 (file)
@@ -17,6 +17,7 @@
 import unittest
 
 import numpy as np
+
 from extensions.front.kaldi.apply_counts import apply_biases_to_last_layer
 from mo.utils.ir_engine.compare_graphs import compare_graphs
 from mo.utils.unittest.graph import build_graph
diff --git a/model-optimizer/extensions/front/kaldi/logsoftmax.py b/model-optimizer/extensions/front/kaldi/logsoftmax.py
deleted file mode 100644 (file)
index 1cda00d..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-"""
- Copyright (C) 2018-2020 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-from extensions.ops.Log import LogOp
-from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Graph, Node
-from mo.ops.softmax import Softmax
-
-
-class LogsoftmaxFrontReplacer(FrontReplacementOp):
-    """
-    Replace LogSoftmax operation by Softmax -> Log.
-    """
-    op = "logsoftmaxcomponent"
-    enabled = True
-
-    def replace_op(self, graph: Graph, node: Node):
-        log = LogOp(graph, {'name': node.name + '/Log_'}).create_node()
-        softmax = Softmax(graph, {'axis': 1, 'name': node.name + '/SoftMax_'}).create_node()
-
-        # Connect nodes: input -> Softmax -> Log
-        node.in_port(0).get_connection().set_destination(softmax.in_port(0))
-        log.in_port(0).get_connection().set_source(softmax.out_port(0))
-
-        # The "explicit" version of the return value is: [(out_node.id, 0)])
-        return [log.id]
diff --git a/model-optimizer/extensions/front/kaldi/logsoftmax_component_ext.py b/model-optimizer/extensions/front/kaldi/logsoftmax_component_ext.py
new file mode 100644 (file)
index 0000000..8d4ddc6
--- /dev/null
@@ -0,0 +1,28 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.ops.softmax import LogSoftmax
+from mo.front.extractor import FrontExtractorOp
+
+
+class LogSoftMaxComponentExtractor(FrontExtractorOp):
+    op = 'logsoftmaxcomponent'
+    enabled = True
+
+    @classmethod
+    def extract(cls, node):
+        LogSoftmax.update_node_stat(node, {'axis': 1})
+        return cls.enabled
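
Note: the two hunks above change how the Kaldi `logsoftmaxcomponent` is handled: the front replacer that rewrote it into a Softmax followed by a Log is removed, and the component is now extracted directly as a LogSoftmax with axis 1. A minimal NumPy-only sketch (not part of the patch; function names are illustrative) of why the two formulations agree:

    import numpy as np

    def log_softmax_via_softmax(x, axis=1):
        # The removed front replacer modelled this as Softmax followed by Log.
        s = np.exp(x - x.max(axis=axis, keepdims=True))
        s /= s.sum(axis=axis, keepdims=True)
        return np.log(s)

    def log_softmax_direct(x, axis=1):
        # The new extractor marks the node as a single LogSoftmax instead.
        shifted = x - x.max(axis=axis, keepdims=True)
        return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

    x = np.random.randn(4, 10).astype(np.float32)
    assert np.allclose(log_softmax_via_softmax(x), log_softmax_direct(x), atol=1e-5)
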
index f79f6d1..2c7579f 100644 (file)
@@ -18,7 +18,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementOp
 from mo.front.tf.graph_utils import create_op_with_const_inputs
 from mo.graph.graph import Node, Graph
-from mo.ops.const import Const
 from mo.ops.eltwise import Eltwise
 from mo.ops.eltwise_n import EltwiseN
 from mo.utils.error import Error
index 882fbfa..8d30458 100644 (file)
@@ -15,8 +15,8 @@
 """
 
 from extensions.front.kaldi.sigmoid_ext import SigmoidFrontExtractor
-from mo.front.kaldi.extractors.common_ext_test import KaldiFrontExtractorTest
 from extensions.ops.activation_ops import Sigmoid
+from mo.front.kaldi.extractors.common_ext_test import KaldiFrontExtractorTest
 from mo.ops.op import Op
 
 
index 7f0195e..5f8df79 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
-from mo.front.kaldi.extractors.common_ext_test import KaldiFrontExtractorTest
 from extensions.front.kaldi.tanh_component_ext import TanhFrontExtractor
 from extensions.ops.activation_ops import Tanh
+from mo.front.kaldi.extractors.common_ext_test import KaldiFrontExtractorTest
 from mo.ops.op import Op
 
 
index 2115cca..f5f236e 100644 (file)
 """
 
 import unittest
+from argparse import Namespace
 
 import numpy as np
-from argparse import Namespace
 
-from mo.graph.graph import Node
 from extensions.front.mxnet.add_input_data_to_prior_boxes import AddInputDataToPriorBoxes
+from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
 
index bbe0e61..7a16959 100644 (file)
@@ -17,8 +17,8 @@
 import unittest
 
 from extensions.front.mxnet.check_softmax_node_inputs import CheckSoftmaxNodeInputs
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class TestCheckSoftmaxNodeInputs(unittest.TestCase):
index ac42795..0b02c2d 100644 (file)
@@ -19,7 +19,7 @@ import numpy as np
 from mo.front.extractor import FrontExtractorOp
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.convolution import Convolution
-from mo.front.common.extractors.utils import layout_attrs
+
 
 class ConvFrontExtractor(FrontExtractorOp):
     op = 'Convolution'
index fda4174..47d69d9 100644 (file)
@@ -17,9 +17,9 @@
 import unittest
 
 from extensions.front.mxnet.custom import CustomFrontExtractorOp
-from mo.utils.unittest.graph import build_graph
 from mo.front.extractor import FrontExtractorOp, MXNetCustomFrontExtractorOp
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 attrs = {'test_attr': 1}
 
index 81e8411..ee43443 100644 (file)
@@ -15,7 +15,8 @@
 """
 import numpy as np
 
-from extensions.ops.elementwise import Mul, Sub, Add, Maximum, Minimum, Div, Greater, GreaterEqual, Equal, Less, LessEqual, Pow, NotEqual, LogicalAnd, LogicalOr
+from extensions.ops.elementwise import Mul, Sub, Add, Maximum, Minimum, Div, Greater, GreaterEqual, Equal, Less, \
+    LessEqual, Pow, NotEqual, LogicalAnd, LogicalOr
 from mo.front.extractor import FrontExtractorOp
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.graph.graph import Node
index 69880e6..48a30d7 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.mxnet.gather import GatherFrontReplacer
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class GatherTest(unittest.TestCase):
index 2402afb..3cc61ab 100644 (file)
  limitations under the License.
 """
 
-from mo.graph.graph import Node
 from extensions.ops.instance_normalization import InstanceNormalization
 from mo.front.extractor import FrontExtractorOp
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
+from mo.graph.graph import Node
 
 
 class InstanceNormFrontExtractor(FrontExtractorOp):
index feee647..05b1d9e 100644 (file)
  limitations under the License.
 """
 
+from extensions.ops.DetectionOutput import DetectionOutput
 from mo.front.extractor import FrontExtractorOp
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 
-from extensions.ops.DetectionOutput import DetectionOutput
 
 class MultiBoxDetectionOutputExtractor(FrontExtractorOp):
     op = '_contrib_MultiBoxDetection'
index 6be834c..05cd662 100644 (file)
@@ -23,7 +23,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementOp
 from mo.front.tf.graph_utils import create_op_node_with_second_input
 from mo.graph.graph import Graph
-from mo.ops.const import Const
 from mo.ops.reshape import Reshape
 from mo.ops.shape import Shape
 from mo.ops.squeeze import Squeeze
index 10da838..ec9c36c 100644 (file)
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.parameter import Parameter
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
 from mo.ops.const import Const
-from mo.ops.pad import Pad
 
 
 class NullFrontExtractor(FrontExtractorOp):
index 918cf53..1ca39f9 100644 (file)
@@ -16,8 +16,8 @@
 
 import numpy as np
 
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.pad import AttributedPad
 
 
index 1d17059..5d803fe 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.crop import Crop
 
 
index 135ccc5..094a82d 100644 (file)
 
 import numpy as np
 
-from mo.graph.graph import Graph
 from extensions.ops.elementwise import Mul
-from mo.ops.const import Const
 from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph
+from mo.ops.const import Const
 
 
 class SoftmaxFrontReplacementSubgraph(FrontReplacementSubgraph):
index 0462a3b..ab2396f 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.softmax import Softmax
 
 
index 52fa400..5fb37ee 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.softmax import Softmax
 
 
index 8a5247f..c88c9e9 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.softmax import Softmax
 
 
index c3423de..0dec2ff 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.squeeze import Squeeze
 
 
index a4c0627..8d0f061 100644 (file)
@@ -21,8 +21,8 @@ from mo.front.common.replacement import FrontReplacementSubgraph
 from mo.front.tf.graph_utils import create_op_node_with_second_input
 from mo.graph.graph import Node, Graph
 from mo.middle.pattern_match import find_pattern_matches
-from mo.ops.result import Result
 from mo.ops.reshape import Reshape
+from mo.ops.result import Result
 
 
 class SsdPatternDetectionOutputReplacer(FrontReplacementSubgraph):
index cb2485c..104b128 100644 (file)
@@ -17,8 +17,8 @@
 import unittest
 
 from extensions.front.mxnet.ssd_pattern_flatten_softmax_activation import SsdPatternFlattenSoftmaxActivation
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class TestSsdPatternFlattenSoftmaxActivation(unittest.TestCase):
index 80015eb..12061e9 100644 (file)
@@ -17,8 +17,8 @@
 import unittest
 
 from extensions.front.mxnet.ssd_pattern_remove_flatten import SsdPatternRemoveFlatten
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class TestSsdPatternRemoveFlatten(unittest.TestCase):
index bb56b94..d2a7234 100644 (file)
@@ -17,8 +17,8 @@
 import unittest
 
 from extensions.front.mxnet.ssd_pattern_remove_reshape import SsdPatternRemoveReshape
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class TestSsdPatternRemoveReshape(unittest.TestCase):
index 2eab60a..997ea99 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from extensions.front.mxnet.ssd_pattern_flatten_softmax_activation import SsdPatternFlattenSoftmaxActivation
 from extensions.front.mxnet.ssd_pattern_remove_flatten import SsdPatternRemoveFlatten
 from extensions.front.mxnet.ssd_pattern_remove_reshape import SsdPatternRemoveReshape
index 8450c02..38a9d5a 100644 (file)
@@ -17,8 +17,8 @@
 import unittest
 
 from extensions.front.mxnet.ssd_pattern_remove_transpose import SsdPatternRemoveTranspose
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class TestSsdPatternRemoveTranspose(unittest.TestCase):
index 0b3fb7a..b4a55a3 100644 (file)
  limitations under the License.
 """
 
-import networkx as nx
-
-from mo.graph.graph import Graph
-from mo.front.common.replacement import FrontReplacementPattern
-from extensions.front.mxnet.ssd_pattern_remove_transpose import SsdPatternRemoveTranspose
 from extensions.front.mxnet.ssd_pattern_flatten_softmax_activation import SsdPatternFlattenSoftmaxActivation
+from extensions.front.mxnet.ssd_pattern_remove_transpose import SsdPatternRemoveTranspose
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
 
 
 class SsdReorderDetectionOutInputs(FrontReplacementPattern):
index d97a82a..91d7293 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.mxnet.ssd_reorder_detection_out_inputs import SsdReorderDetectionOutInputs
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class TestSsdReorderDetectionOutInputs(unittest.TestCase):
index 86bc032..afbac1f 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from extensions.ops.pack import PackOp
 from mo.front.extractor import FrontExtractorOp
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
-from extensions.ops.pack import PackOp
 
 
 class StackFrontExtractor(FrontExtractorOp):
index 06441ee..e0fc4c0 100644 (file)
  limitations under the License.
 """
 
-import ast
 import numpy as np
 
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.front.extractor import FrontExtractorOp
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.const import Const
 
 
index 64a155f..cc131ad 100644 (file)
@@ -16,8 +16,6 @@
 
 import logging as log
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementSubgraph
 from mo.graph.graph import Graph
 
index d9e5c7a..e3d874f 100644 (file)
@@ -20,8 +20,8 @@ import numpy as np
 import onnx
 
 from extensions.front.onnx.affine_ext import AffineFrontExtractor
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class AffineONNXExtractorTest(unittest.TestCase):
index 73cbe66..de3983f 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.constant_fill import ConstantFill
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
index dbe54a0..aebd258 100644 (file)
 
 import numpy as np
 
-from mo.front.common.extractors.utils import layout_attrs
+from mo.front.common.partial_infer.utils import int64_array
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr, get_onnx_autopad
 from mo.ops.convolution import Convolution
 from mo.utils.error import Error
-from mo.front.common.partial_infer.utils import int64_array
 
 
 class ConvFrontExtractor(FrontExtractorOp):
index 2477fa6..85f1c42 100644 (file)
@@ -20,9 +20,9 @@ import numpy as np
 import onnx
 
 from extensions.front.onnx.conv_ext import ConvTransposeFrontExtractor
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
 from mo.utils.error import Error
+from mo.utils.unittest.graph import build_graph
 
 
 class ConvTransposeONNXExtractorTest(unittest.TestCase):
index add2794..2fd6a8e 100644 (file)
@@ -20,8 +20,8 @@ import numpy as np
 import onnx
 
 from extensions.front.onnx.crop_ext import CropFrontExtractor
-from mo.utils.unittest.graph import build_graph
 from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
 
 
 class CropONNXExtractorTest(unittest.TestCase):
index 9f0ed86..94b1eaf 100644 (file)
@@ -19,8 +19,6 @@ import numpy as np
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr, get_onnx_autopad
 from mo.ops.deformable_convolution import DeformableConvolution
-from mo.utils.error import Error
-from mo.front.common.partial_infer.utils import int64_array
 
 
 class DeformableConvExtractor(FrontExtractorOp):
index dddc5ac..267e801 100644 (file)
  limitations under the License.
 """
 
-import onnx
 import unittest
 
 import numpy as np
+import onnx
 
 from extensions.front.onnx.detection_output import DetectionOutputFrontExtractor
 from extensions.ops.DetectionOutput import DetectionOutput
index 5904864..f72c80b 100644 (file)
@@ -15,6 +15,7 @@
 """
 
 from math import log
+
 import numpy as np
 
 from extensions.ops.detectionoutput_onnx import ExperimentalDetectronDetectionOutput
index 13d6112..6d23b3e 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from extensions.ops.identity import IdentityOp
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
-from extensions.ops.identity import IdentityOp
 from mo.utils.error import Error
 
 
index e504fe8..349ba47 100644 (file)
@@ -17,8 +17,6 @@
 import numpy as np
 
 from mo.front.extractor import FrontExtractorOp
-from mo.ops.op import Op
-
 from mo.front.onnx.extractors.utils import onnx_attr
 
 
index 3e6e0cd..4b1702d 100644 (file)
@@ -26,7 +26,6 @@ from mo.graph.graph import Graph
 from mo.graph.graph import Node
 from mo.ops.reshape import Reshape
 
-
 input_fpn_heads = ('486', '454', '422', '390')
 
 
diff --git a/model-optimizer/extensions/front/onnx/non_zero_ext.py b/model-optimizer/extensions/front/onnx/non_zero_ext.py
new file mode 100644 (file)
index 0000000..6f3e97b
--- /dev/null
@@ -0,0 +1,29 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.non_zero import NonZero
+from mo.front.extractor import FrontExtractorOp
+
+
+class NonZeroExtractor(FrontExtractorOp):
+    op = 'NonZero'
+    enabled = True
+
+    @classmethod
+    def extract(cls, node):
+        NonZero.update_node_stat(node, {'output_type': np.int64})
+        return cls.enabled
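
Note: the new extractor maps ONNX NonZero onto the internal NonZero op with an int64 output type, which matches the reference NumPy behaviour. A small sketch, assuming nothing beyond NumPy, of the output layout the op is expected to produce (one row of coordinates per input dimension):

    import numpy as np

    x = np.array([[1, 0, 2],
                  [0, 3, 0]])

    # ONNX NonZero returns the coordinates of non-zero elements as a
    # (rank, num_nonzero) int64 tensor; np.nonzero yields the same data
    # as a tuple of per-dimension index arrays.
    indices = np.stack(np.nonzero(x)).astype(np.int64)
    print(indices)        # [[0 0 1]
                          #  [0 2 1]]
    print(indices.dtype)  # int64
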
diff --git a/model-optimizer/extensions/front/onnx/person_detection_crossroad.json b/model-optimizer/extensions/front/onnx/person_detection_crossroad.json
new file mode 100644 (file)
index 0000000..8fbd555
--- /dev/null
@@ -0,0 +1,12 @@
+[
+  {
+    "custom_attributes":
+    {
+      "fpn_heads": ["634", "635", "636", "637"],
+      "ROI_feature_extractor_inputs": ["2475", "2834", "3192"],
+      "ROI_feature_extractor_outputs": ["2614", "2972", "3330"]
+    },
+    "id": "ONNXPersonDetectionCrossroadReplacement",
+    "match_kind": "general"
+  }
+]
diff --git a/model-optimizer/extensions/front/onnx/person_detection_crossroad_conversion.py b/model-optimizer/extensions/front/onnx/person_detection_crossroad_conversion.py
new file mode 100644 (file)
index 0000000..329f3dd
--- /dev/null
@@ -0,0 +1,56 @@
+"""
+ Copyright (c) 2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.roifeatureextractor_onnx import ExperimentalDetectronROIFeatureExtractor
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
+from mo.graph.graph import Graph, Node, rename_node
+
+
+class ONNXPersonDetectionCrossroadReplacement(FrontReplacementFromConfigFileGeneral):
+    """
+    Insert ExperimentalDetectronROIFeatureExtractor layers instead of sub-graphs of the model.
+    """
+    replacement_id = 'ONNXPersonDetectionCrossroadReplacement'
+
+    def transform_graph(self, graph: Graph, replacement_descriptions: dict):
+        fpn_heads = replacement_descriptions['fpn_heads']
+        for inp, out in zip(replacement_descriptions['ROI_feature_extractor_inputs'],
+                            replacement_descriptions['ROI_feature_extractor_outputs']):
+            insert_experimental_layers(graph, fpn_heads, inp, out)
+
+
+def insert_experimental_layers(graph: Graph, input_fpn_heads: list, inp: str, out: str):
+    old_output_node = Node(graph, out)
+    output_name = old_output_node.soft_get('name', old_output_node.id)
+    old_output_node_name = output_name + '/old'
+    rename_node(old_output_node, old_output_node_name)
+
+    input_fpn_head_nodes = [Node(graph, node_id) for node_id in input_fpn_heads]
+    fpn_roi_align = ExperimentalDetectronROIFeatureExtractor(graph, {'name': output_name,
+                                                                     'distribute_rois_between_levels': 1,
+                                                                     'image_id': 0,
+                                                                     'output_size': 7,
+                                                                     'preserve_rois_order': 1,
+                                                                     'pyramid_scales': int64_array(
+                                                                         [4, 8, 16, 32, 64]),
+                                                                     'sampling_ratio': 2, }).create_node()
+    rename_node(fpn_roi_align, output_name)
+    fpn_roi_align.in_port(0).connect(Node(graph, inp).out_port(0))
+    for ind, fpn_node in enumerate(input_fpn_head_nodes):
+        fpn_roi_align.in_port(ind + 1).connect(fpn_node.out_port(0))
+
+    old_output_node.out_port(0).get_connection().set_source(fpn_roi_align.out_port(0))
index a9d1d70..b86fef2 100644 (file)
@@ -79,13 +79,17 @@ class GlobalMaxPoolFrontExtractor(FrontExtractorOp):
 
 
 def common_onnx_pool_extractor(node):
+    kernel_shape = onnx_attr(node, 'kernel_shape', 'ints', default=None, dst_type=lambda x: np.array(x, dtype=np.int64))
+    final_kernel_shape = np.array([1, 1, *[x for x in kernel_shape]], dtype=np.int64) if kernel_shape is not None else None
+
     pads = onnx_attr(node, 'pads', 'ints', default=None, dst_type=lambda x: np.array(x, dtype=np.int64))
 
-    # Try to convert slightly incorrect models with insufficient pad parameters
-    if pads is not None and (pads.size == 2 or pads.size % 2 != 0):
-        log.warning(
-            'Node {} has pad = {} which is ill-formed -- it should consist of N%2==0 elements.'.format(node.name,
-                                                                                                       pads))
+    if kernel_shape is not None and pads is not None and kernel_shape.size * 2 != pads.size:
+        log.warning('Node {} has pad = {} which is ill-formed -- it should have even amount of elements.'.format(
+            node.soft_get('name', node.id), pads))
+
+        # Try to convert slightly incorrect models with insufficient pad parameters
+        assert pads.size * 2 == kernel_shape.size
         pads = np.concatenate([pads, pads])
         log.warning('Extended pads to {}'.format(pads))
 
@@ -102,10 +106,8 @@ def common_onnx_pool_extractor(node):
     final_strides = np.array([1, 1, *[x for x in strides]], dtype=np.int64) if strides is not None else None
 
     dilations = onnx_attr(node, 'dilations', 'ints', default=None, dst_type=lambda x: np.array(x, dtype=np.int64))
-    assert dilations is None, 'dilations attribute is not supported in node {}'.format(node.id)
-
-    kernel_shape = onnx_attr(node, 'kernel_shape', 'ints', default=None, dst_type=lambda x: np.array(x, dtype=np.int64))
-    final_kernel_shape = np.array([1, 1, *[x for x in kernel_shape]], dtype=np.int64) if kernel_shape is not None else None
+    assert dilations is None or np.all(dilations == 1),\
+        'Node {} has "dilations" attribute with values not equal to 1s which is not supported'.format(node.id)
 
     # exclude_pad = True only when count_include_pad == 0
     exclude_pad = onnx_attr(node, 'count_include_pad', 'i', default=0) == 0
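
Note: the reworked pooling extractor now reads `kernel_shape` before `pads`, so a truncated `pads` attribute can be checked against the kernel rank and duplicated, and the old hard rejection of `dilations` is relaxed to accept dilations that are all 1. A hedged NumPy-only sketch of the shape normalization the extractor performs (attribute reading is mocked; names are illustrative, not the patch's own):

    import numpy as np

    def normalize_pool_attrs(kernel_shape, strides, dilations):
        # Prepend batch and channel dims, as the extractor does with
        # np.array([1, 1, *attr]) for the spatial-only ONNX attributes.
        final_kernel = np.array([1, 1, *kernel_shape], dtype=np.int64)
        final_strides = np.array([1, 1, *strides], dtype=np.int64) if strides is not None else None

        # Mirrors the relaxed check in the patch: dilations may be present
        # as long as every value equals 1.
        assert dilations is None or np.all(np.array(dilations) == 1)
        return final_kernel, final_strides

    print(normalize_pool_attrs([3, 3], [2, 2], [1, 1]))
    # (array([1, 1, 3, 3]), array([1, 1, 2, 2]))
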
index 65652cf..118b04a 100644 (file)
  limitations under the License.
 """
 
-import onnx
 import unittest
 
 import numpy as np
+import onnx
 
 from extensions.front.onnx.priorbox_clustered_ext import PriorBoxClusteredFrontExtractor
 from extensions.ops.priorbox_clustered import PriorBoxClusteredOp
index 66055a3..fdf96e5 100644 (file)
  limitations under the License.
 """
 
-import onnx
 import unittest
 
 import numpy as np
+import onnx
 
 from extensions.front.onnx.priorbox_ext import PriorBoxFrontExtractor
 from extensions.ops.priorbox import PriorBoxOp
index 63bfda6..7e60e5a 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from extensions.ops.fakequantize import FakeQuantize
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
-from extensions.ops.fakequantize import FakeQuantize
 
 
 class FakeQuantizeFrontExtractor(FrontExtractorOp):
diff --git a/model-optimizer/extensions/front/onnx/reverse_sequence_ext.py b/model-optimizer/extensions/front/onnx/reverse_sequence_ext.py
new file mode 100644 (file)
index 0000000..48cf743
--- /dev/null
@@ -0,0 +1,36 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.reverse_sequence import ReverseSequence
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ReverseSequenceExtractor(FrontExtractorOp):
+    op = 'ReverseSequence'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        batch_axis = onnx_attr(node, 'batch_axis', 'i', default=1)
+        time_axis = onnx_attr(node, 'time_axis', 'i', default=0)
+
+        attrs = {
+            'batch_axis': batch_axis,
+            'seq_axis': time_axis,
+        }
+        ReverseSequence.update_node_stat(node, attrs)
+        return __class__.enabled
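
Note: the new extractor maps the ONNX ReverseSequence attributes `batch_axis` (default 1) and `time_axis` (default 0) onto the internal `batch_axis`/`seq_axis` pair. A purely illustrative NumPy sketch of what the op computes for the default, time-major layout:

    import numpy as np

    def reverse_sequence(x, seq_lengths, seq_axis=0, batch_axis=1):
        # Reverse the first seq_lengths[b] elements along seq_axis,
        # independently for every batch element b.
        out = x.copy()
        for b, length in enumerate(seq_lengths):
            sl = [slice(None)] * x.ndim
            sl[batch_axis] = b
            sl_rev = list(sl)
            sl[seq_axis] = slice(0, length)
            sl_rev[seq_axis] = slice(length - 1, None, -1)
            out[tuple(sl)] = x[tuple(sl_rev)]
        return out

    x = np.arange(12).reshape(4, 3)      # 4 time steps, 3 sequences
    print(reverse_sequence(x, [2, 3, 1]))
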
diff --git a/model-optimizer/extensions/front/onnx/roialign_ext.py b/model-optimizer/extensions/front/onnx/roialign_ext.py
new file mode 100644 (file)
index 0000000..bcf97fc
--- /dev/null
@@ -0,0 +1,36 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.roialign import ROIAlign
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ROIAlignExtractor(FrontExtractorOp):
+    op = 'ROIAlign'
+    enabled = True
+
+    @classmethod
+    def extract(cls, node):
+        mode = onnx_attr(node, 'mode', 's', default=b'avg').decode()
+        output_height = onnx_attr(node, 'output_height', 'i', default=1)
+        output_width = onnx_attr(node, 'output_width', 'i', default=1)
+        sampling_ratio = onnx_attr(node, 'sampling_ratio', 'i', default=0)
+        spatial_scale = onnx_attr(node, 'spatial_scale', 'f', default=1.0)
+
+        ROIAlign.update_node_stat(node, {'pooled_h': output_height, 'pooled_w': output_width,
+                                         'sampling_ratio': sampling_ratio, 'spatial_scale': spatial_scale,
+                                         'mode': mode})
+        return cls.enabled
diff --git a/model-optimizer/extensions/front/onnx/scatter_ext.py b/model-optimizer/extensions/front/onnx/scatter_ext.py
new file mode 100644 (file)
index 0000000..8d3be1c
--- /dev/null
@@ -0,0 +1,41 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.scatter import ScatterElementsUpdate
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ScatterExtractor(FrontExtractorOp):
+    # deprecated ONNX operation
+    op = 'Scatter'
+    enabled = True
+
+    @classmethod
+    def extract(cls, node):
+        axis = onnx_attr(node, 'axis', 'i', default=0)
+        ScatterElementsUpdate.update_node_stat(node, {'axis': axis})
+        return cls.enabled
+
+
+class ScatterElementsExtractor(FrontExtractorOp):
+    op = 'ScatterElements'
+    enabled = True
+
+    @classmethod
+    def extract(cls, node):
+        axis = onnx_attr(node, 'axis', 'i', default=0)
+        ScatterElementsUpdate.update_node_stat(node, {'axis': axis})
+        return cls.enabled
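
Note: both the deprecated ONNX Scatter op and ScatterElements are extracted into the same internal ScatterElementsUpdate, keeping only the `axis` attribute. A NumPy sketch (illustrative only, not the plugin implementation) of the element-wise update the op performs along an axis:

    import numpy as np

    def scatter_elements_update(data, indices, updates, axis=0):
        # For every position p in `updates`, replace the element of `data`
        # whose coordinate along `axis` is indices[p] (other coords equal p).
        out = data.copy()
        for p in np.ndindex(indices.shape):
            target = list(p)
            target[axis] = indices[p]
            out[tuple(target)] = updates[p]
        return out

    data = np.zeros((3, 3), dtype=np.int64)
    indices = np.array([[1, 0, 2],
                        [0, 2, 1]])
    updates = np.array([[10, 20, 30],
                        [40, 50, 60]])
    print(scatter_elements_update(data, indices, updates, axis=0))
    # [[40 20  0]
    #  [10  0 60]
    #  [ 0 50 30]]
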
index 9cfedd8..f517f68 100644 (file)
 
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
-from mo.ops.softmax import Softmax
+from mo.ops.softmax import LogSoftmax, Softmax
 
 
-class SoftmaxFrontExtractor(FrontExtractorOp):
+class SoftmaxExtractor(FrontExtractorOp):
     op = 'Softmax'
     enabled = True
 
     @classmethod
     def extract(cls, node):
         axis = onnx_attr(node, 'axis', 'i', default=1)
+        Softmax.update_node_stat(node, {'axis': axis})
+        return cls.enabled
+
 
-        attrs = {
-            'axis': axis
-        }
+class LogSoftmaxExtractor(FrontExtractorOp):
+    op = 'LogSoftmax'
+    enabled = True
 
-        # update the attributes of the node
-        Softmax.update_node_stat(node, attrs)
+    @classmethod
+    def extract(cls, node):
+        axis = onnx_attr(node, 'axis', 'i', default=1)
+        LogSoftmax.update_node_stat(node, {'axis': axis})
         return cls.enabled
index b5049e3..c7ea1c7 100644 (file)
@@ -16,9 +16,9 @@
 
 import numpy as np
 
-from mo.ops.squeeze import Squeeze
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
+from mo.ops.squeeze import Squeeze
 
 
 class SqueezeFrontExtractor(FrontExtractorOp):
index a09cef1..8030a52 100644 (file)
@@ -22,8 +22,8 @@ import onnx
 from generator import generator, generate
 
 from extensions.front.onnx.transpose_ext import TransposeFrontExtractor
-from mo.ops.op import Op
 from extensions.ops.transpose import Transpose
+from mo.ops.op import Op
 from mo.utils.unittest.extractors import PB
 
 
index d99fbb3..4ba3c6f 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.reciprocal import ReciprocalReplacer
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
diff --git a/model-optimizer/extensions/front/scatter_normalizer.py b/model-optimizer/extensions/front/scatter_normalizer.py
new file mode 100644 (file)
index 0000000..07afa84
--- /dev/null
@@ -0,0 +1,42 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
+from mo.ops.const import Const
+
+
+class ScatterNormalizer(FrontReplacementPattern):
+    enabled = True
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for node in graph.get_op_nodes(is_scatter=True):
+            name = node.soft_get('name', node.id)
+            input_ports_count = len([port for port in node.in_ports().values() if not port.disconnected()])
+            has_axis = node.has_valid('axis')
+
+            if has_axis:
+                assert input_ports_count == 3, \
+                    '{} node {} has unexpected number of input ports {}'.format(node.op, name, input_ports_count)
+                const = Const(graph, {'name': name + '/axis', 'value': np.int64(node.axis)}).create_node()
+                node.add_input_port(3, skip_if_exist=True)
+                node.in_port(3).connect(const.out_port(0))
+                del node['axis']
+            else:
+                assert input_ports_count == 4, \
+                    '{} node {} has unexpected number of input ports {}'.format(node.op, name, input_ports_count)
diff --git a/model-optimizer/extensions/front/scatter_normalizer_test.py b/model-optimizer/extensions/front/scatter_normalizer_test.py
new file mode 100644 (file)
index 0000000..016cd99
--- /dev/null
@@ -0,0 +1,80 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.front.scatter_normalizer import ScatterNormalizer
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph, result, connect, \
+    regular_op_with_empty_data
+
+nodes = {
+    **regular_op_with_empty_data('placeholder_1', {'type': 'Parameter'}),
+    **regular_op_with_empty_data('placeholder_2', {'type': 'Parameter'}),
+    **regular_op_with_empty_data('placeholder_3', {'type': 'Parameter'}),
+    **regular_op_with_empty_data('node', {'op': 'ScatterElementsUpdate', 'is_scatter': True}),
+    **regular_op_with_empty_data('axis', {'type': 'Const', 'value': None}),
+    **result(),
+}
+
+edges = [
+    *connect('placeholder_1', '0:node'),
+    *connect('placeholder_2', '1:node'),
+    *connect('placeholder_3', '2:node'),
+    *connect('node', 'output'),
+]
+
+
+class TestDiv(unittest.TestCase):
+    def test_ScatterElementsUpdate_has_axis_and_3_inputs(self):
+        graph = build_graph(nodes, edges, {'node': {'axis': 1}}, nodes_with_edges_only=True)
+        ScatterNormalizer().find_and_replace_pattern(graph)
+
+        graph_ref = build_graph(nodes, [
+            *edges,
+            *connect('axis', '3:node'),
+        ], {'axis': {'value': np.int64(1)}}, nodes_with_edges_only=True)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_ScatterElementsUpdate_has_axis_and_4_inputs(self):
+        graph = build_graph(nodes, [
+            *edges,
+            *connect('axis', '3:node'),
+        ], {'node': {'axis': 1}, 'axis': {'value': np.int64(1)}}, nodes_with_edges_only=True)
+        self.assertRaises(AssertionError, ScatterNormalizer().find_and_replace_pattern, graph)
+
+    def test_ScatterElementsUpdate_has_no_axis_and_3_inputs(self):
+        graph = build_graph(nodes, edges, nodes_with_edges_only=True)
+        self.assertRaises(AssertionError, ScatterNormalizer().find_and_replace_pattern, graph)
+
+    def test_ScatterElementsUpdate_has_no_axis_and_4_inputs(self):
+        graph = build_graph(nodes, [
+            *edges,
+            *connect('axis', '3:node'),
+        ], {'axis': {'value': np.int64(1)}}, nodes_with_edges_only=True)
+        ScatterNormalizer().find_and_replace_pattern(graph)
+
+        graph_ref = build_graph(nodes, [
+            *edges,
+            *connect('axis', '3:node'),
+        ], {'axis': {'value': np.int64(1)}}, nodes_with_edges_only=True)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True)
+        self.assertTrue(flag, resp)
index 5cdd165..ad3a7e4 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.softsign_replacer import SoftSign
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter', 'shape': np.array([1, 227, 227, 3])},
index 3e6d7d1..26d095c 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.squared_difference import SquaredDifference
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter', 'shape': np.array([1, 227, 227, 3])},
index ea053e7..c029096 100644 (file)
@@ -16,8 +16,6 @@
 
 import logging as log
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementSubgraph
 from mo.graph.graph import Graph
 
index 02f5f2b..11d8b43 100644 (file)
@@ -16,8 +16,6 @@
 
 import logging as log
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementOp
 from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
index b2a41b8..9748252 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from mo.front.common.partial_infer.elemental import single_output_infer
+from mo.front.extractor import FrontExtractorOp
 
 
 class LoopCondFrontExtractor(FrontExtractorOp):
index bfab111..26d6e1e 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.front.standalone_const_eraser import StandaloneConstEraser
 from extensions.ops.DetectionOutput import DetectionOutput
 from mo.front.common.partial_infer.utils import int64_array
index 44bcb95..f46bb92 100644 (file)
@@ -15,6 +15,7 @@
 """
 
 import logging as log
+from typing import Optional
 
 from extensions.ops.elementwise import Mul
 from extensions.ops.interpolate import Interpolate
@@ -24,7 +25,6 @@ from mo.graph.graph import Graph, Node
 from mo.ops.const import Const
 from mo.ops.shape import Shape
 from mo.ops.strided_slice import StridedSlice
-from typing import Optional
 
 
 def get_concat_after_split(split: Node) -> Optional[Node]:
index 05ccae6..6eb9e5f 100644 (file)
@@ -21,9 +21,8 @@ import numpy as np
 
 from extensions.front.tf.SplitConcatPairToInterpolate import SplitConcatPairToInterpolate
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
-
+from mo.utils.unittest.graph import build_graph
 
 graph_node_attrs_for_2d_spatial_case = {
         'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index aea70f0..727a79c 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 from extensions.ops.select import Select
-from mo.graph.graph import Graph
 from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph
 
 
 class SwitchMergeOptimization(FrontReplacementSubgraph):
index 5d177a2..780a169 100644 (file)
@@ -17,8 +17,8 @@ import unittest
 
 from extensions.front.tf.SwitchMergeOptimization import SwitchMergeOptimization
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class SwitchMergeOptimizationTest(unittest.TestCase):
index 3b81887..6800574 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.TensorArrayGather import TensorArrayGather
-from mo.front.tf.extractors.utils import tf_int_list, tf_tensor_shape
+from mo.front.extractor import FrontExtractorOp
+from mo.front.tf.extractors.utils import tf_tensor_shape
 from mo.graph.graph import Node
 
 
index 09ed9d1..e251dfd 100644 (file)
@@ -19,7 +19,7 @@ import logging as log
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Graph
 from mo.utils.error import Error
 
 
index ac3dc7b..7cea253 100644 (file)
 
 import logging as log
 
-import numpy as np
-
-from extensions.ops.bucketize import Bucketize
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import Graph, Node
+from mo.graph.graph import Graph
 from mo.ops.const import Const
 
 
index 8ef4706..82ae051 100644 (file)
@@ -17,7 +17,7 @@ import numpy as np
 
 from mo.front.common.partial_infer.utils import convert_tf_padding_to_str, int64_array
 from mo.front.extractor import FrontExtractorOp
-from mo.front.tf.extractors.utils import tf_data_format_spatial, tf_data_format_channel, tf_data_format_batch, \
+from mo.front.tf.extractors.utils import tf_data_format_channel, tf_data_format_batch, \
     tf_int_list
 from mo.ops.convolution import Convolution
 from mo.ops.op import PermuteAttrs
@@ -89,7 +89,7 @@ def tf_create_attrs(node, input_feature_channel, output_feature_channel):
 
     attrs = {
         'type': 'Convolution',
-        'auto_pad': convert_tf_padding_to_str(node.pb.attr['padding']),
+        'auto_pad': convert_tf_padding_to_str(node.pb.attr['padding'].s.decode()),
         'bias_addable': True,
         'bias_term': False,
         'dilation': dilations,
index 16206a3..adae35f 100644 (file)
@@ -13,9 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 
-from mo.front.common.partial_infer.utils import convert_tf_padding_to_str, int64_array
+from mo.front.common.partial_infer.utils import convert_deconv_tf_padding_to_str, int64_array
 from mo.front.extractor import FrontExtractorOp
 from mo.front.tf.extractors.utils import tf_data_format_spatial, tf_data_format_channel, tf_data_format_batch, \
     tf_int_list
@@ -65,7 +64,7 @@ def tf_create_attrs(node, input_feature_channel, output_feature_channel):
     data_format = node.pb.attr["data_format"]
 
     return {
-        'auto_pad': convert_tf_padding_to_str(node.pb.attr['padding']),
+        'auto_pad': convert_deconv_tf_padding_to_str(node.pb.attr['padding'].s.decode()),
         'bias_addable': True,
         'bias_term': False,
         'spatial_dims': tf_data_format_spatial(data_format),
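
Note: the two hunks above (convolution and deconvolution extractors) stop passing the raw TF AttrValue protobuf into the padding-conversion helpers and pass the decoded string instead. A tiny sketch of the call-site difference, using a stand-in attribute object (the real `node.pb.attr['padding']` comes from the TF GraphDef, where string attributes are stored as bytes in the `s` field):

    class FakeAttrValue:
        def __init__(self, s: bytes):
            self.s = s  # TF stores string attributes as bytes

    padding_attr = FakeAttrValue(b'SAME')

    # old call site: the helper received the whole attribute object
    # convert_tf_padding_to_str(padding_attr)
    # new call site: the extractor decodes the string first
    print(padding_attr.s.decode())  # 'SAME'
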
index fcc6343..ed63e27 100644 (file)
 """
 import logging as log
 
-import numpy as np
-
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementOp
 from mo.front.tf.extractors.utils import tf_dtype_extractor
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Graph
 from mo.ops.const import Const
 
 
index 8109ce9..265b54e 100644 (file)
 import unittest
 
 from extensions.front.tf.mvn_unrolled import MVNUnrolled
+from extensions.ops.mvn import MVN
 from mo.ops.op import Op
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
-from extensions.ops.mvn import MVN
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class MVNUnrolledMatchingTests(unittest.TestCase):
index 0500da7..028ed2f 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 
 from mo.front.common.partial_infer.utils import convert_tf_padding_to_str
 from mo.front.extractor import FrontExtractorOp
-from mo.front.tf.extractors.utils import tf_data_format_spatial, tf_data_format_channel, tf_data_format_batch, \
-    tf_int_list
+from mo.front.tf.extractors.utils import tf_data_format_spatial, tf_int_list
 from mo.ops.pooling import Pooling
 
 
@@ -78,7 +76,7 @@ def create_pooling_attrs(node, pool_method):
     data_format = node.pb.attr["data_format"]
 
     attrs = {
-        'auto_pad': convert_tf_padding_to_str(node.pb.attr['padding']),
+        'auto_pad': convert_tf_padding_to_str(node.pb.attr['padding'].s.decode()),
         'window': tf_int_list(node.pb.attr["ksize"].list),
         'spatial_dims': tf_data_format_spatial(data_format),
         'pad': None,  # will be inferred when input shape is known
index ff653d2..94c2b0f 100644 (file)
 """
 
 from mo.front.extractor import FrontExtractorOp
-from mo.ops.softmax import Softmax
+from mo.ops.softmax import LogSoftmax, Softmax
 
 
-class SoftmaxFrontExtractor(FrontExtractorOp):
+class SoftmaxExtractor(FrontExtractorOp):
     op = 'Softmax'
     enabled = True
 
@@ -30,3 +30,17 @@ class SoftmaxFrontExtractor(FrontExtractorOp):
             axis = node.pb.attr['axis'].i
         Softmax.update_node_stat(node, {'axis': axis})
         return cls.enabled
+
+
+class LogSoftmaxExtractor(FrontExtractorOp):
+    op = 'LogSoftmax'
+    enabled = True
+
+    @classmethod
+    def extract(cls, node):
+        # the default value for the TF LogSoftmax is -1
+        axis = -1
+        if 'axis' in node.pb.attr:
+            axis = node.pb.attr['axis'].i
+        LogSoftmax.update_node_stat(node, {'axis': axis})
+        return cls.enabled
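
Note: the TF LogSoftmax extractor added here defaults the axis to -1, while the ONNX LogSoftmax extractor earlier in this patch defaults to 1; for tensors above 2D the two defaults are not interchangeable. A short NumPy check, illustrative only:

    import numpy as np

    def lsm(t, axis):
        return t - np.log(np.exp(t).sum(axis=axis, keepdims=True))

    x = np.random.randn(1, 3, 2, 2).astype(np.float32)
    # ONNX extractor default (axis=1) vs TF extractor default (axis=-1)
    print(np.allclose(lsm(x, 1), lsm(x, -1)))  # almost surely False
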
index 58f6eed..520be08 100644 (file)
@@ -13,9 +13,9 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-from extensions.ops.split import Split
 from extensions.ops.elementwise import Sub
 from extensions.ops.rank import Rank
+from extensions.ops.split import Split
 from extensions.ops.transpose import Transpose
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementPattern
index f5044b4..efee2b4 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.space_to_depth import SpaceToDepth
+from mo.front.extractor import FrontExtractorOp
 
 
 class SpaceToDepthFrontExtractor(FrontExtractorOp):
index d859573..5edff71 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.sparse_fill_empty_rows import SparseFillEmptyRows
 from mo.front.extractor import FrontExtractorOp
 
index fc2440c..d2a656d 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.sparse_segment_mean import SparseSegmentMean
 from mo.front.extractor import FrontExtractorOp
 
index 9b08e39..b0137d1 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.sparse_segment_sqrtn import SparseSegmentSqrtN
 from mo.front.extractor import FrontExtractorOp
 
index 292b6e2..33a2e36 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.sparse_segment_sum import SparseSegmentSum
 from mo.front.extractor import FrontExtractorOp
 
index a9b425e..b331775 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.sparse_to_dense import SparseToDense
 from mo.front.extractor import FrontExtractorOp
 
index 1544e1f..2732212 100644 (file)
@@ -18,9 +18,7 @@ import logging as log
 
 from extensions.ops.sparse_weighted_sum import ExperimentalSparseWeightedSum
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import Node, Graph
-from mo.ops.op import Op
-from mo.ops.shape import Shape
+from mo.graph.graph import Graph
 
 
 class ExperimentalSparseWeightedSumFrontReplacer(FrontReplacementSubgraph):
index 03d6abb..becbac3 100644 (file)
 
 import unittest
 
-from extensions.front.tf.sparse_weighted_sum import ExperimentalSparseWeightedSumFrontReplacer, ExperimentalSparseWeightedSumFrontReplacer2
+from extensions.front.tf.sparse_weighted_sum import ExperimentalSparseWeightedSumFrontReplacer, \
+    ExperimentalSparseWeightedSumFrontReplacer2
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class ExperimentalSparseWeightedSumFrontReplacersTest(unittest.TestCase):
index 31c7597..211e042 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.front.tf.swish import Swish
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': np.array([1, 227, 227, 3]), 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index affd55a..7206994 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.topk import TopK
+from mo.front.extractor import FrontExtractorOp
 
 
 class TopKExtractor(FrontExtractorOp):
index 7ce426d..75f1d5b 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.unique import Unique
 from mo.front.extractor import FrontExtractorOp
 
index 3cc12f0..da318a7 100644 (file)
@@ -17,9 +17,9 @@ import logging as log
 
 import numpy as np
 
+from extensions.middle.ApplyNHWCtoNCHWpermutation import ApplyNHWCtoNCHWpermutation
 from extensions.middle.InsertLayoutPropagationTransposes import is_input_data_in_correct_layout, \
     is_output_data_in_correct_layout
-from extensions.middle.ApplyNHWCtoNCHWpermutation import ApplyNHWCtoNCHWpermutation
 from extensions.middle.pass_separator import PostMiddleStart
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph, Node
index 909357c..8d7803f 100644 (file)
@@ -13,6 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
+import numpy as np
+
 from extensions.middle.EltwiseChecker import EltwiseChecker
 from extensions.ops.elementwise import Add
 from mo.front.common.layout import get_features_dim
@@ -20,7 +22,6 @@ from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
 from mo.ops.unsqueeze import Unsqueeze
-import numpy as np
 
 
 class BiasAddInputBroadcasting(MiddleReplacementPattern):
index 1d50efa..8705f03 100644 (file)
@@ -14,7 +14,7 @@
  limitations under the License.
 """
 
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 from mo.middle.replacement import MiddleReplacementPattern
 
index 5552a15..441d50d 100644 (file)
@@ -21,8 +21,8 @@ import numpy as np
 from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index e85d352..c3493d7 100644 (file)
  limitations under the License.
 """
 
+import unittest
+
 import numpy as np
 
-import unittest
 from extensions.middle.CutInputHavingZeroDimFromConcat import CutInputHavingZeroDimFromConcat
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
-
+from mo.utils.unittest.graph import build_graph
 
 node_attrs_for_the_case_when_there_are_no_zero_shape_constants = {
     'const0': {
index 32ce713..f0f0901 100644 (file)
  limitations under the License.
 """
 
-import numpy as np
-
 from extensions.ops.gather import Gather
 from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import PermuteAttrs
 from mo.ops.const import Const
-from mo.graph.graph import Graph, rename_nodes
+from mo.ops.op import PermuteAttrs
 
 
 class Deconvolution3rdInputNormalization(MiddleReplacementPattern):
index 918317a..1e2bf92 100644 (file)
@@ -20,7 +20,6 @@ from mo.graph.graph import Graph, add_opoutput
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
 from mo.ops.op import Op
-from mo.ops.reshape import Reshape
 from mo.ops.squeeze import Squeeze
 from mo.ops.unsqueeze import Unsqueeze
 
index b9c27ab..4e7ed9b 100644 (file)
 
 from typing import Dict
 
-import logging as log
-
 import numpy as np
 
+from extensions.ops.elementwise import Mul, Add
 from extensions.ops.mvn import MVN
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph, Node
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
-from extensions.ops.elementwise import Mul, Add
 from mo.ops.reshape import Reshape
 from mo.ops.shape import Shape
 from mo.utils.shape import node_to_get_spatial_dimensions_value, node_to_get_features_dimension_value, \
index b70cf87..e9b7870 100644 (file)
@@ -16,8 +16,8 @@
 
 from extensions.middle.pass_separator import PostMiddleStart
 from extensions.ops.transpose import Transpose
-from mo.middle.replacement import MiddleReplacementPattern
 from mo.graph.graph import Graph, Node
+from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
 from mo.ops.op import PermuteAttrs
 
index d670705..72adc17 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 import unittest
 
+import numpy as np
+
 from extensions.middle.InsertSelect import AddSelectBeforeMemoryNodePattern
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class InsertSelectTests(unittest.TestCase):
index 86e0b5f..ef83275 100644 (file)
 """
 
 import logging as log
+from typing import List
 
 from extensions.ops.interpolate import Interpolate
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph, Node
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
-from typing import List
 
 
 def node_has_one_consumer(node: Node) -> bool:
index a5a9bf8..6e6390c 100644 (file)
@@ -19,9 +19,8 @@ import unittest
 
 from extensions.middle.InterpolateSequenceToInterpolate import InterpolateSequenceToInterpolate
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
-
+from mo.utils.unittest.graph import build_graph
 
 graph_node_attrs_for_2d_case_1 = {
     'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index 20075f2..5d130e9 100644 (file)
@@ -15,7 +15,9 @@
 """
 
 import unittest
+
 import numpy as np
+
 from extensions.middle.L2NormToNorm import L2NormToNorm
 from mo.utils.ir_engine.compare_graphs import compare_graphs
 from mo.utils.unittest.graph import build_graph_with_attrs
index f8ced45..04a759c 100644 (file)
@@ -22,7 +22,6 @@ from mo.graph.graph import Graph, add_opoutput
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
 from mo.ops.op import Op
-from mo.ops.reshape import Reshape
 from mo.ops.squeeze import Squeeze
 from mo.ops.unsqueeze import Unsqueeze
 
index 7e38104..7802c7d 100644 (file)
@@ -15,7 +15,6 @@
 """
 import numpy as np
 
-from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph, Node
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.concat import Concat
index bd42e76..58a7283 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.tf.graph_utils import create_op_node_with_second_input
 from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.const import Const
-from mo.ops.reshape import Reshape
-from mo.ops.tile import Tile
+from mo.ops.unsqueeze import Unsqueeze
 
 
 class MXTileReplacer(MiddleReplacementPattern):
     """
-        This class Reshape Tile operation if len input shape < output shape.
+        Aligns Tile operation from MxNet framework with OpenVINO Tile
+
+        MxNet has no restrictions on the `tile_array` input of the `Tile` operation.
+        If len(tile_array) > rank(data), this transformation inserts an Unsqueeze before the Tile operation,
+        because in this case len(output_shape) > len(input_shape).
+
+        DOC link: https://beta.mxnet.io/api/ndarray/_autogen/mxnet.ndarray.tile.html#mxnet.ndarray.tile
     """
 
     enabled = True
-    force_clean_up = True
 
     def pattern(self):
         return dict(
@@ -42,16 +44,17 @@ class MXTileReplacer(MiddleReplacementPattern):
 
     @staticmethod
     def replace_pattern(graph: Graph, match: dict):
-        mxtile = match['tile']
-
-        in_shape = mxtile.in_port(0).data.get_shape()
-        out_shape = mxtile.out_node(0).shape
-
-        tile_array_diff = (len(out_shape) - len(in_shape))
-        if tile_array_diff > 0:
-            reshape_shape = np.copy(in_shape)
-            for i in range(tile_array_diff):
-                reshape_shape = np.insert(in_shape, 0, 1, axis=0)
-            reshape_node = create_op_node_with_second_input(graph, Reshape, int64_array(reshape_shape), dict(name=mxtile.id + "/Reshape"))
-            mxtile.in_port(0).get_source().get_connection().set_destination(reshape_node.in_port(0))
-            reshape_node.out_port(0).get_connection().set_destination(mxtile.in_port(0))
+        node = match['tile']
+        name = node.soft_get('name', node.id)
+        in_shape = node.in_port(0).data.get_shape()
+        out_shape = node.out_port(0).data.get_shape()
+
+        tile_array_diff = len(out_shape) - len(in_shape)
+        if tile_array_diff == 0:
+            return
+        assert tile_array_diff > 0,\
+            'Unexpected difference between rank(input) and rank(output) for node {}'.format(name)
+        unsqueeze_dims = int64_array(range(tile_array_diff))
+        unsqueeze = create_op_node_with_second_input(graph, Unsqueeze, unsqueeze_dims,
+                                                     dict(name=name + '/Unsqueeze', override_output_shape=True))
+        node.in_port(0).get_connection().insert_node(unsqueeze)
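
The rewritten replacer aligns ranks by unsqueezing leading axes of the data before Tile. A minimal NumPy sketch (illustrative only, not part of the patch; names are made up) of why prepending unit dimensions reproduces MxNet's tile semantics when len(tile_array) > rank(data):

    import numpy as np

    data = np.arange(6).reshape(2, 3)      # rank 2
    reps = (4, 1, 2)                       # len(reps) == 3 > data.ndim

    # Equivalent of inserting Unsqueeze over axes range(rank_diff):
    rank_diff = len(reps) - data.ndim
    aligned = data.reshape((1,) * rank_diff + data.shape)

    # MxNet/NumPy implicitly prepend unit dims, so both calls agree.
    assert np.array_equal(np.tile(data, reps), np.tile(aligned, reps))
    print(np.tile(aligned, reps).shape)    # (4, 2, 6)
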
diff --git a/model-optimizer/extensions/middle/MXTileReplacer_test.py b/model-optimizer/extensions/middle/MXTileReplacer_test.py
new file mode 100644 (file)
index 0000000..5226dfe
--- /dev/null
@@ -0,0 +1,118 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+from extensions.middle.MXTileReplacer import MXTileReplacer
+from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes = {
+    'placeholder': {'kind': 'op', 'op': 'Parameter'},
+    'placeholder_data': {'kind': 'data'},
+    'tile': {'kind': 'op', 'op': 'Tile'},
+    'tile_data': {'kind': 'data', 'shape': int64_array([1, 1, 1, 1])},
+    'result': {'kind': 'op', 'op': 'Result'},
+
+    'unsqueeze_1': {'kind': 'op', 'op': 'Unsqueeze'},
+    'unsqueeze_1_data': {'kind': 'data'},
+    'unsqueeze_1_const': {'kind': 'op', 'op': 'Const'},
+    'unsqueeze_1_const_data': {'kind': 'data'},
+}
+
+
+class MXTileReplacerTest(unittest.TestCase):
+
+    def test_insert_one_unsqueeze(self):
+        graph = build_graph(
+            nodes_attributes,
+            [
+                ('placeholder', 'placeholder_data'),
+                ('placeholder_data', 'tile'),
+                ('tile', 'tile_data'),
+                ('tile_data', 'result')
+            ],
+            {
+                'placeholder_data': {'shape': int64_array([1, 1, 1])}
+            },
+            nodes_with_edges_only=True
+        )
+
+        ref_graph = build_graph(
+            nodes_attributes,
+            [
+                ('placeholder', 'placeholder_data'),
+                ('placeholder_data', 'unsqueeze_1', {'in': 0}),
+                ('unsqueeze_1_const', 'unsqueeze_1_const_data'),
+                ('unsqueeze_1_const_data', 'unsqueeze_1', {'in': 1}),
+                ('unsqueeze_1', 'unsqueeze_1_data'),
+                ('unsqueeze_1_data', 'tile'),
+                ('tile', 'tile_data'),
+                ('tile_data', 'result')
+            ],
+            {
+                'placeholder_data': {'shape': int64_array([1, 1, 1])},
+                'unsqueeze_1_const_data': {'value': int64_array([0])}
+            },
+            nodes_with_edges_only=True
+        )
+
+        MXTileReplacer().find_and_replace_pattern(graph)
+        graph.clean_up()
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_insert_two_unsqueezes(self):
+        graph = build_graph(
+            nodes_attributes,
+            [
+                ('placeholder', 'placeholder_data'),
+                ('placeholder_data', 'tile'),
+                ('tile', 'tile_data'),
+                ('tile_data', 'result')
+            ],
+            {
+                'placeholder_data': {'shape': int64_array([1, 1])}
+            },
+            nodes_with_edges_only=True
+        )
+
+        ref_graph = build_graph(
+            nodes_attributes,
+            [
+                ('placeholder', 'placeholder_data'),
+                ('placeholder_data', 'unsqueeze_1', {'in': 0}),
+                ('unsqueeze_1_const', 'unsqueeze_1_const_data'),
+                ('unsqueeze_1_const_data', 'unsqueeze_1', {'in': 1}),
+                ('unsqueeze_1', 'unsqueeze_1_data'),
+                ('unsqueeze_1_data', 'tile'),
+                ('tile', 'tile_data'),
+                ('tile_data', 'result')
+            ],
+            {
+                'placeholder_data': {'shape': int64_array([1, 1])},
+                'unsqueeze_1_const_data': {'value': int64_array([0, 1])}
+            },
+            nodes_with_edges_only=True
+        )
+
+        MXTileReplacer().find_and_replace_pattern(graph)
+        graph.clean_up()
+
+        (flag, resp) = compare_graphs(graph, ref_graph, 'placeholder', check_op_attrs=True)
+        self.assertTrue(flag, resp)
index c76ff48..52f16f7 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.middle.MinimumMiddleReplacer import MinimumMiddleReplacer
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index 91b60fe..ca89b4c 100644 (file)
@@ -16,6 +16,7 @@
 
 import logging as log
 from typing import Dict
+
 import numpy as np
 
 from extensions.middle.BinarizeWeightsM1P1 import BinarizeWeightsM1P1
index 2a8960b..0af60ef 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.middle.ReluQuantizeFuse import ReluQuantizeFuse, ReluFakeQuantizeMark
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes = {
     # input
index c263e68..9b8de7c 100644 (file)
@@ -16,8 +16,8 @@
 import unittest
 
 from extensions.middle.RemoveDuplicationMemory import RemoveMemoryDuplicationPattern, MergeNeighborSplicePattern
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class RemoveMemoryDuplicationPatternTests(unittest.TestCase):
index 15f5ecf..7f91a7e 100644 (file)
@@ -17,8 +17,8 @@ import unittest
 
 from extensions.middle.RemoveUselessConcatSplit import RemoveUselessConcatSplitPattern
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class RemoveUselessConcatSplitTests(unittest.TestCase):
index b43832b..54554fc 100644 (file)
@@ -16,8 +16,8 @@
 import unittest
 
 from extensions.middle.RemoveUselessCrops import RemoveUselessCropsPattern
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class RemoveUselessCropsPatternTests(unittest.TestCase):
index 89d341f..c00bd53 100644 (file)
@@ -17,8 +17,8 @@ import unittest
 
 from extensions.middle.ReplaceMemoryOffsetWithSplice import ReplaceMemoryOffsetNodePattern
 from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class ReplaceMemoryOffsetNodePatternTests(unittest.TestCase):
index 9681918..4bb01de 100644 (file)
@@ -16,8 +16,8 @@
 import unittest
 
 from extensions.middle.ReplacePNorm import ReplacePNormNodePattern
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class ReplacePNormNodePatternTests(unittest.TestCase):
index 99cd084..f892c52 100644 (file)
@@ -17,8 +17,8 @@ import unittest
 
 from extensions.middle.ReplaceSpliceNodePattern import ReplaceSpliceNodePattern
 from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class ReplaceSpliceNodePatternTests(unittest.TestCase):
index 4a42caa..1148843 100644 (file)
@@ -19,8 +19,8 @@ from argparse import Namespace
 import numpy as np
 
 from extensions.middle.ScaleInput import ScaleInput
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
                     'node_1_data': {'value': None, 'kind': 'data', 'data_type': None},
index 6b88d51..0be30a4 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from extensions.middle.SharedWeightsDuplication import SharedWeightsDuplication
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'const': {'shape': None, 'type': 'Const', 'kind': 'op', 'op': 'Const'},
index 2f38cd1..ba6e7e1 100644 (file)
@@ -17,7 +17,7 @@
 import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Graph, Node, rename_node, rename_nodes
+from mo.graph.graph import Graph, Node, rename_nodes
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.const import Const
 from mo.ops.crop import Crop
index 9998e5a..d3397fc 100644 (file)
@@ -47,7 +47,6 @@ class BackEdgesMatching(MiddleReplacementPattern):
     graph_condition = [lambda graph: graph.graph['is_cyclic']]
 
     def run_after(self):
-        from extensions.middle.TensorIteratorCondition import SimpleConditionMatcher
         return [DynamicDecoderConditionMatcher]
 
     def run_before(self):
index 85fc190..9abd5d2 100644 (file)
@@ -17,8 +17,8 @@
 import unittest
 
 from extensions.middle.TensorIteratorBackEdge import BackEdgesMatching
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class BackEdgesMatchingTests(unittest.TestCase):
index dc8ac92..81838e2 100644 (file)
@@ -18,8 +18,8 @@ import unittest
 import numpy as np
 
 from extensions.middle.TensorIteratorCondition import LoopConditionMatcher
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class TensorIteratorConditionTests(unittest.TestCase):
index ae66ecf..2279ae8 100644 (file)
@@ -18,8 +18,8 @@ import unittest
 import numpy as np
 
 from extensions.middle.TensorIteratorInput import SmartInputMatcher, SimpleInputMatcher, BackEdgeSimpleInputMatcher
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class SmartInputMatcherTests(unittest.TestCase):
index 0096d5b..d2f7f41 100644 (file)
  limitations under the License.
 """
 
+from extensions.middle.ONNXRNNSequenceNormalize import ONNXRNNSequenceNormalize
 from extensions.middle.TF_lstm_cell_to_generic import TensorFlowLSTMtoGeneric
 from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
 from mo.graph.graph import Graph
 from mo.middle.pattern_match import find_isomorphisms
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.utils.error import Error
-from extensions.middle.ONNXRNNSequenceNormalize import ONNXRNNSequenceNormalize
 
 
 class TensorIteratorLSTM(MiddleReplacementPattern):
index 9181015..d62d0dd 100644 (file)
@@ -15,9 +15,9 @@
 """
 
 from collections import deque
+from copy import deepcopy
 
 import numpy as np
-from copy import deepcopy
 
 from extensions.ops.tensor_iterator import TensorIterator
 from mo.graph.graph import Node, Graph, add_opoutput
index a800eda..470a76c 100644 (file)
@@ -18,8 +18,8 @@ import unittest
 import numpy as np
 
 from extensions.middle.TensorIteratorOutput import SmartOutputMatcher
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class SmartOutputMatcherTests(unittest.TestCase):
index 82e18aa..0d063ba 100644 (file)
@@ -20,8 +20,8 @@ import numpy as np
 
 from extensions.middle.quantize_fuses import FakeQuantizeFuse
 from mo.front.common.partial_infer.eltwise import eltwise_infer
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes = {
     'placeholder': {'kind': 'op', 'op': 'Placeholder'},
index 847401f..eeaa381 100644 (file)
@@ -16,8 +16,7 @@
 
 import numpy as np
 
-from extensions.ops.identity import IdentityOp
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import merge_data_nodes
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.utils.error import Error
index 47d0a00..d99c202 100644 (file)
@@ -17,8 +17,8 @@ import unittest
 
 from extensions.middle.sparse_reshape import SparseReshapeMiddleReplacer
 from mo.front.common.partial_infer.utils import int64_array
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class SparseReshapeMiddleReplacerTests(unittest.TestCase):
index 4609349..bfd753a 100644 (file)
 """
 import unittest
 
-import numpy as np
-
-from extensions.middle.UselessStridedSlice import UselessStridedSliceEraser
 from extensions.middle.wights_permute_normalizer import WeightsPermuteNormalizer
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
-from mo.utils.ir_engine.compare_graphs import compare_graphs
 
 nodes_attributes = {
     'placeholder': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
index eaa928a..4256ad5 100644 (file)
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.utils import mark_input_bins
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
-import numpy as np
 
 
 class BlockLSTM(Op):
index edef1bc..167c6b2 100644 (file)
  limitations under the License.
 """
 
-
-import networkx as nx
 import numpy as np
+
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
-from mo.utils.error import Error
 
 
 class Enter(Op):
index 0d5798d..cd2a48f 100644 (file)
@@ -15,9 +15,9 @@
 """
 
 import numpy as np
+
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
-from mo.utils.error import Error
 
 
 class Exit(Op):
index 47ac286..e943e7d 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
+import numpy as np
+
 from extensions.ops.RNN import rnn_infer
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
-import numpy as np
 
 
 class GRU(Op):
index 1f5c47d..e0a98b5 100644 (file)
@@ -24,7 +24,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 graph_nodes_attrs = {
     'A': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'shape': None, 'value': None},
     'A_data': {'kind': 'data', 'shape': None, 'value': None},
index 9c048fb..eddc0a8 100644 (file)
@@ -14,7 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index 3071d2f..d16b2f4 100644 (file)
@@ -14,7 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index ef752ca..fc2564b 100644 (file)
@@ -14,9 +14,6 @@
  limitations under the License.
 """
 
-
-import networkx as nx
-
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
index 73cec11..1d22a6f 100644 (file)
  limitations under the License.
 """
 
+from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph, Node
 from mo.ops.op import Op
 from mo.ops.pooling import Pooling
-from mo.front.common.partial_infer.utils import int64_array
 
 
 class AdaptiveAvgPooling(Op):
index 030fd9f..52c1a93 100644 (file)
@@ -15,6 +15,7 @@
 """
 
 import logging as log
+
 import numpy as np
 
 from mo.front.caffe.extractors.utils import get_canonical_axis_index
index d1d3916..3f626cf 100644 (file)
@@ -16,7 +16,6 @@
 
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
-from mo.utils.error import Error
 
 
 class Assert(Op):
index 45b8d45..f1c68f9 100644 (file)
@@ -14,8 +14,7 @@
  limitations under the License.
 """
 
-from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
index 4773383..9255a73 100644 (file)
@@ -19,6 +19,7 @@ import numpy as np
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
+
 class Bucketize(Op):
     op = 'Bucketize'
 
index d165e33..fbc141a 100644 (file)
@@ -23,7 +23,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 nodes_attributes = {'input_tensor': {'shape': None, 'value': None, 'kind': 'data'},
                     'input_buckets': {'shape': None, 'value': None, 'kind': 'data'},
                     'bucketize_node': {'op': 'Bucketize', 'kind': 'op', 'with_right_bound': False},
index b5dc642..8676c1d 100644 (file)
@@ -18,8 +18,6 @@
 #                axis - dimension number for tensors concatenation
 import copy
 
-import networkx as nx
-
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
index 3f91e95..a559f08 100644 (file)
@@ -16,6 +16,7 @@
 
 import numpy as np
 
+from mo.front.common.layout import shape_for_layout, get_height_dim, get_batch_dim, get_features_dim, get_width_dim
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
@@ -51,13 +52,25 @@ class DepthToSpaceOp(Op):
         if in_shape.size != 4:
             raise Error('TensorFlow DepthToSpace operation is supported for 4D \'NHWC\' input layout only. '
                         'Current input shape is \'{}\''.format(in_shape))
-        N, H, W, C = in_shape
+
+        layout = node.graph.graph['layout']
+
+        N = in_shape[get_batch_dim(layout, 4)]
+        H = in_shape[get_height_dim(layout, 4)]
+        W = in_shape[get_width_dim(layout, 4)]
+        C = in_shape[get_features_dim(layout, 4)]
+
         block_size = node['block_size']
         if C % (block_size ** 2):
             raise Error('Feature dimensions of input tensor of DepthToSpace operation have to be divisible by square '
                         'of DepthToSpace \'block_size\' parameter. Input tensor shape = {}. Feature dimension = {}. '
                         'block_size = {}'.format(in_shape, C, block_size))
-        out_shape = [N, int(H * block_size), int(W * block_size), int(C / (block_size ** 2))]
-        if np.prod(in_shape) != np.prod(out_shape):
-            return
+
+        out_shape = shape_for_layout(layout,
+                                     batch=N,
+                                     features=int(C / (block_size ** 2)),
+                                     height=int(H * block_size),
+                                     width=int(W * block_size))
+
+        assert np.prod(in_shape) == np.prod(out_shape)
         node.out_node().shape = int64_array(out_shape)
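
With the layout helpers, the same shape arithmetic now covers both NHWC and NCHW. A hedged standalone restatement of the output-shape rule (block_size of 2 assumed, the helper name is hypothetical), matching the expectations in the updated unit tests:

    def depth_to_space_out_shape(in_shape, layout, block_size=2):
        # Spatial dims are multiplied by block_size, features divided by block_size^2.
        if layout == 'NHWC':
            n, h, w, c = in_shape
            return [n, h * block_size, w * block_size, c // block_size ** 2]
        n, c, h, w = in_shape  # NCHW assumed otherwise
        return [n, c // block_size ** 2, h * block_size, w * block_size]

    assert depth_to_space_out_shape([1, 1024, 576, 256], 'NHWC') == [1, 2048, 1152, 64]
    assert depth_to_space_out_shape([1, 256, 1024, 576], 'NCHW') == [1, 64, 2048, 1152]
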
index 2d995c2..e8e558c 100644 (file)
@@ -15,9 +15,7 @@
 """
 
 import unittest
-
 import numpy as np
-
 from extensions.ops.depth_to_space import DepthToSpaceOp
 from mo.graph.graph import Node
 from mo.utils.error import Error
@@ -36,22 +34,43 @@ edges = [
 
 
 class TestDepthToSpacePartialInfer(unittest.TestCase):
-    def test_tf_depth_to_space_infer(self):
+    def test_tf_depth_to_space_infer_nhwc(self):
         graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NHWC'
         dts_node = Node(graph, 'DtS')
         DepthToSpaceOp.infer(dts_node)
         exp_shape = np.array([1, 2048, 1152, 64])
         res_shape = graph.node['out_data_node']['shape']
         self.assertTrue(np.array_equal(exp_shape, res_shape))
 
+    def test_tf_depth_to_space_infer_nchw(self):
+        graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NCHW'
+        graph.node['in_data_node']['shape'] = np.array([1, 256, 1024, 576])
+        dts_node = Node(graph, 'DtS')
+        DepthToSpaceOp.infer(dts_node)
+        exp_shape = np.array([1, 64, 2048, 1152])
+        res_shape = graph.node['out_data_node']['shape']
+        self.assertTrue(np.array_equal(exp_shape, res_shape))
+
     def test_tf_depth_to_space_infer_error(self):
         graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NHWC'
         graph.node['in_data_node']['shape'] = np.array([1024, 576, 256])
         dts_node = Node(graph, 'DtS')
         self.assertRaises(Error, DepthToSpaceOp.infer, dts_node)
 
-    def test_tf_depth_to_space_infer_error_1(self):
+    def test_tf_depth_to_space_infer_divisibility_error_1(self):
         graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NHWC'
         graph.node['in_data_node']['shape'] = np.array([1, 1024, 576, 255])
         dts_node = Node(graph, 'DtS')
         self.assertRaises(Error, DepthToSpaceOp.infer, dts_node)
+
+    def test_tf_depth_to_space_infer_divisibility_error_2(self):
+        graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NCHW'
+        graph.node['in_data_node']['shape'] = np.array([1, 255, 1024, 576])
+        dts_node = Node(graph, 'DtS')
+        self.assertRaises(Error, DepthToSpaceOp.infer, dts_node)
+
index b72bedc..31b53d0 100644 (file)
@@ -19,12 +19,11 @@ import unittest
 import numpy as np
 from generator import generator, generate
 
-from extensions.ops.elementwise import Div, Elementwise
+from extensions.ops.elementwise import Div
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 graph_nodes_attrs = {
     'A': {'type': 'Const', 'op': 'Const', 'kind': 'op', 'shape': None, 'value': None},
     'A_data': {'kind': 'data', 'shape': None, 'value': None},
index 8cf32a7..c04beb9 100644 (file)
  limitations under the License.
 """
 
-import logging as log
-import networkx as nx
 import numpy as np
 
-from mo.front.caffe.extractors.utils import get_canonical_axis_index
 from mo.graph.graph import Node, Graph
-from mo.ops.op import Op, PermuteAttrs
+from mo.ops.op import Op
 
 
 class ExpOp(Op):
index 8c73485..8f20683 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
 from mo.graph.graph import Graph
 from mo.ops.op import Op
index 6f6d315..03b2ae8 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.graph.graph import Graph
 from mo.ops.op import Op
 
index 3fc19cc..233d564 100644 (file)
@@ -16,9 +16,8 @@
 
 import unittest
 
-import networkx as nx
-from mo.graph.graph import Graph
 from extensions.ops.instance_normalization import InstanceNormalization
+from mo.graph.graph import Graph
 
 
 class InstanceNormalizationOp(unittest.TestCase):
index d1b6d18..aaae4c2 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.utils import mark_input_bins
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
index df52d25..9def594 100644 (file)
 """
 
 import unittest
+
 import numpy as np
 
 from extensions.ops.merge import Merge
 from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class TestMerge(unittest.TestCase):
index c149819..9310c32 100644 (file)
  limitations under the License.
 """
 
-import numpy as np
-
-from mo.front.caffe.extractors.utils import get_canonical_axis_index
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
index 760da10..82b512d 100644 (file)
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 import logging as log
 
+import numpy as np
+
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
index 59f366d..151e896 100644 (file)
@@ -1,5 +1,5 @@
 """
- Copyright (C) 2017-2020 Intel Corporation
+ Copyright (C) 2018-2020 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,30 +18,54 @@ import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node, Graph
+from mo.middle.passes.convert_data_type import np_data_type_to_destination_type
 from mo.ops.op import Op
 
 
 class NonZero(Op):
     op = 'NonZero'
+    enabled = False
 
     def __init__(self, graph: Graph, attrs: dict):
+        assert 'output_type' in attrs, 'NonZero has mandatory `output_type` attribute'
         mandatory_props = {
-            'type': None,
-            'op': __class__.op,
-            'infer': NonZero.infer,
+            'op': self.op,
+            'type': self.op,
+            'version': 'opset3',
+
+            'infer': self.infer,
+            'type_infer': self.type_infer,
+
             'in_ports_count': 1,
             'out_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
+    def backend_attrs(self):
+        return [
+            ('output_type', lambda node: np_data_type_to_destination_type(node.output_type)),
+        ]
+
     @staticmethod
     def infer(node: Node):
-        input_shape = node.in_node(0).shape
-        if input_shape is None:
-            return
-        input_value = node.in_node(0).value
+        node_name = node.soft_get('name', node.id)
+        input_shape = node.in_port(0).data.get_shape()
+        assert input_shape is not None, 'The input shape for node "{}" is None'.format(node_name)
+        assert node.has_valid('output_type'), \
+            '`output_type` attribute is not set for NonZero node `{}`'.format(node_name)
+        assert node.output_type in [np.int64, np.int32], \
+            'NonZero `output_type` attribute must be int32 or int64, `{}` found'.format(np.dtype(node.output_type).name)
+
+        input_value = node.in_port(0).data.get_value()
         if input_value is not None:
-            node.out_port(0).data.set_value(np.array(np.nonzero(input_value)))
+            node.out_port(0).data.set_value(np.array(np.nonzero(input_value), dtype=node.output_type))
         else:
-            node.out_port(0).data.set_shape(int64_array([len(input_shape), *input_shape]))
+            # output shape of NonZero should be [input_rank, dynamic]
+            # since the IR can only be saved with static shapes, we use the upper-bound shape value here
+            node.out_port(0).data.set_shape(int64_array([len(input_shape), np.prod(input_shape)]))
 
+    @staticmethod
+    def type_infer(node):
+        assert node.output_type in [np.int64, np.int32], \
+            'NonZero `output_type` attribute must be int32 or int64, `{}` found'.format(np.dtype(node.output_type).name)
+        node.out_port(0).set_data_type(node.output_type)
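
When the input value is known, the result is the stacked indices of non-zero elements; otherwise only the static upper-bound shape [rank, prod(shape)] can be reported. A small NumPy illustration of both branches (example data only):

    import numpy as np

    x = np.array([[3, 0], [0, 7]], dtype=np.int32)

    # Value known: indices of non-zero elements, cast to output_type.
    value_out = np.array(np.nonzero(x), dtype=np.int64)   # [[0 1], [0 1]], shape (2, 2)

    # Value unknown: upper-bound static shape [rank, number_of_elements].
    shape_out = np.array([x.ndim, np.prod(x.shape)], dtype=np.int64)   # [2 4]
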
index 4d9c075..429a54c 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.common.partial_infer.utils import mark_input_bins
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.front.common.partial_infer.utils import mark_input_bins
 from mo.graph.graph import Graph, Node
 from mo.ops.op import Op
 from mo.utils.utils import convert_param_type
index d030894..9bad21e 100644 (file)
  limitations under the License.
 """
 
-import numpy as np
-import networkx as nx
-
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
index bbf4831..5a8e46b 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
 from mo.graph.graph import Graph
 from mo.ops.op import Op
index 80331ad..80b8968 100644 (file)
@@ -14,7 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index a151f67..4ebe599 100644 (file)
@@ -17,9 +17,9 @@
 import numpy as np
 
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.front.common.partial_infer.utils import mark_input_bins
 from mo.graph.graph import Graph
 from mo.ops.op import Op
-from mo.front.common.partial_infer.utils import mark_input_bins
 
 
 class PreluOp(Op):
index fecf1cb..6d1ee64 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from extensions.ops.proposal import ProposalOp
 from mo.front.caffe.extractor import register_caffe_python_extractor
 from mo.graph.graph import Graph
index 6c6fb13..58f2635 100644 (file)
@@ -14,7 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
diff --git a/model-optimizer/extensions/ops/roialign.py b/model-optimizer/extensions/ops/roialign.py
new file mode 100644 (file)
index 0000000..4d9ea71
--- /dev/null
@@ -0,0 +1,82 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.layout import get_features_dim, shape_for_layout
+from mo.graph.graph import Graph
+from mo.ops.op import Op
+
+
+class ROIAlign(Op):
+    op = 'ROIAlign'
+    enabled = False
+
+    def __init__(self, graph: Graph, attrs: dict):
+        assert 'mode' in attrs, '`mode` attribute is not set for ROIAlign during creation'
+        assert 'pooled_h' in attrs, '`pooled_h` attribute is not set for ROIAlign during creation'
+        assert 'pooled_w' in attrs, '`pooled_w` attribute is not set for ROIAlign during creation'
+        assert 'sampling_ratio' in attrs, '`sampling_ratio` attribute is not set for ROIAlign during creation'
+        assert 'spatial_scale' in attrs, '`spatial_scale` attribute is not set for ROIAlign during creation'
+
+        super().__init__(graph, {
+            'op': self.op,
+            'type': self.op,
+            'version': 'opset3',
+
+            'infer': self.infer,
+
+            'in_ports_count': 3,
+            'out_ports_count': 1,
+        }, attrs)
+
+    def backend_attrs(self):
+        return [
+            ('mode', lambda node: str(node.mode)),
+            ('pooled_h', lambda node: str(int(node.pooled_h))),
+            ('pooled_w', lambda node: str(int(node.pooled_w))),
+            ('sampling_ratio', lambda node: str(int(node.sampling_ratio))),
+            ('spatial_scale', lambda node: str(float(node.spatial_scale))),
+        ]
+
+    @staticmethod
+    def infer(node):
+        layout = node.graph.graph['layout']
+        node_name = node.soft_get('name', node.id)
+
+        assert len([port for port in node.in_ports().values() if not port.disconnected()]) == 3, \
+            'The node "{}" must have 3 inputs'.format(node_name)
+
+        assert node.has_valid('pooled_w'), '"pooled_w" attribute is not set for node "{}"'.format(node_name)
+        assert node.has_valid('pooled_h'), '"pooled_h" attribute is not set for node "{}"'.format(node_name)
+        assert node.has_valid('mode'), '"mode" attribute is not set for node "{}"'.format(node_name)
+        assert node.mode in ['avg', 'max'], \
+            '"mode" attribute range of values is ["avg", "max"], got {} for node "{}"'.format(node.mode, node_name)
+
+        input_shape = node.in_port(0).data.get_shape()
+        rois_shape = node.in_port(1).data.get_shape()
+        indices_shape = node.in_port(2).data.get_shape()
+        assert input_shape is not None and rois_shape is not None and indices_shape is not None, \
+            'The node "{}" input shape is None'.format(node_name)
+        assert rois_shape[0] == indices_shape[0], 'The number of batch indices does not correspond to number of ROIs ' \
+                                                  'for node "{}"'.format(node_name)
+        assert rois_shape[1] == 4, 'The size of ROI element must be 4 for node "{}"'.format(node_name)
+        assert len(input_shape) == 4, 'The rank of port 0 input tensor of node "{}" must be 4.'.format(node_name)
+
+        node.out_port(0).data.set_shape(
+            shape_for_layout(layout,
+                             batch=rois_shape[0],
+                             features=input_shape[get_features_dim(layout, 4)],
+                             height=node.pooled_h,
+                             width=node.pooled_w)
+        )
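
The resulting shape is [num_rois, C, pooled_h, pooled_w] in the graph layout. A short numeric check of that rule (all numbers below are hypothetical, NCHW assumed):

    input_shape   = [1, 256, 200, 272]   # N, C, H, W feature map
    rois_shape    = [1000, 4]            # 1000 boxes with 4 coordinates each
    indices_shape = [1000]               # one batch index per box
    pooled_h, pooled_w = 7, 7

    assert rois_shape[0] == indices_shape[0] and rois_shape[1] == 4
    out_shape = [rois_shape[0], input_shape[1], pooled_h, pooled_w]
    print(out_shape)                     # [1000, 256, 7, 7]
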
diff --git a/model-optimizer/extensions/ops/scatter.py b/model-optimizer/extensions/ops/scatter.py
new file mode 100644 (file)
index 0000000..a800ac9
--- /dev/null
@@ -0,0 +1,136 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class Scatter(Op):
+    enabled = False
+
+    op = op_type = None
+    version = None
+
+    def __init__(self, graph: Graph, attrs: dict):
+        assert self.op is not None and self.op_type is not None and self.version is not None, \
+            'Please use a specialized Scatter operation class, Scatter is a base class'
+
+        mandatory_props = {
+            'op': self.op,
+            'type': self.op_type,
+            'version': self.version,
+
+            'is_scatter': True,  # is used for gathering all types of scatters in common transformations
+            'infer': self.infer,
+
+            'in_ports_count': 4,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def infer(node: Node):
+        node_name = node.soft_get('name', node.id)
+
+        input_shape = node.in_port(0).data.get_shape()
+        indices_shape = node.in_port(1).data.get_shape()
+        updates_shape = node.in_port(2).data.get_shape()
+        assert input_shape is not None and updates_shape is not None and indices_shape is not None, \
+            'The node "{}" input shape is None'.format(node_name)
+
+        node.out_port(0).data.set_shape(input_shape)
+
+
+class ScatterElementsAdd(Scatter):
+    op = 'ScatterElementsAdd'
+    op_type = None
+    version = None
+
+
+class ScatterElementsDiv(Scatter):
+    op = 'ScatterElementsDiv'
+    op_type = None
+    version = None
+
+
+class ScatterElementsMax(Scatter):
+    op = 'ScatterElementsMax'
+    op_type = None
+    version = None
+
+
+class ScatterElementsMin(Scatter):
+    op = 'ScatterElementsMin'
+    op_type = None
+    version = None
+
+
+class ScatterElementsMul(Scatter):
+    op = 'ScatterElementsMul'
+    op_type = None
+    version = 'opset3'
+
+
+class ScatterElementsSub(Scatter):
+    op = 'ScatterElementsSub'
+    op_type = None
+    version = None
+
+
+class ScatterElementsUpdate(Scatter):
+    op = op_type = 'ScatterElementsUpdate'
+    version = 'opset3'
+
+
+class ScatterAdd(Scatter):
+    op = 'ScatterAdd'
+    op_type = None
+    version = None
+
+
+class ScatterDiv(Scatter):
+    op = 'ScatterDiv'
+    op_type = None
+    version = None
+
+
+class ScatterMax(Scatter):
+    op = 'ScatterMax'
+    op_type = None
+    version = None
+
+
+class ScatterMin(Scatter):
+    op = 'ScatterMin'
+    op_type = None
+    version = None
+
+
+class ScatterMul(Scatter):
+    op = 'ScatterMul'
+    op_type = None
+    version = None
+
+
+class ScatterSub(Scatter):
+    op = 'ScatterSub'
+    op_type = None
+    version = None
+
+
+class ScatterUpdate(Scatter):
+    op = op_type = 'ScatterUpdate'
+    version = 'opset3'
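
All variants share the base infer, which only propagates the data shape; what differs is the update semantics each backend implements. A NumPy sketch of one plausible reading of ScatterElementsUpdate along axis 0 (an assumption for illustration, not the plugin code):

    import numpy as np

    data    = np.zeros((3, 3), dtype=np.float32)
    indices = np.array([[1, 0, 2], [0, 2, 1]])
    updates = np.array([[1., 2., 3.], [4., 5., 6.]], dtype=np.float32)

    # out[indices[i][j], j] = updates[i][j] for axis=0; the output keeps data's
    # shape, which is exactly what Scatter.infer propagates.
    out = data.copy()
    for i in range(indices.shape[0]):
        for j in range(indices.shape[1]):
            out[indices[i, j], j] = updates[i, j]

    assert out.shape == data.shape
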
index 212d979..6d8d075 100644 (file)
 """
 
 import unittest
+
 import numpy as np
 
 from extensions.ops.select import Select
 from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_attrs
 
 
 class TestSelect(unittest.TestCase):
index 6af7c75..af7490a 100644 (file)
@@ -16,7 +16,6 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.extractor import attr_getter
index a2296ee..f91d365 100644 (file)
  limitations under the License.
 """
 
-import logging as log
-
 import numpy as np
 
+from mo.front.common.layout import shape_for_layout, get_height_dim, get_batch_dim, get_features_dim, get_width_dim
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
+from mo.utils.error import Error
 
 
 class SpaceToDepth(Op):
@@ -50,16 +50,26 @@ class SpaceToDepth(Op):
     def infer(node: Node):
         in_shape = node.in_node().shape
         if in_shape.size != 4:
-            log.error('TensorFlow SpaceToDepth operation is supported for 4D \'NHWC\' input layout only. '
-                      'Current input shape is \'{}\''.format(in_shape))
-            return
-        N, H, W, C = in_shape
+            raise Error('TensorFlow SpaceToDepth operation is supported for 4D \'NHWC\' input layout only. '
+                        'Current input shape is \'{}\''.format(in_shape))
+
+        layout = node.graph.graph['layout']
+        N = in_shape[get_batch_dim(layout, 4)]
+        H = in_shape[get_height_dim(layout, 4)]
+        W = in_shape[get_width_dim(layout, 4)]
+        C = in_shape[get_features_dim(layout, 4)]
+
         block_size = node['block_size']
         if H % block_size or W % block_size:
-            log.error('Spatial dimensions of input tensor of SpaceToDepth operation have to be divisible by '
-                      'SpaceToDepth \'block_size\' parameter. Input tensor shape = {}. Spatial dimensions = {},{}. '
-                      'block_size = {}'.format(in_shape, H, W, block_size))
-            return
-        out_shape = [N, int(H / block_size), int(W / block_size), int(C * (block_size ** 2))]
+            raise Error('Spatial dimensions of input tensor of SpaceToDepth operation have to be divisible by '
+                        'SpaceToDepth \'block_size\' parameter. Input tensor shape = {}. Spatial dimensions = {},{}. '
+                        'block_size = {}'.format(in_shape, H, W, block_size))
+
+        out_shape = shape_for_layout(layout,
+                                     batch=N,
+                                     features=int(C * (block_size ** 2)),
+                                     height=int(H / block_size),
+                                     width=int(W / block_size))
+
         assert np.prod(in_shape) == np.prod(out_shape)
         node.out_node().shape = int64_array(out_shape)
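
SpaceToDepth is the shape inverse of the DepthToSpace rule sketched earlier, and it now raises Error instead of silently returning. A brief round-trip check against the shapes used in the new unit tests (helper name is hypothetical):

    def space_to_depth_out_shape(in_shape, layout, block_size=2):
        # Spatial dims divided by block_size, features multiplied by block_size^2.
        if layout == 'NHWC':
            n, h, w, c = in_shape
            return [n, h // block_size, w // block_size, c * block_size ** 2]
        n, c, h, w = in_shape  # NCHW assumed otherwise
        return [n, c * block_size ** 2, h // block_size, w // block_size]

    assert space_to_depth_out_shape([1, 2048, 1152, 64], 'NHWC') == [1, 1024, 576, 256]
    assert space_to_depth_out_shape([1, 64, 2048, 1152], 'NCHW') == [1, 256, 1024, 576]
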
diff --git a/model-optimizer/extensions/ops/space_to_depth_test.py b/model-optimizer/extensions/ops/space_to_depth_test.py
new file mode 100644 (file)
index 0000000..61f3a21
--- /dev/null
@@ -0,0 +1,74 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+import numpy as np
+from extensions.ops.space_to_depth import SpaceToDepth
+from mo.graph.graph import Node
+from mo.utils.error import Error
+from mo.utils.unittest.graph import build_graph
+
+nodes = {
+    'in_data_node': {'value': None, 'kind': 'data', 'shape': np.array([1, 2048, 1152, 64])},
+    'StD': {'op': 'SpaceToDepth', 'kind': 'op', 'block_size': 2},
+    'out_data_node': {'value': None, 'kind': 'data', 'shape': None}
+}
+
+edges = [
+    ('in_data_node', 'StD'),
+    ('StD', 'out_data_node')
+]
+
+class TestSpaceToDepthPartialInfer(unittest.TestCase):
+    def test_tf_space_to_depth_infer_nhwc(self):
+        graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NHWC'
+        std_node = Node(graph, 'StD')
+        SpaceToDepth.infer(std_node)
+        exp_shape = np.array([1, 1024, 576, 256])
+        res_shape = graph.node['out_data_node']['shape']
+        self.assertTrue(np.array_equal(exp_shape, res_shape))
+
+    def test_tf_space_to_depth_infer_nchw(self):
+        graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NCHW'
+        graph.node['in_data_node']['shape'] = np.array([1, 64, 2048, 1152])
+        std_node = Node(graph, 'StD')
+        SpaceToDepth.infer(std_node)
+        exp_shape = np.array([1, 256, 1024, 576])
+        res_shape = graph.node['out_data_node']['shape']
+        self.assertTrue(np.array_equal(exp_shape, res_shape))
+
+    def test_tf_space_to_depth_infer_shape_error(self):
+        graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NHWC'
+        graph.node['in_data_node']['shape'] = np.array([1024, 576, 256])
+        std_node = Node(graph, 'StD')
+        self.assertRaises(Error, SpaceToDepth.infer, std_node)
+
+    def test_tf_space_to_depth_infer_divisibility_error_1(self):
+        graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NHWC'
+        graph.node['in_data_node']['shape'] = np.array([1, 1024, 577, 256])
+        std_node = Node(graph, 'StD')
+        self.assertRaises(Error, SpaceToDepth.infer, std_node)
+
+    def test_tf_space_to_depth_infer_divisibility_error_2(self):
+        graph = build_graph(nodes, edges)
+        graph.graph['layout'] = 'NCHW'
+        graph.node['in_data_node']['shape'] = np.array([1, 256, 1024, 577])
+        std_node = Node(graph, 'StD')
+        self.assertRaises(Error, SpaceToDepth.infer, std_node)
\ No newline at end of file
index 5e3737c..4f76e5e 100644 (file)
@@ -16,7 +16,6 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index 7ae05fd..39b7996 100644 (file)
@@ -23,7 +23,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 nodes_attributes = {'input_indices': {'shape': None, 'value': None, 'kind': 'data'},
                     'input_values': {'shape': None, 'value': None, 'kind': 'data'},
                     'dense_shape': {'shape': None, 'value': None, 'kind': 'data'},
index 9a48e61..f2595bd 100644 (file)
@@ -23,7 +23,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 nodes_attributes = {'input_indices': {'shape': None, 'value': None, 'kind': 'data'},
                     'input_shape': {'shape': None, 'value': None, 'kind': 'data'},
                     'new_shape': {'shape': None, 'value': None, 'kind': 'data'},
index 363100b..8eecc7f 100644 (file)
@@ -14,9 +14,6 @@
  limitations under the License.
 """
 
-import logging as log
-
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index 18f335c..84473c9 100644 (file)
@@ -14,9 +14,6 @@
  limitations under the License.
 """
 
-import logging as log
-
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index 1075acf..47989ab 100644 (file)
@@ -14,9 +14,6 @@
  limitations under the License.
 """
 
-import logging as log
-
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index 6c09db5..f58ed09 100644 (file)
@@ -23,7 +23,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 # graph 1
 nodes_attributes = {
     'input_indices': {'kind': 'op', 'op': 'Parameter', 'shape': int64_array([5, 2])},
index e702496..ad7b275 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import numpy as np
-
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
index 3490d21..0f1b400 100644 (file)
@@ -23,7 +23,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 nodes_attributes = {'input_indices': {'shape': None, 'value': None, 'kind': 'data'},
                     'input_values': {'shape': None, 'value': None, 'kind': 'data'},
                     'input_dense_shape': {'shape': None, 'value': None, 'kind': 'data'},
index a865dad..23935ed 100644 (file)
@@ -15,6 +15,7 @@
 """
 
 import logging as log
+
 import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
index ed8e574..02ffc9e 100644 (file)
@@ -18,11 +18,11 @@ import unittest
 
 import numpy as np
 
+from extensions.ops.split import AttributedSplit, AttributedVariadicSplit
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
-from extensions.ops.split import AttributedSplit, AttributedVariadicSplit
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class TestSplitOp(unittest.TestCase):
index f8be09c..45df1af 100644 (file)
@@ -17,7 +17,6 @@
 from mo.front.common.partial_infer.elemental import copy_shape_infer
 from mo.graph.graph import Graph
 from mo.ops.op import Op
-from mo.front.common.partial_infer.utils import mark_input_bins
 
 
 class StopGradientOp(Op):
index 290e6e2..27d3660 100644 (file)
@@ -21,8 +21,8 @@ import numpy as np
 
 from extensions.ops.switch import Switch
 from mo.graph.graph import Node
-from mo.utils.unittest.graph import build_graph_with_edge_attrs, build_graph_with_attrs
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph_with_edge_attrs, build_graph_with_attrs
 
 
 class TestSwitch(unittest.TestCase):
index a37e5fc..94eebb0 100644 (file)
@@ -24,7 +24,6 @@ from extensions.ops.transpose import Transpose
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 input_shape = np.array([1, 3, 224, 224])
 
 
index 26dcf84..38916f1 100644 (file)
@@ -14,9 +14,6 @@
  limitations under the License.
 """
 
-import logging as log
-
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index fc18ab3..65da100 100644 (file)
@@ -23,7 +23,6 @@ from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
-
 # graph 1 with two outputs: uniques and indices
 nodes_attributes = {'input': {'shape': None, 'value': None, 'kind': 'data'},
                     'unique_node': {'op': 'Unique', 'kind': 'op'},
index 173e8f0..46390b4 100644 (file)
 """
 
 import hashlib
-from defusedxml.minidom import parseString
 from xml.etree.ElementTree import Element, SubElement, tostring
 
+from defusedxml.minidom import parseString
+
 from mo.graph.graph import *
-from mo.middle.passes.convert_data_type import data_type_str_to_precision, np_data_type_to_precision
+from mo.middle.passes.convert_data_type import np_data_type_to_precision
 from mo.utils.unsupported_ops import UnsupportedOps
 from mo.utils.utils import refer_to_faq_msg
 from mo.utils.version import get_version
index 145249d..4cb3c46 100644 (file)
@@ -15,6 +15,7 @@
 """
 import logging as log
 from builtins import AttributeError
+
 from defusedxml import ElementTree
 
 from mo.front.caffe.collect_attributes import collect_attributes
index 8233fb4..d40437c 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import logging as log
-
 import numpy as np
 
 from mo.utils.error import Error
index 828d3e8..6eb896e 100644 (file)
@@ -17,7 +17,6 @@
 import logging as log
 import os
 
-import networkx as nx
 import numpy as np
 
 from mo.graph.graph import Node, Graph
index de4c170..7e0f2dc 100644 (file)
@@ -19,9 +19,9 @@ import logging as log
 # Concat infer : N - number of inputs to concat
 #                axis - dimension number for tensors concatenation
 import numpy as np
-from mo.front.common.partial_infer.utils import int64_array
 
 from mo.front.caffe.extractors.utils import get_canonical_axis_index
+from mo.front.common.partial_infer.utils import int64_array
 from mo.ops.op import PermuteAttrs
 
 
index 762b443..162cb89 100644 (file)
 """
 
 import logging as log
+
 import numpy as np
 
-from mo.front.common.layout import get_batch_dim, get_features_dim, get_height_dim, get_width_dim, shape_for_layout
+from mo.front.common.layout import get_batch_dim, get_features_dim, shape_for_layout
 from mo.graph.graph import Node
 
 
index 36711f8..e88198e 100644 (file)
 """
 
 import logging as log
+from typing import Iterable
 
 import numpy as np
 
-from typing import Iterable
-
 
 def int64_array(l: Iterable):
     return np.array(l, dtype=np.int64)
@@ -55,8 +54,16 @@ def copy_or_none(x):
 
 
 def convert_tf_padding_to_str(padding):
-    mapping = {b'SAME': 'same_upper', b'VALID': 'valid'}
-    return mapping[padding.s]
+    mapping = {'SAME': 'same_upper', 'VALID': 'valid'}
+    return mapping[padding]
+
+
+def convert_deconv_tf_padding_to_str(padding):
+    # according to the formulas for calculating the "auto_pad" values of the
+    # ConvBackpropData layer in the Operation Specification,
+    # the "same_lower" value corresponds to the "same" value of the conv_transpose layer in TensorFlow
+    mapping = {'SAME': 'same_lower', 'VALID': 'valid'}
+    return mapping[padding]
 
 
 # TODO eliminate this dependency and pass necessary function as an argument
@@ -69,10 +76,7 @@ def tf_window_op_pad_infer(input, window, stride, auto_pad, is_deconv=False):
         normalized_stride = 1 / stride
 
     if auto_pad in ['same_lower', 'same_upper']:
-        if auto_pad == 'same_upper':
-            output = np.int64(np.ceil(input / normalized_stride))
-        else:
-            output = np.int64(np.floor(input / normalized_stride))
+        output = np.int64(np.ceil(input / normalized_stride))
         residual = input % stride
         mask = residual == 0
         full_pad = window.copy()
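
For reference, a minimal numpy sketch (an illustration only, not part of the diff) of the padding mapping and the unified ceil-based output size used above; the input size and stride values are assumed for the example:

    import numpy as np

    # TensorFlow 'SAME'/'VALID' map to IR auto_pad values: forward convolution
    # uses same_upper, conv_transpose (deconvolution) uses same_lower.
    conv_auto_pad = {'SAME': 'same_upper', 'VALID': 'valid'}['SAME']      # 'same_upper'
    deconv_auto_pad = {'SAME': 'same_lower', 'VALID': 'valid'}['SAME']    # 'same_lower'

    # For both same_lower and same_upper the spatial output size is the same,
    # ceil(input / stride), which is why the branch above is collapsed.
    input_size = np.array([224, 224])   # assumed spatial dims
    stride = np.array([2, 2])
    output = np.int64(np.ceil(input_size / stride))   # array([112, 112])
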
index fd720c9..d7f3190 100644 (file)
@@ -15,8 +15,6 @@
 """
 import logging as log
 
-import networkx as nx
-
 from mo.front.subgraph_matcher import SubgraphMatch
 from mo.graph.graph import Node, merge_edge_props, Graph
 from mo.middle.pattern_match import apply_pattern
index eba1cdb..1ade5f6 100644 (file)
@@ -25,9 +25,9 @@ from mo.front.extractor import spatial_attr_getter, add_input_ops, attr_getter,
     add_output_ops
 from mo.graph.graph import Node
 from mo.utils.error import Error
+from mo.utils.ir_engine.compare_graphs import compare_graphs
 from mo.utils.unittest.extractors import FakeMultiParam
 from mo.utils.unittest.graph import build_graph, build_graph_with_edge_attrs, build_graph_with_attrs
-from mo.utils.ir_engine.compare_graphs import compare_graphs
 
 
 class FakePythonParam:
index 2a0650b..c9ab624 100644 (file)
@@ -13,8 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.elementwise import Add
+from mo.front.extractor import FrontExtractorOp
 
 
 class AddFrontExtractor(FrontExtractorOp):
index 25df83f..9f94ad6 100644 (file)
@@ -18,7 +18,8 @@ import numpy as np
 
 from mo.front.caffe.extractors.utils import embed_input
 from mo.front.extractor import FrontExtractorOp
-from mo.front.kaldi.loader.utils import read_binary_bool_token, read_binary_integer32_token, collect_until_token, read_binary_float_token
+from mo.front.kaldi.loader.utils import read_binary_bool_token, read_binary_integer32_token, collect_until_token, \
+    read_binary_float_token
 from mo.front.kaldi.utils import read_binary_vector
 from mo.ops.scale_shift import ScaleShiftOp
 from mo.utils.error import Error
index 63b3157..c5f1e3a 100644 (file)
@@ -13,8 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.identity import IdentityOp
+from mo.front.extractor import FrontExtractorOp
 
 
 class ClipGradientComponentFrontExtractor(FrontExtractorOp):
index 6964b52..fb6bda6 100644 (file)
@@ -16,8 +16,8 @@
 
 import numpy as np
 
-from extensions.ops.transpose import Transpose
 from extensions.ops.gather import Gather
+from extensions.ops.transpose import Transpose
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementOp
 from mo.front.kaldi.loader.utils import read_binary_integer32_token, read_blob
index 9c1c086..f7fd97d 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 from mo.front.extractor import FrontExtractorOp
-from mo.ops.eltwise_ninputs_in_1 import EltwiseNin1
 from mo.front.kaldi.utils import read_token_value
+from mo.ops.eltwise_ninputs_in_1 import EltwiseNin1
 
 
 class ElementwiseProductComponentFrontExtractor(FrontExtractorOp):
index f0a96c1..e1a7a5e 100644 (file)
  limitations under the License.
 """
 
+from extensions.ops.MatMul import FullyConnected
 from mo.front.caffe.extractors.utils import embed_input
 from mo.front.extractor import FrontExtractorOp
 from mo.front.kaldi.loader.utils import collect_until_token
 from mo.front.kaldi.utils import read_binary_matrix
-from extensions.ops.MatMul import FullyConnected
 
 
 class LinearComponentFrontExtractor(FrontExtractorOp):
index 2c96f13..2552c4b 100644 (file)
@@ -32,9 +32,13 @@ class LSTMNonlinearityFrontExtractor(FrontExtractorOp):
 
         mapping_rule = {}
 
-        embed_input(mapping_rule, 1, 'i_weights', ifo_x_weights[0:1024])
-        embed_input(mapping_rule, 2, 'f_weights', ifo_x_weights[1024:2048])
-        embed_input(mapping_rule, 3, 'o_weights', ifo_x_weights[2048:])
+        assert len(ifo_x_weights_shape) == 2, "Unexpected shape of weights in LSTMNonLinearityComponent"
+        assert ifo_x_weights_shape[0] == 3, "Unexpected shape of weights in LSTMNonLinearityComponent"
+
+        ifo_x_weights = ifo_x_weights.reshape(ifo_x_weights_shape)
+        embed_input(mapping_rule, 1, 'i_weights', ifo_x_weights[0][:])
+        embed_input(mapping_rule, 2, 'f_weights', ifo_x_weights[1][:])
+        embed_input(mapping_rule, 3, 'o_weights', ifo_x_weights[2][:])
 
         LstmNonLinearity.update_node_stat(node, mapping_rule)
         return cls.enabled
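
A small numpy sketch (illustration only, not part of the diff) of the reshape-based gate split introduced above; the cell dimension of 4 is an assumed value:

    import numpy as np

    cell_dim = 4                                                # assumed, for illustration
    ifo_x_weights = np.arange(3 * cell_dim, dtype=np.float32)   # flat blob read from the model
    ifo_x_weights_shape = np.array([3, cell_dim])

    ifo_x_weights = ifo_x_weights.reshape(ifo_x_weights_shape)
    i_weights = ifo_x_weights[0][:]   # each gate now gets a full row of the real size
    f_weights = ifo_x_weights[1][:]   # rather than a hard-coded 1024-element slice
    o_weights = ifo_x_weights[2][:]
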
index 377ec8a..ab15306 100644 (file)
@@ -17,7 +17,7 @@ import numpy as np
 
 from mo.front.common.extractors.utils import layout_attrs
 from mo.front.extractor import FrontExtractorOp
-from mo.front.kaldi.loader.utils import read_token_value, collect_until_whitespace, collect_until_token, \
+from mo.front.kaldi.loader.utils import read_token_value, collect_until_token, \
     read_binary_integer32_token, find_next_tag, read_placeholder
 from mo.ops.pooling import Pooling
 from mo.utils.error import Error
index aa160d7..f7250ab 100644 (file)
 
 import numpy as np
 
+from extensions.ops.normalize import NormalizeOp
 from mo.front.caffe.extractors.utils import embed_input
 from mo.front.extractor import FrontExtractorOp
 from mo.front.kaldi.loader.utils import collect_until_token, read_binary_bool_token, read_binary_integer32_token, \
-                                        read_binary_float_token
-from extensions.ops.normalize import NormalizeOp
+    read_binary_float_token
 from mo.utils.error import Error
 
 
index bbfca2b..d604141 100644 (file)
@@ -17,8 +17,8 @@
 import numpy as np
 
 from extensions.ops.normalize import NormalizeOp
-from mo.front.kaldi.extractors.normalize_component_ext import NormalizeComponentFrontExtractor
 from mo.front.kaldi.extractors.common_ext_test import KaldiFrontExtractorTest
+from mo.front.kaldi.extractors.normalize_component_ext import NormalizeComponentFrontExtractor
 from mo.front.kaldi.loader.utils_test import TestKaldiUtilsLoading
 from mo.ops.op import Op
 
index d19c55b..068befe 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from extensions.ops.pnorm import PNormOp
 from mo.front.extractor import FrontExtractorOp
 from mo.front.kaldi.loader.utils import collect_until_token, read_binary_integer32_token, read_binary_float_token
-from extensions.ops.pnorm import PNormOp
 from mo.utils.error import Error
 
 
index e298027..6456808 100644 (file)
@@ -17,8 +17,8 @@
 import numpy as np
 
 from extensions.ops.pnorm import PNormOp
-from mo.front.kaldi.extractors.pnorm_component_ext import PNormComponentFrontExtractor
 from mo.front.kaldi.extractors.common_ext_test import KaldiFrontExtractorTest
+from mo.front.kaldi.extractors.pnorm_component_ext import PNormComponentFrontExtractor
 from mo.front.kaldi.loader.utils_test import TestKaldiUtilsLoading
 from mo.ops.op import Op
 
index a22336c..53becac 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.activation_ops import ReLU
+from mo.front.extractor import FrontExtractorOp
 
 
 class RectifiedLinearComponentFrontExtractor(FrontExtractorOp):
index 95817ca..b471c3c 100644 (file)
  limitations under the License.
 """
 import io
-
-import numpy as np
+import logging as log
 import struct
 from io import IOBase
 
 import networkx as nx
-import logging as log
+import numpy as np
 
 from mo.front.kaldi.loader.utils import find_next_tag, read_placeholder, find_next_component, get_name_from_path, \
     find_end_of_component, end_of_nnet_tag, read_binary_integer32_token, get_parameters, read_token_value, \
index 32037f1..ff49c9c 100644 (file)
  limitations under the License.
 """
 import io
-import numpy as np
 import struct
 import unittest
 
+import numpy as np
+
 from mo.front.kaldi.loader.loader import load_topology_map, load_components
 from mo.graph.graph import Graph, Node
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 class TestKaldiModelsLoading(unittest.TestCase):
index dc1d146..06ae201 100644 (file)
  limitations under the License.
 """
 import io
-
-import numpy as np
 import os
 import struct
 
+import numpy as np
+
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
index 8cbc1ca..63c4a5f 100644 (file)
  limitations under the License.
 """
 import io
-import numpy as np
-import os
 import logging as log
+import os
+
+import numpy as np
 
-from mo.front.kaldi.loader.utils import read_placeholder, read_binary_integer32_token, read_blob, read_token_value, find_next_tag
+from mo.front.kaldi.loader.utils import read_placeholder, read_binary_integer32_token, read_blob, read_token_value, \
+    find_next_tag
 from mo.utils.error import Error
 
 
index 9d1d9ed..af96c0b 100644 (file)
@@ -15,7 +15,6 @@
 """
 
 from mo.front.common.partial_infer.multi_box_prior import multi_box_prior_infer_mxnet
-from mo.utils.error import Error
 
 
 def multi_box_prior_ext(attr):
index 9d6e2d7..7914ed6 100644 (file)
@@ -16,9 +16,9 @@
 
 import mxnet as mx
 
+from extensions.ops.elementwise import Elementwise
 from mo.graph.graph import Node, Graph
 from mo.ops.const import Const
-from extensions.ops.elementwise import Elementwise
 from mo.utils.error import Error
 from mo.utils.str_to import StrTo
 from mo.utils.utils import refer_to_faq_msg
index 205a481..e47d974 100644 (file)
  limitations under the License.
 """
 
-import os
 import json
+import logging as log
+import os
 
-import numpy as np
 import mxnet as mx
-import logging as log
+import numpy as np
 
-from mo.front.mxnet.extractors.utils import get_mxnet_node_edges, load_params, init_rnn_states
 from mo.front.mxnet.extractor import common_mxnet_fields
+from mo.front.mxnet.extractors.utils import get_mxnet_node_edges, load_params, init_rnn_states
 from mo.front.mxnet.nd_to_params import build_params_file
 from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
index acee183..a76809c 100644 (file)
@@ -17,6 +17,7 @@
 import os
 
 import mxnet as mx
+
 from mo.front.mxnet.extractors.utils import load_params
 
 
index 6ea9095..9e1225c 100644 (file)
@@ -20,7 +20,6 @@ from __future__ import unicode_literals
 
 import logging as log
 
-import networkx as nx
 import onnx
 
 from mo.graph.graph import fill_graph_with_nodes, Graph
index 1fc7a7c..f10466b 100644 (file)
  limitations under the License.
 """
 
-import numpy as np
 import unittest
 
+import numpy as np
+
 from mo.front.tf.extractors.utils import collect_tf_attrs, tf_tensor_content
 from mo.utils.unittest.extractors import PB
 
index ee63b8a..32b70c3 100644 (file)
@@ -15,9 +15,8 @@
 """
 import collections
 import logging as log
-from typing import List
-
 from copy import deepcopy
+from typing import List
 
 import networkx as nx
 import numpy as np
index be4ec35..46e233d 100644 (file)
 import unittest
 
 import numpy as np
-
 from generator import generator, generate
 
 from mo.graph.graph import Node, Graph, add_opoutput, dict_includes_compare_attrs
 from mo.ops.const import Const
 from mo.utils.error import Error
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes = {
     '0': {'name': 'input1', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'Parameter'},
index cee1964..92b5edc 100644 (file)
@@ -23,13 +23,13 @@ import traceback
 from collections import OrderedDict
 
 import numpy as np
-from mo.pipeline.unified import unified_pipeline
 
 from extensions.back.SpecialNodesFinalization import RemoveConstOps, CreateConstNodesReplacement, RemoveOutputOps, \
     NormalizeTI
 from mo.graph.graph import Graph
 from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively, for_each_sub_graph_recursively
 from mo.pipeline.common import prepare_emit_ir, get_ir_version
+from mo.pipeline.unified import unified_pipeline
 from mo.utils import import_extensions
 from mo.utils.cli_parser import get_placeholder_shapes, get_tuple_values, get_model_name, \
     get_common_cli_options, get_caffe_cli_options, get_tf_cli_options, get_mxnet_cli_options, get_kaldi_cli_options, \
index eae0378..d7a75a5 100644 (file)
@@ -20,8 +20,8 @@ import numpy as np
 
 from mo.graph.graph import Node
 from mo.middle.passes.conv import convert_muladd_to_scaleshift, convert_add_or_mul_to_scaleshift
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index dbb1695..a2ead94 100644 (file)
@@ -13,8 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import re
 import logging as log
+import re
 from collections import deque
 
 import networkx as nx
index e7a9afb..0b0159a 100644 (file)
@@ -18,7 +18,7 @@ import unittest
 
 import numpy as np
 
-from mo.graph.graph import Node, Graph
+from mo.graph.graph import Node
 from mo.middle.passes.eliminate import mark_output_reachable_nodes, mark_const_producer_nodes
 from mo.utils.unittest.graph import build_graph
 
index be2c6b4..f72f653 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from mo.middle.passes.fusing.decomposition import convert_scale_shift_to_mul_add, convert_batch_norm
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index e7f6a0c..a9968da 100644 (file)
@@ -21,8 +21,8 @@ import numpy as np
 from mo.front.common.partial_infer.eltwise import eltwise_infer
 from mo.graph.graph import Node
 from mo.middle.passes.fusing.fuse_linear_ops import _fuse_mul, _fuse_add, fuse_linear_ops
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index 01bfbe1..5a7a708 100644 (file)
@@ -18,11 +18,11 @@ import logging as log
 
 import numpy as np
 
-from mo.ops.const import Const
 from extensions.ops.elementwise import Mul, Add
 from mo.graph.graph import Node, Graph
 from mo.middle.passes.fusing.helpers import get_value_in_port, \
     get_tensor_in_port
+from mo.ops.const import Const
 
 
 def _fuse_linear_sequence(graph: Graph, start_node: Node):
index ddc571c..afa4a89 100644 (file)
@@ -19,8 +19,8 @@ import unittest
 import numpy as np
 
 from mo.middle.passes.fusing.fuse_linear_seq import fuse_mul_add_sequence
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
     'placeholder_1': {'shape': None, 'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'},
index f32f59f..50d6c83 100644 (file)
@@ -23,8 +23,8 @@ from mo.front.common.partial_infer.eltwise import eltwise_infer
 from mo.middle.passes.fusing.resnet_optimization import stride_optimization
 from mo.ops.convolution import Convolution
 from mo.ops.pooling import Pooling
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 max_elt_lambda = lambda node: eltwise_infer(node, lambda a, b: np.maximum(a, b))
 
index 309ce28..2609772 100644 (file)
@@ -15,6 +15,7 @@
 """
 
 import logging as log
+
 import networkx as nx
 import numpy as np
 
index dce839e..d2e417b 100644 (file)
@@ -16,8 +16,6 @@
 
 import logging as log
 
-import numpy as np
-
 from extensions.ops.activation_ops import LeakyReLU
 from mo.graph.graph import Graph
 from mo.middle.pattern_match import apply_pattern
index 82c2e6f..3b4410d 100644 (file)
@@ -20,8 +20,8 @@ from argparse import Namespace
 import numpy as np
 
 from mo.middle.passes.mean_scale_values import move_scaleshift_to_preprocess
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
                     'node_2': {'type': 'Identity', 'value': None, 'kind': 'op'},
index 0f62a20..5ec3959 100644 (file)
 """
 
 
-from defusedxml.minidom import parseString
 from xml.etree.ElementTree import Element, SubElement, tostring
 
+from defusedxml.minidom import parseString
+
 from mo.graph.graph import Node, Graph
 
 
index 8e498be..47371a6 100644 (file)
@@ -44,12 +44,21 @@ class Convolution(Op):
 
     def backend_attrs(self):
         if self.ir_version == 10:
+            def pad_attribute_helper(node: Node, pad_type: str='begin'):
+                assert pad_type in ['begin', 'end']
+                if not node.has_valid('pad'):
+                    return None
+                pad = get_backend_pad(node.pad, node.spatial_dims, 0 if pad_type == 'begin' else 1)
+                if node.has_valid('auto_pad'):
+                    pad = [0 for _ in pad]
+                return ','.join(map(str, pad))
+
             return [
                 'auto_pad',
                 ('strides', lambda node: ','.join(map(str, node['stride'][node.spatial_dims]))),
                 ('dilations', lambda node: ','.join(map(str, node['dilation'][node.spatial_dims]))),
-                ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0))) if node.has_valid('pad') else None),
-                ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1))) if node.has_valid('pad') else None),
+                ('pads_begin', lambda node: pad_attribute_helper(node, 'begin')),
+                ('pads_end', lambda node: pad_attribute_helper(node, 'end')),
                 ('output_padding', lambda node: ','.join(map(str, node.output_padding[node.spatial_dims])) \
                     if node.has_valid('output_padding') else None),
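
A standalone sketch (illustration only, not part of the diff) of what the new pad_attribute_helper serializes; get_backend_pad is mimicked here under the assumption that it selects the begin/end pad value of each spatial dimension:

    def pad_attribute_sketch(pad, spatial_dims, auto_pad=None, pad_type='begin'):
        # pad holds (begin, end) pairs per dimension; spatial_dims selects H/W
        if pad is None:
            return None                               # the attribute is omitted
        idx = 0 if pad_type == 'begin' else 1
        values = [pad[d][idx] for d in spatial_dims]
        if auto_pad is not None:
            values = [0 for _ in values]              # explicit pads are zeroed when auto_pad is set
        return ','.join(map(str, values))

    pads = [[0, 0], [0, 0], [1, 1], [2, 2]]                             # assumed NCHW pads
    print(pad_attribute_sketch(pads, [2, 3]))                           # "1,2"
    print(pad_attribute_sketch(pads, [2, 3], auto_pad='same_upper'))    # "0,0"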
 
index 558bc6b..5819c54 100644 (file)
  limitations under the License.
 """
 
-import numpy as np
 import logging as log
 
+import numpy as np
+
 from mo.graph.graph import Graph
 from mo.ops.op import Op
 
index c3b4856..1ab415d 100644 (file)
@@ -21,7 +21,7 @@ from mo.ops.op import Op, PermuteAttrs
 
 class Softmax(Op):
     op = 'SoftMax'
-    enabled = True
+    enabled = False
 
     def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
@@ -44,3 +44,18 @@ class Softmax(Op):
         copy_shape_infer(node)
         PermuteAttrs.create_permute_attrs(node, attrs=[('axis', 'input:0')])
 
+
+class LogSoftmax(Op):
+    op = 'LogSoftmax'
+    enabled = False
+
+    def __init__(self, graph: Graph, attrs: dict):
+        super().__init__(graph, {
+            'infer': None,
+            'kind': 'op',
+            'axis': 1,
+            'type': None,  # the operation will be replaced with a Log(Softmax(x)) sub-graph
+            'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
+        }, attrs)
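
The comment above notes that LogSoftmax is later replaced with a Log(Softmax(x)) sub-graph; a numpy sketch (illustration only, with an arbitrary example vector) of that identity:

    import numpy as np

    x = np.array([1.0, 2.0, 3.0])                 # arbitrary example input
    softmax = np.exp(x) / np.exp(x).sum()
    log_softmax = np.log(softmax)                 # what the Log(Softmax(x)) sub-graph computes
    # equivalently, log_softmax(x) == x - logsumexp(x)
    assert np.allclose(log_softmax, x - np.log(np.exp(x).sum()))
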
index 28a3399..759dcaa 100644 (file)
  limitations under the License.
 """
 
+import numpy as np
+
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.perm_inputs import PermuteInputs
 from mo.ops.op import Op
-import numpy as np
 
 
 class SpaceToBatch(Op):
index 417079e..49835d1 100644 (file)
@@ -17,9 +17,7 @@ import unittest
 
 import numpy as np
 
-from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
-from mo.ops.op import PermuteAttrs
 from mo.ops.squeeze import Squeeze
 from mo.utils.unittest.graph import build_graph
 
index 9289005..2d7da0c 100644 (file)
@@ -77,7 +77,7 @@ def permute_masks(node: Node, permutation: PermuteAttrs.Permutation, attr: str):
     if not node.has_valid(attr):
         return None
 
-    node[attr] = permute_array_with_ellipsis(node, node[attr], attr in ['begin_mask', 'end_mask'])
+    node[attr] = permute_array_with_ellipsis(node, node[attr], 0)
     return node[attr]
 
 
@@ -115,17 +115,33 @@ class StridedSlice(Op):
                                                            ('begin_mask', 'input:0', permute_masks),
                                                            ('end_mask', 'input:0', permute_masks),
                                                            ])
-            for i in range(1, len(node.in_nodes())):
-                if node.in_node(i).value is not None and len(node.in_node(0).shape) > 3:
-                    node.in_node(i).value = permute_array_with_ellipsis(node, node.in_node(i).value, 0)
+            # extend inputs according to the ellipsis mask
+            in_shape = node.in_port(0).get_source().data.get_shape()
+            assert in_shape is not None, \
+                'Input shape is unknown for input 0 of node {}'.format(node.name)
+            input_rank = len(in_shape)
+            if input_rank > 3:
+                for i_port in node.in_ports().values():
+                    if i_port.idx == 0 or i_port.disconnected():
+                        continue
+                    old_value = i_port.data.get_value()
+                    # additional check for a non-constant input:
+                    # shape inference raises an error if a non-constant input is connected here,
+                    # so this assert is only a defensive check in case shape inference changes
+                    assert old_value is not None, \
+                        '{} input of {} node is not constant: \'value\' attribute for edge ' \
+                        'contains None'.format(i_port.idx, node.name)
+                    # insert 0 for begin and end and 1 for stride
+                    new_value = permute_array_with_ellipsis(node, old_value, int(i_port.idx == 3))
+                    # set_value also sets the shape and propagates the value to the Const node
+                    i_port.data.set_value(new_value)
 
             # extend masks before removing ellipsis
             if np.any(node.ellipsis_mask):
                 for attr in ["new_axis_mask", "shrink_axis_mask", "begin_mask", "end_mask"]:
                     node[attr] = int64_array(extend_mask_according_ellipsis(node.ellipsis_mask, node.shrink_axis_mask,
                                                                             len(node.out_port(0).data.get_shape()),
-                                                                            list(node[attr]),
-                                                                            attr in ["begin_mask", "end_mask"]))
+                                                                            list(node[attr]), 0))
 
             # due to permutation from nhwc to nchw we will extend all masks and inputs
             idx = np.nonzero(node.ellipsis_mask)
index 907ffc8..64821ce 100644 (file)
@@ -22,25 +22,48 @@ from mo.graph.graph import Node
 from mo.ops.op import PermuteAttrs
 from mo.ops.strided_slice import extend_mask_according_ellipsis, permute_masks, permute_array_with_ellipsis, \
     StridedSlice
+from mo.utils.error import Error
 from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {
+    'input': {
+        'kind': 'op',
+        'op': None
+    },
     'data_1': {
         'kind': 'data',
         'shape': None,
         'value': None,
     },
     'begin': {
+        'kind': 'op',
+        'op': 'Const',
+        'value': None,
+        'shape': None
+    },
+    'begin_data': {
         'kind': 'data',
         'shape': None,
         'value': np.array([]),
     },
     'end': {
+        'kind': 'op',
+        'op': 'Const',
+        'value': None,
+        'shape': None
+    },
+    'end_data': {
         'kind': 'data',
         'shape': None,
         'value': np.array([]),
     },
     'stride': {
+        'kind': 'op',
+        'op': 'Const',
+        'value': None,
+        'shape': None
+    },
+    'stride_data': {
         'kind': 'data',
         'shape': None,
         'value': np.array([]),
@@ -102,10 +125,10 @@ class TestPermutationStridedSlice(unittest.TestCase):
 
         slice_node = Node(graph, 'strided_slice')
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
-        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0])))
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 0, 0, 0])))
 
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
-        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0])))
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 0, 1, 0])))
 
     def test_permute_begin_end_long(self):
         # Testing constant path case
@@ -172,10 +195,10 @@ class TestPermutationStridedSlice(unittest.TestCase):
 
         slice_node = Node(graph, 'strided_slice')
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'begin_mask')
-        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0, 1])))
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 0, 0, 0, 0])))
 
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'end_mask')
-        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0, 1])))
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 0, 1, 0, 0])))
 
     def test_permute_begin_end_shrink(self):
         # Testing constant path case
@@ -217,20 +240,27 @@ class TestPermutationStridedSlice(unittest.TestCase):
 
         slice_node = Node(graph, 'strided_slice')
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
-        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0])))
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 0, 0, 0])))
 
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
-        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0])))
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 0, 1, 0])))
 
     def test_permute_begin_end_ellipsis(self):
         # Testing constant path case
         graph = build_graph(nodes_attributes,
-                            [('data_1', 'strided_slice'),
-                             ('begin', 'strided_slice'),
-                             ('end', 'strided_slice'),
-                             ('stride', 'strided_slice'),
+                            [('input', 'data_1'),
+                             ('data_1', 'strided_slice'),
+                             ('begin', 'begin_data'),
+                             ('begin_data', 'strided_slice'),
+                             ('end', 'end_data'),
+                             ('end_data', 'strided_slice'),
+                             ('stride', 'stride_data'),
+                             ('stride_data', 'strided_slice'),
                              ('strided_slice', 'data_2')],
                             {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'begin': {'value': [0, 1], 'shape': [2]},
+                             'end': {'value': [1, 0], 'shape': [2]},
+                             'stride': {'value': [1, 2], 'shape': [2]},
                              'strided_slice': {'begin_mask': np.array([0, 0]), 'end_mask': np.array([1, 0]),
                                                'new_axis_mask': np.array([0]), 'shrink_axis_mask': [0],
                                                'ellipsis_mask': np.array([1, 0])},
@@ -239,20 +269,30 @@ class TestPermutationStridedSlice(unittest.TestCase):
 
         slice_node = Node(graph, 'strided_slice')
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
-        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([0, 0, 1, 1])))
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([0, 0, 0, 0])))
 
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
-        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 0, 1, 1])))
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 0, 0, 0])))
 
     def test_permute_begin_end_ellipsis_infer(self):
         # Testing constant path case
         graph = build_graph(nodes_attributes,
-                            [('data_1', 'strided_slice'),
-                             ('begin', 'strided_slice'),
-                             ('end', 'strided_slice'),
-                             ('stride', 'strided_slice'),
+                            [('input', 'data_1'),
+                             ('data_1', 'strided_slice', {'in': 0}),
+                             ('begin', 'begin_data'),
+                             ('begin_data', 'strided_slice', {'in': 1}),
+                             ('end', 'end_data'),
+                             ('end_data', 'strided_slice', {'in': 2}),
+                             ('stride', 'stride_data'),
+                             ('stride_data', 'strided_slice', {'in': 3}),
                              ('strided_slice', 'data_2')],
                             {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'begin': {'value': [0, 1], 'shape': [2]},
+                             'end': {'value': [1, 0], 'shape': [2]},
+                             'stride': {'value': [1, 2], 'shape': [2]},
+                             'begin_data': {'value': [0, 1], 'shape': [2]},
+                             'end_data': {'value': [1, 0], 'shape': [2]},
+                             'stride_data': {'value': [1, 2], 'shape': [2]},
                              'strided_slice': {'begin_mask': np.array([0, 0]), 'end_mask': np.array([1, 0]),
                                                'new_axis_mask': np.array([0]), 'shrink_axis_mask': [0],
                                                'ellipsis_mask': np.array([1, 0])},
@@ -261,22 +301,38 @@ class TestPermutationStridedSlice(unittest.TestCase):
         graph.graph['layout'] = "NHWC"
 
         slice_node = Node(graph, 'strided_slice')
+        begin_node = Node(graph, 'begin')
+        end_node = Node(graph, 'end')
+        stride_node = Node(graph, 'stride')
         StridedSlice.infer(slice_node)
-        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([0, 1, 1, 0])))
-        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 1, 1, 0])))
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([0, 0, 0, 0])))
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 0, 0, 0])))
         self.assertTrue(np.array_equal(slice_node.shrink_axis_mask, np.array([0, 0, 0, 0])))
         self.assertTrue(np.array_equal(slice_node.new_axis_mask, np.array([0, 0, 0, 0])))
+        self.assertTrue(np.array_equal(begin_node.value, np.array([0, 1, 0, 0])))
+        self.assertTrue(np.array_equal(end_node.value, np.array([1, 0, 0, 0])))
+        self.assertTrue(np.array_equal(stride_node.value, np.array([1, 2, 1, 1])))
 
     def test_permute_begin_end_ellipsis_new(self):
         # Testing constant path case
         graph = build_graph(nodes_attributes,
-                            [('data_1', 'strided_slice'),
-                             ('begin', 'strided_slice'),
-                             ('end', 'strided_slice'),
-                             ('stride', 'strided_slice'),
+                            [('input', 'data_1'),
+                             ('data_1', 'strided_slice', {'in': 0}),
+                             ('begin', 'begin_data'),
+                             ('begin_data', 'strided_slice', {'in': 1}),
+                             ('end', 'end_data'),
+                             ('end_data', 'strided_slice', {'in': 2}),
+                             ('stride', 'stride_data'),
+                             ('stride_data', 'strided_slice', {'in': 3}),
                              ('strided_slice', 'data_2')],
                             {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
-                             'strided_slice': {'begin_mask': np.array([0, 0, 0]), 'end_mask': np.array([1, 0, 0]),
+                             'begin': {'value': [0, 1, 0], 'shape': [3]},
+                             'begin_data': {'value': [0, 1, 0], 'shape': [3]},
+                             'end': {'value': [1, 0, 1], 'shape': [3]},
+                             'end_data': {'value': [1, 0, 1], 'shape': [3]},
+                             'stride': {'value': [1, 2, 3], 'shape': [3]},
+                             'stride_data': {'value': [1, 2, 3], 'shape': [3]},
+                             'strided_slice': {'begin_mask': np.array([1, 2, 3]), 'end_mask': np.array([1, 2, 3]),
                                                'new_axis_mask': np.array([1, 0, 0]), 'shrink_axis_mask': [0],
                                                'ellipsis_mask': np.array([0, 1, 0])},
                              'data_2': {'shape': np.array([1, 1, 2, 3, 4]), 'value': None},
@@ -284,18 +340,22 @@ class TestPermutationStridedSlice(unittest.TestCase):
 
         slice_node = Node(graph, 'strided_slice')
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'begin_mask')
-        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([0, 0, 0, 1, 1])))
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 3, 2, 0, 0])))
 
         permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'end_mask')
-        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 0, 0, 1, 1])))
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 3, 2, 0, 0])))
 
     def test_permute_begin_end_ellipsis_new_inputs(self):
         # Testing constant path case
         graph = build_graph(nodes_attributes,
-                            [('data_1', 'strided_slice'),
-                             ('begin', 'strided_slice'),
-                             ('end', 'strided_slice'),
-                             ('stride', 'strided_slice'),
+                            [('input', 'data_1'),
+                             ('data_1', 'strided_slice', {'in': 0}),
+                             ('begin', 'begin_data'),
+                             ('begin_data', 'strided_slice', {'in': 1}),
+                             ('end', 'end_data'),
+                             ('end_data', 'strided_slice', {'in': 2}),
+                             ('stride', 'stride_data'),
+                             ('stride_data', 'strided_slice', {'in': 3}),
                              ('strided_slice', 'data_2')],
                             {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
                              'strided_slice': {'begin_mask': np.array([0, 0, 0]), 'end_mask': np.array([1, 0, 0]),
@@ -304,6 +364,9 @@ class TestPermutationStridedSlice(unittest.TestCase):
                              'begin': {'value': np.array([0, 1, 2])},
                              'end': {'value': np.array([1, 2, 3])},
                              'stride': {'value': np.array([1, 1, 1])},
+                             'begin_data': {'value': np.array([0, 1, 2])},
+                             'end_data': {'value': np.array([1, 2, 3])},
+                             'stride_data': {'value': np.array([1, 1, 1])},
                              'data_2': {'shape': np.array([1, 1, 2, 3, 4]), 'value': None},
                              })
 
@@ -342,3 +405,29 @@ class TestPermutationStridedSlice(unittest.TestCase):
         shrink_mask = extend_mask_according_ellipsis(ellipsis_mask, shrink_mask, length_shape, list(shrink_mask),
                                                      ins_value)
         self.assertEquals(shrink_mask, [0, 0, 2, 2, 1])
+
+    def test_non_const_infer(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('input', 'data_1'),
+                             ('data_1', 'strided_slice', {'in': 0}),
+                             ('data_1', 'strided_slice', {'in': 1}),
+                             ('end', 'end_data'),
+                             ('end_data', 'strided_slice', {'in': 2}),
+                             ('stride', 'stride_data'),
+                             ('stride_data', 'strided_slice', {'in': 3}),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'end': {'value': [1, 0], 'shape': [2]},
+                             'stride': {'value': [1, 2], 'shape': [2]},
+                             'strided_slice': {'begin_mask': np.array([0, 0]), 'end_mask': np.array([1, 0]),
+                                               'new_axis_mask': np.array([0]), 'shrink_axis_mask': [0],
+                                               'ellipsis_mask': np.array([1, 0])},
+                             'data_2': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             })
+        graph.graph['layout'] = "NHWC"
+
+        slice_node = Node(graph, 'strided_slice')
+        with self.assertRaises(Error) as error:
+            StridedSlice.infer(slice_node)
+        self.assertTrue('Strided slice layer supports only constant begin and end inputs' in str(error.exception))
index 170fca0..3b2c7b7 100644 (file)
@@ -22,8 +22,8 @@ from generator import generator
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Node
 from mo.ops.unsqueeze import Unsqueeze
-from mo.utils.unittest.graph import build_graph
 from mo.utils.ir_engine.compare_graphs import compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 
 @generator
index f73a90b..1c940dd 100644 (file)
  limitations under the License.
 """
 import logging as log
-
-import networkx as nx
 import os
 from enum import Enum
 
+import networkx as nx
+
 from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import shape_inference
 from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively
index 38f40e8..e15e5b4 100644 (file)
@@ -19,8 +19,6 @@ import logging as log
 import os
 from re import compile, match
 
-import networkx as nx
-
 from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 from mo.utils.graph import nodes_matching_name_pattern, sub_graph_between_nodes
index b55b090..9a66951 100644 (file)
  limitations under the License.
 """
 
+import logging as log
 from collections import deque
 from re import match, compile
 
-import logging as log
 import networkx as nx
 
 from mo.graph.graph import Node, Graph
index 0484d9d..c2d1e76 100644 (file)
 
 import unittest
 
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.utils.error import Error
 from mo.utils.graph import bfs_search, is_connected_component, sub_graph_between_nodes
-from mo.graph.graph import Graph
+
 
 class TestGraphUtils(unittest.TestCase):
     def test_simple_dfs(self):
index 62d8ee2..fd6604a 100644 (file)
 """
 
 import logging as log
-from mo.utils.graph import Node
-from mo.utils import class_registration
 
 from mo.front.common.partial_infer.utils import int64_array
+from mo.utils import class_registration
+from mo.utils.graph import Node
 
 
 class Extender(object):
diff --git a/model-optimizer/mo/utils/ir_reader/extenders/GRUCell_extender.py b/model-optimizer/mo/utils/ir_reader/extenders/GRUCell_extender.py
new file mode 100644 (file)
index 0000000..b5ff5a1
--- /dev/null
@@ -0,0 +1,32 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.common.partial_infer.utils import mark_input_bins
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
+
+
+class GRUCell_extender(Extender):
+    op = 'GRUCell'
+
+    @staticmethod
+    def extend(op: Node):
+        if not op.has_valid('activations'):
+            op['activations'] = None
+
+        mark_input_bins(op, start_port=2)
+
+        op['need_copy_input_blobs'] = True
index 74dfca8..4ce8d8d 100644 (file)
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
-
-from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.ir_reader.extender import Extender
 
 
 class LSTMCell_extender(Extender):
diff --git a/model-optimizer/mo/utils/ir_reader/extenders/RNNCell_extender.py b/model-optimizer/mo/utils/ir_reader/extenders/RNNCell_extender.py
new file mode 100644 (file)
index 0000000..7fcb3ff
--- /dev/null
@@ -0,0 +1,27 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
+
+
+class RNNCell_extender(Extender):
+    op = 'RNNCell'
+
+    @staticmethod
+    def extend(op: Node):
+        if not op.has_valid('activations'):
+            op['activations'] = None
index f839966..c5f64f1 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from mo.utils.graph import Node
 from mo.utils.ir_reader.extender import Extender
 from mo.utils.ir_reader.extenders.conv_extender import Conv_extender
-from mo.utils.graph import Node
 
 
 class BinaryConv_extender(Extender):
index c40853c..389366b 100644 (file)
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
-from mo.utils.graph import Node
-
 from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class Conv_extender(Extender):
index 7711e84..d7b9b44 100644 (file)
 
 import numpy as np
 
-from mo.utils.ir_reader.extender import Extender
-from mo.utils.graph import Node
-
 from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class ConvolutionBackpropData_extender(Extender):
index fde792f..b48def2 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from mo.utils.graph import Node
 from mo.utils.ir_reader.extender import Extender
 from mo.utils.ir_reader.extenders.conv_extender import Conv_extender
-from mo.utils.graph import Node
 
 
 class DeformableConv_extender(Extender):
index cf8cc0e..c513386 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class ExperimentalDetectronROIFeatureExtractor_extender(Extender):
index 2402bee..a1958d6 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class FakeQuantize_extender(Extender):
index da47527..a83c293 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class Interpolate_extender(Extender):
diff --git a/model-optimizer/mo/utils/ir_reader/extenders/non_zero_extender.py b/model-optimizer/mo/utils/ir_reader/extenders/non_zero_extender.py
new file mode 100644 (file)
index 0000000..d276cce
--- /dev/null
@@ -0,0 +1,27 @@
+"""
+ Copyright (C) 2018-2020 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
+
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
+
+
+class NonZeroExtender(Extender):
+    op = 'NonZero'
+
+    @staticmethod
+    def extend(op: Node):
+        op['output_type'] = destination_type_to_np_data_type(op.output_type)
index f13e930..35632b7 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class Pad_extender(Extender):
index a3d675c..e0b8583 100644 (file)
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
+from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
 from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
-from mo.middle.passes.convert_data_type import destination_type_to_np_data_type
 
 class Parameter_extender(Extender):
     op = 'Parameter'
index 4bc94bc..e47dcca 100644 (file)
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
-from mo.graph.graph import Node
-
 from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class AvgPool_extender(Extender):
index 9b8a03b..600f586 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
+from mo.utils.graph import Node
 from mo.utils.ir_reader.extender import Extender
 from mo.utils.ir_reader.extenders.priorbox_extender import PriorBox_extender
-from mo.utils.graph import Node
 
 
 class PriorBoxClustered_extender(Extender):
index 82164c4..4b535cc 100644 (file)
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
-from mo.utils.graph import Node
-
 from mo.front.common.partial_infer.multi_box_prior import multi_box_prior_infer_mxnet
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class PriorBox_extender(Extender):
index d444521..d4a0524 100644 (file)
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
-from mo.utils.graph import Node
-
 from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class ReorgYolo_extender(Extender):
index 0003f11..dfc7c84 100644 (file)
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
-from mo.utils.graph import Node
-
 from mo.front.common.partial_infer.utils import int64_array
+from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class StridedSlice_extender(Extender):
index 50256d4..ef18451 100644 (file)
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
-
+from mo.utils.ir_reader.extender import Extender
 from mo.utils.ir_reader.layer_to_class import copy_graph_with_ops
 
 
index 195e57c..8f7c6e0 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class TopK_extender(Extender):
index c7b1cd2..cd80d0d 100644 (file)
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.utils.ir_reader.extender import Extender
 from mo.utils.graph import Node
+from mo.utils.ir_reader.extender import Extender
 
 
 class VariadicSplit_extender(Extender):
index 3fba047..f91a072 100644 (file)
@@ -24,6 +24,7 @@ from extensions.ops.ReduceOps import ReduceOp
 from extensions.ops.activation_ops import Activation
 from extensions.ops.elementwise import Elementwise, LogicalElementwise, BiasAdd, Div, Mul, Pow, Sub
 from extensions.ops.psroipooling import DeformablePSROIPoolingOp
+from extensions.ops.scatter import Scatter
 from extensions.ops.split import Split, VariadicSplit
 from mo.front.common.partial_infer.utils import int64_array
 from mo.graph.graph import Graph, Node
@@ -62,7 +63,7 @@ def collect_ops(path: str):
     """
     import_by_path(os.path.join(path, 'mo', 'ops'), ['mo', 'ops'])
     import_by_path(os.path.join(path, 'extensions', 'ops'), ['extensions', 'ops'])
-    update_registration(classes=[Op, Activation, Elementwise, LogicalElementwise, ReduceOp],
+    update_registration(classes=[Op, Activation, Elementwise, LogicalElementwise, ReduceOp, Scatter],
                         enabled_transforms=[], disabled_transforms=[])
 
 
@@ -142,7 +143,7 @@ def propagate_const_values(op: Node):
             if weights_rounded[elem] == 0:
                 weights_rounded[elem] -= 1  # pylint: disable=unsupported-assignment-operation
         assert len(weights_rounded) % 8 == 0
-        weights_rounded = weights_rounded.reshape([len(weights_rounded) // 8, 8])   # pylint: disable=no-member
+        weights_rounded = weights_rounded.reshape([len(weights_rounded) // 8, 8])  # pylint: disable=no-member
         weights_rounded = np.flip(weights_rounded, axis=1)
         value = weights_rounded.flatten()
 
@@ -157,8 +158,8 @@ def groupconv_to_conv(op: Node):
     :param op:
     :return:
     """
-    assert op.soft_get('type') == 'GroupConvolution', 'Wrong operation type, {} instead of GroupConvolution!' \
-                                                      ''.format(op.soft_get('type'))
+    assert op.soft_get('type') == 'GroupConvolution', \
+        'Wrong operation type, {} instead of GroupConvolution!'.format(op.soft_get('type'))
 
     weights_shape = op.in_port(1).data.get_shape()
     group = weights_shape[0]
@@ -169,13 +170,13 @@ def groupconv_to_conv(op: Node):
         weights_node.value = np.reshape(weights_node.value, new_shape)
     elif weights_node.type == 'Reshape':
         # we remove reshape node added in ConvolutionWithGroupsResolver pass
-        assert weights_node.in_port(0).get_source().data.get_shape() == new_shape, 'Weight shape and calculated ' \
-                                                                'shape mismatch in GroupConv node {}.'.format(op.name)
+        assert weights_node.in_port(0).get_source().data.get_shape() == new_shape, \
+            'Weight shape and calculated shape mismatch in GroupConv node {}.'.format(op.name)
         op.in_port(1).disconnect()
         weights_node.in_port(0).get_source().get_connection().set_destination(op.in_port(1))
     else:
-        assert op.in_port(1).get_source().data.get_shape() == new_shape, 'Weight shape and calculated ' \
-                                                                'shape mismatch in GroupConv node {}.'.format(op.name)
+        assert op.in_port(1).get_source().data.get_shape() == new_shape, \
+            'Weight shape and calculated shape mismatch in GroupConv node {}.'.format(op.name)
     # we need to set this attrs for correct shape infer as convolution
     op['group'] = group
     op.type = 'Convolution'
@@ -187,7 +188,7 @@ def backprop_to_deconv(op: Node):
     :param op:
     :return:
     """
-    assert op.soft_get('type') in ('ConvolutionBackpropData', 'GroupConvolutionBackpropData'),\
+    assert op.soft_get('type') in ('ConvolutionBackpropData', 'GroupConvolutionBackpropData'), \
         'Wrong operation type, {} instead of ConvolutionBackpropData/GroupConvolutionBackpropData!' \
         ''.format(op.soft_get('type'))
 
@@ -217,6 +218,20 @@ def ti_add_edge_attrs(op: Node):
         i += 1
 
 
+def copy_input_blobs(op: Node, copy_op: Node):
+    """
+    Copies input blob data nodes from the restored graph to the copied one
+    :param op: Node from the restored graph
+    :param copy_op: Node from the copied graph
+    :return:
+    """
+    for u, d in op.get_sorted_inputs():
+        if 'bin' in d:
+            Op.create_and_connect_input_data_node(copy_op.graph, copy_op,
+                                                  {'value': op.in_node(d['in']).value,
+                                                   'shape': op.in_node(d['in']).shape}, d)
+
+
 # Map with preprocessing functions
 preprocessing_op_nodes = {
     'Const': propagate_const_values,
@@ -276,6 +291,9 @@ def copy_graph_with_ops(graph: Graph) -> Graph:
                                                  'please check it!'.format(op_type)
             node = Op.get_op_class_by_name(op_type)(new_graph, op.attrs()).create_node()
 
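+        # for nodes restored with constant ('bin') input blobs, copy those data nodes into the new graph as well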
+        if op.has_and_set('need_copy_input_blobs'):
+            copy_input_blobs(op, node)
+
         # Collect node connections
         mapping_of_old_idx_into_new[op.id] = node.id
         node_connections[op.id] = collect_node_outputs(op)
index 88c6d09..340b188 100644 (file)
@@ -19,7 +19,6 @@ import re
 from mo.utils.error import Error
 from mo.utils.simple_proto_parser import SimpleProtoParser
 
-
 # The list of rules how to map the value from the pipeline.config file to the dictionary with attributes.
 # The rule is either a string or a tuple with two elements. In the first case the rule string is used as a key to
 # search in the parsed pipeline.config file attributes dictionary and a key to save found value. In the second case the
index 914dd95..59948f7 100644 (file)
@@ -14,8 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.graph.graph import Graph
 from mo.middle.pattern_match import apply_pattern
 
index 887c3c3..9b1a118 100644 (file)
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 from argparse import Namespace
+from copy import deepcopy
 
 import networkx as nx
-from copy import deepcopy
 
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.extractor import extract_port_from_string
index 5299159..0f2d9ea 100644 (file)
@@ -16,8 +16,6 @@
 
 import collections
 
-import networkx as nx
-
 from mo.graph.graph import Node, Graph
 
 
index e3c0b55..661244b 100644 (file)
@@ -15,8 +15,9 @@
 """
 
 import unittest
-import networkx as nx
+
 import numpy as np
+
 from mo.utils.utils import match_shapes
 
 
index 8d7d15f..227b74e 100644 (file)
 
 import unittest
 import unittest.mock as mock
-
 from unittest.mock import mock_open
+
 from mo.utils.versions_checker import get_module_version_list_from_file, parse_versions_list
 
+
 class TestingVersionsChecker(unittest.TestCase):
     @mock.patch('builtins.open', new_callable=mock_open, create=True)
     def test_get_module_version_list_from_file(self, mock_open):
index d325751..38ecd7c 100644 (file)
@@ -1,6 +1,7 @@
 coverage==4.4.2
 m2r==0.1.12
 pyenchant==1.6.11
+astroid==2.1.0
 pylint==2.1.1
 Sphinx==1.6.5
 safety==1.8.5
diff --git a/ngraph b/ngraph
index edc65ca..eaa6d35 160000 (submodule)
--- a/ngraph
+++ b/ngraph
@@ -1 +1 @@
-Subproject commit edc65ca0111f86a7e63a98f62cb17d153cc2535c
+Subproject commit eaa6d35b7ed415e02b2401b528f31960123e5b71
diff --git a/scripts/demo/README.txt b/scripts/demo/README.txt
new file mode 100644 (file)
index 0000000..a4e8fe7
--- /dev/null
@@ -0,0 +1,83 @@
+=====================================================
+Demo Scripts for Model Optimizer and Inference Engine
+=====================================================
+
+The demo scripts illustrate how to use the Intel(R) Deep Learning Deployment Toolkit to convert and optimize pre-trained models and perform inference.
+
+Setting Up Demos
+================
+If you are behind a proxy, set the following environment variables in the console session:
+
+On Linux* and Mac OS:
+export http_proxy=http://<proxyHost>:<proxyPort>
+export https_proxy=https://<proxyHost>:<proxyPort>
+
+On Windows* OS:
+set http_proxy=http://<proxyHost>:<proxyPort>
+set https_proxy=https://<proxyHost>:<proxyPort>
+
+Running Demos
+=============
+
+The "demo" folder contains three scripts:
+
+1. Classification demo using public SqueezeNet topology (demo_squeezenet_download_convert_run.sh|bat)
+
+2. Security barrier camera demo that showcases three models coming with the product (demo_security_barrier_camera.sh|bat)
+
+3. Benchmark demo using public SqueezeNet topology (demo_benchmark_app.sh|bat) 
+
+To run a demo, run the demo_squeezenet_download_convert_run.sh, demo_security_barrier_camera.sh, or demo_benchmark_app.sh script (*.bat on Windows) from the console without parameters, for example:
+
+./demo_squeezenet_download_convert_run.sh
+
+Each script allows you to specify the target device to infer on using the -d <CPU|GPU|MYRIAD|FPGA> option.
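+
+For example, to run the benchmark demo on a GPU (assuming the GPU plugin and drivers are installed):
+
+./demo_benchmark_app.sh -d GPU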
+
+Classification Demo Using SqueezeNet
+====================================
+
+The demo illustrates the general workflow of using the Intel(R) Deep Learning Deployment Toolkit and performs the following:
+
+  - Downloads a public SqueezeNet model using the Model Downloader (open_model_zoo\tools\downloader\downloader.py)
+  - Installs all prerequisites required for running the Model Optimizer using the scripts from the "model_optimizer\install_prerequisites" folder
+  - Converts SqueezeNet to an IR using the Model Optimizer (model_optimizer\mo.py) via the Model Converter (open_model_zoo\tools\downloader\converter.py)
+  - Builds the Inference Engine classification sample (inference_engine\samples\classification_sample_async)
+  - Runs the sample with the car.png picture located in the demo folder
+
+The sample application prints top-10 inference results for the picture.
+For more information about the Inference Engine classification sample, refer to the documentation available in the sample folder.
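+
+After the demo completes, you can rerun the built sample manually on another image. A sketch of such a command, assuming the default build and IR locations created by the Linux script (paths may differ on your system):
+
+~/inference_engine_samples_build/intel64/Release/classification_sample_async -d CPU -i <path_to_image> -m ~/openvino_models/ir/<model_subdirectory>/FP16/squeezenet1.1.xml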
+
+
+Security Barrier Camera Demo
+============================
+
+The demo illustrates using the Inference Engine with pre-trained models to perform vehicle detection, vehicle attribute recognition, and license-plate recognition tasks.
+As the sample produces visual output, it should be run in GUI mode.
+
+The demo script does the following:
+
+- Builds the Inference Engine security barrier camera sample (inference_engine\samples\security_barrier_camera_sample)
+- Runs the sample with the car_1.bmp located in the demo folder
+
+The sample application displays the resulting frame with detections rendered as bounding boxes and text.
+
+For more information about the Inference Engine security barrier camera sample, refer to the documentation available in the sample folder.
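+
+The demo script runs a command similar to the following sketch (the model IR paths are resolved by the script from demo_security_barrier_camera.conf and may differ on your system):
+
+./security_barrier_camera_demo -d CPU -d_va CPU -d_lpr CPU -i car_1.bmp -m <vehicle-license-plate-detection-barrier-0106.xml> -m_va <vehicle-attributes-recognition-barrier-0039.xml> -m_lpr <license-plate-recognition-barrier-0001.xml>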
+
+
+Benchmark Demo Using SqueezeNet
+===============================
+
+The demo illustrates how to use the Benchmark Application to estimate deep learning inference performance on supported devices.
+
+The demo script does the following:
+
+  - Downloads a public SqueezeNet model using the Model Downloader (open_model_zoo\tools\downloader\downloader.py)
+  - Installs all prerequisites required for running the Model Optimizer using the scripts from the "model_optimizer\install_prerequisites" folder
+  - Converts SqueezeNet to an IR using the Model Optimizer (model_optimizer\mo.py) via the Model Converter (open_model_zoo\tools\downloader\converter.py)
+  - Builds the Inference Engine benchmark tool (inference_engine\samples\benchmark_app)
+  - Runs the tool with the car.png picture located in the demo folder
+
+The benchmark app prints performance counters, resulting latency, and throughput values.
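+
+The demo script runs a command similar to the following sketch (the IR path depends on where the script stored the converted model):
+
+./benchmark_app -d CPU -i car.png -m <ir_dir>/squeezenet1.1.xml -pc -niter 1000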
+For more information about the Inference Engine benchmark app, refer to the documentation available in the sample folder.
\ No newline at end of file
diff --git a/scripts/demo/car.png b/scripts/demo/car.png
new file mode 100644 (file)
index 0000000..f22d8d6
Binary files /dev/null and b/scripts/demo/car.png differ
diff --git a/scripts/demo/car_1.bmp b/scripts/demo/car_1.bmp
new file mode 100644 (file)
index 0000000..111cee4
Binary files /dev/null and b/scripts/demo/car_1.bmp differ
diff --git a/scripts/demo/demo_benchmark_app.bat b/scripts/demo/demo_benchmark_app.bat
new file mode 100644 (file)
index 0000000..4b29fb3
--- /dev/null
@@ -0,0 +1,253 @@
+:: Copyright (C) 2018-2019 Intel Corporation
+:: SPDX-License-Identifier: Apache-2.0
+
+@echo off
+setlocal enabledelayedexpansion
+
+set TARGET=CPU
+set BUILD_FOLDER=%USERPROFILE%\Documents\Intel\OpenVINO
+
+:: command line arguments parsing
+:input_arguments_loop
+if not "%1"=="" (
+    if "%1"=="-d" (
+        set TARGET=%2
+        echo target = !TARGET!
+        shift
+    )
+    if "%1"=="-sample-options" (
+        set SAMPLE_OPTIONS=%2 %3 %4 %5 %6
+        echo sample_options = !SAMPLE_OPTIONS!
+        shift
+    )
+    if "%1"=="-help" (
+        echo %~n0%~x0 is a benchmark demo using the public SqueezeNet topology
+        echo.
+        echo Options:
+        echo -d name     Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD are acceptable. The sample will look for a suitable plugin for the specified device
+        exit /b
+    )
+    shift
+    goto :input_arguments_loop
+)
+
+IF "%SAMPLE_OPTIONS%"=="" (
+      set SAMPLE_OPTIONS=-niter 1000 
+)
+
+set ROOT_DIR=%~dp0
+
+set TARGET_PRECISION=FP16
+
+
+echo target_precision = !TARGET_PRECISION!
+
+set models_path=%BUILD_FOLDER%\openvino_models\models
+set models_cache=%BUILD_FOLDER%\openvino_models\cache
+set irs_path=%BUILD_FOLDER%\openvino_models\ir
+
+set model_name=squeezenet1.1
+
+set target_image_path=%ROOT_DIR%car.png
+
+if exist "%ROOT_DIR%..\..\bin\setupvars.bat" (
+    call "%ROOT_DIR%..\..\bin\setupvars.bat"
+) else (
+    echo setupvars.bat is not found, INTEL_OPENVINO_DIR can't be set
+    goto errorHandling
+)
+
+echo INTEL_OPENVINO_DIR is set to %INTEL_OPENVINO_DIR%
+
+:: Check if Python is installed
+python --version 2>NUL
+if errorlevel 1 (
+   echo Error^: Python is not installed. Please install Python 3.5 ^(64-bit^) or higher from https://www.python.org/downloads/
+   goto errorHandling
+)
+
+:: Check if the Python version is 3.5 or higher
+for /F "tokens=* USEBACKQ" %%F IN (`python --version 2^>^&1`) DO (
+   set version=%%F
+)
+echo %version%
+
+for /F "tokens=1,2,3 delims=. " %%a in ("%version%") do (
+   set Major=%%b
+   set Minor=%%c
+)
+
+if "%Major%" geq "3" (
+   if "%Minor%" geq "5" (
+       set python_ver=okay
+   )
+)
+if not "%python_ver%"=="okay" (
+   echo Unsupported Python version. Please install Python 3.5 ^(64-bit^) or higher from https://www.python.org/downloads/
+   goto errorHandling
+)
+
+:: install yaml python modules required for downloader.py
+pip3 install --user -r "%ROOT_DIR%..\open_model_zoo\tools\downloader\requirements.in"
+if ERRORLEVEL 1 GOTO errorHandling
+
+set downloader_dir=%INTEL_OPENVINO_DIR%\deployment_tools\open_model_zoo\tools\downloader
+
+for /F "tokens=* usebackq" %%d in (
+    `python "%downloader_dir%\info_dumper.py" --name "%model_name%" ^|
+        python -c "import sys, json; print(json.load(sys.stdin)[0]['subdirectory'])"`
+) do (
+    set model_dir=%%d
+)
+
+set ir_dir=%irs_path%\%model_dir%\%target_precision%
+
+echo Download public %model_name% model
+echo python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+echo %model_name% model downloading completed
+
+timeout 7
+
+if exist %ir_dir% (
+    echo.
+    echo Target folder %ir_dir% already exists. Skipping IR generation with Model Optimizer.
+    echo If you want to convert a model again, remove the entire %ir_dir% folder.
+    timeout 7
+    GOTO buildSample
+)
+
+echo.
+echo ###############^|^| Install Model Optimizer prerequisites ^|^|###############
+echo.
+timeout 3
+cd "%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer\install_prerequisites"
+call install_prerequisites_caffe.bat
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+echo.
+echo ###############^|^| Run Model Optimizer ^|^|###############
+echo.
+timeout 3
+
+::set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
+echo python "%downloader_dir%\converter.py" --mo "%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer\mo.py" --name "%model_name%" -d "%models_path%" -o "%irs_path%" --precisions "%TARGET_PRECISION%"
+python "%downloader_dir%\converter.py" --mo "%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer\mo.py" --name "%model_name%" -d "%models_path%" -o "%irs_path%" --precisions "%TARGET_PRECISION%"
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+
+:buildSample
+echo.
+echo ###############^|^| Generate VS solution for Inference Engine samples using cmake ^|^|###############
+echo.
+timeout 3
+
+if "%PROCESSOR_ARCHITECTURE%" == "AMD64" (
+   set "PLATFORM=x64"
+) else (
+   set "PLATFORM=Win32"
+)
+
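+:: Locate MSBuild: prefer vswhere-based lookup (Visual Studio 2017 and newer), then fall back to known MSBuild install paths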
+set VSWHERE="false"
+if exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
+   set VSWHERE="true"
+   cd "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer"
+) else if exist "%ProgramFiles%\Microsoft Visual Studio\Installer\vswhere.exe" (
+      set VSWHERE="true"
+      cd "%ProgramFiles%\Microsoft Visual Studio\Installer"
+) else (
+   echo "vswhere tool is not found"
+)
+
+set MSBUILD_BIN=
+set VS_PATH=
+
+if !VSWHERE! == "true" (
+   for /f "usebackq tokens=*" %%i in (`vswhere -latest -products * -requires Microsoft.Component.MSBuild -property installationPath`) do (
+      set VS_PATH=%%i
+   )
+   if exist "!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe"
+   )
+   if exist "!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe"
+   )
+   if exist "!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe"
+   )
+)
+
+if "!MSBUILD_BIN!" == "" (
+   if exist "C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=14 2015"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+) else (
+   if not "!MSBUILD_BIN:2019=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=16 2019"
+   if not "!MSBUILD_BIN:2017=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=15 2017"
+   if not "!MSBUILD_BIN:2015=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=14 2015"
+)
+
+if "!MSBUILD_BIN!" == "" (
+   echo Build tools for Visual Studio 2015 / 2017 / 2019 cannot be found. If you use Visual Studio 2017, please download and install build tools from https://www.visualstudio.com/downloads/#build-tools-for-visual-studio-2017
+   GOTO errorHandling
+)
+
+set "SOLUTION_DIR64=%BUILD_FOLDER%\inference_engine_samples_build"
+
+echo Creating Visual Studio !MSBUILD_VERSION! %PLATFORM% files in %SOLUTION_DIR64%... && ^
+if exist "%SOLUTION_DIR64%\CMakeCache.txt" del "%SOLUTION_DIR64%\CMakeCache.txt"
+cd "%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\samples\cpp" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio !MSBUILD_VERSION!" -A %PLATFORM% "%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\samples\cpp"
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+
+echo.
+echo ###############^|^| Build Inference Engine samples using MS Visual Studio (MSBuild.exe) ^|^|###############
+echo.
+timeout 3
+echo "!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
+"!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:benchmark_app /clp:ErrorsOnly /m
+
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+
+:runSample
+echo.
+echo ###############^|^| Run Inference Engine benchmark app ^|^|###############
+echo.
+timeout 3
+copy /Y "%ROOT_DIR%%model_name%.labels" "%ir_dir%"
+cd "%SOLUTION_DIR64%\intel64\Release"
+
+echo benchmark_app.exe -i "%target_image_path%" -m "%ir_dir%\%model_name%.xml" -pc  -d  !TARGET! !SAMPLE_OPTIONS!
+benchmark_app.exe -i "%target_image_path%" -m "%ir_dir%\%model_name%.xml" -pc  -d  !TARGET! !SAMPLE_OPTIONS!
+
+if ERRORLEVEL 1 GOTO errorHandling
+
+echo.
+echo ###############^|^| Inference Engine benchmark app completed successfully ^|^|###############
+
+timeout 10
+cd "%ROOT_DIR%"
+
+goto :eof
+
+:errorHandling
+echo Error
+cd "%ROOT_DIR%"
diff --git a/scripts/demo/demo_benchmark_app.sh b/scripts/demo/demo_benchmark_app.sh
new file mode 100644 (file)
index 0000000..6ed2702
--- /dev/null
@@ -0,0 +1,225 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+. "$ROOT_DIR/utils.sh"
+
+usage() {
+    echo "Benchmark demo using public SqueezeNet topology"
+    echo "-d name     specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD are acceptable. The sample will look for a suitable plugin for the specified device"
+    echo "-help            print help message"
+    exit 1
+}
+
+trap 'error ${LINENO}' ERR
+
+target="CPU"
+
+# parse command line options
+while [[ $# -gt 0 ]]
+do
+key="$1"
+
+case $key in
+    -h | -help | --help)
+    usage
+    ;;
+    -d)
+    target="$2"
+    echo target = "${target}"
+    shift
+    ;;
+    -sample-options)
+    sampleoptions="$2 $3 $4 $5 $6"
+    echo sample-options = "${sampleoptions}"
+    shift
+    ;;
+    *)
+    # unknown option
+    ;;
+esac
+shift
+done
+
+if [ -z "$sampleoptions" ]; then
+    sampleoptions="-niter 1000"
+fi
+
+target_precision="FP16"
+
+printf "target_precision = ${target_precision}\n"
+
+models_path="$HOME/openvino_models/models"
+models_cache="$HOME/openvino_models/cache"
+irs_path="$HOME/openvino_models/ir"
+
+model_name="squeezenet1.1"
+
+target_image_path="$ROOT_DIR/car.png"
+
+run_again="Then run the script again\n\n"
+dashes="\n\n###################################################\n\n"
+
+
+if [ -e "$ROOT_DIR/../../bin/setupvars.sh" ]; then
+    setupvars_path="$ROOT_DIR/../../bin/setupvars.sh"
+else
+    printf "Error: setupvars.sh is not found\n"
+fi
+
+if ! . $setupvars_path ; then
+    printf "Unable to run ./setupvars.sh. Please check its presence. ${run_again}"
+    exit 1
+fi
+
+# Step 1. Download the Caffe model and the prototxt of the model
+printf "${dashes}"
+printf "\n\nDownloading the Caffe model and the prototxt"
+
+cur_path=$PWD
+
+printf "\nInstalling dependencies\n"
+
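+# Detect the host OS to choose the package manager and the Python/pip binaries to use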
+if [[ -f /etc/centos-release ]]; then
+    DISTRO="centos"
+elif [[ -f /etc/lsb-release ]]; then
+    DISTRO="ubuntu"
+fi
+
+if [[ $DISTRO == "centos" ]]; then
+    sudo -E yum install -y centos-release-scl epel-release
+    sudo -E yum install -y gcc gcc-c++ make glibc-static glibc-devel libstdc++-static libstdc++-devel libstdc++ libgcc \
+                           glibc-static.i686 glibc-devel.i686 libstdc++-static.i686 libstdc++.i686 libgcc.i686 cmake
+
+    sudo -E rpm -Uvh http://li.nux.ro/download/nux/dextop/el7/x86_64/nux-dextop-release-0-1.el7.nux.noarch.rpm || true
+    sudo -E yum install -y epel-release
+    sudo -E yum install -y cmake ffmpeg gstreamer1 gstreamer1-plugins-base libusbx-devel
+
+    # check installed Python version
+    if command -v python3.5 >/dev/null 2>&1; then
+        python_binary=python3.5
+        pip_binary=pip3.5
+    fi
+    if command -v python3.6 >/dev/null 2>&1; then
+        python_binary=python3.6
+        pip_binary=pip3.6
+    fi
+    if [ -z "$python_binary" ]; then
+        sudo -E yum install -y rh-python36 || true
+        . scl_source enable rh-python36
+        python_binary=python3.6
+        pip_binary=pip3.6
+    fi
+elif [[ $DISTRO == "ubuntu" ]]; then
+    sudo -E apt update
+    print_and_run sudo -E apt -y install build-essential python3-pip virtualenv cmake libcairo2-dev libpango1.0-dev libglib2.0-dev libgtk2.0-dev libswscale-dev libavcodec-dev libavformat-dev libgstreamer1.0-0 gstreamer1.0-plugins-base
+    python_binary=python3
+    pip_binary=pip3
+
+    system_ver=`cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2`
+    if [ $system_ver = "18.04" ]; then
+        sudo -E apt-get install -y libpng-dev
+    else
+        sudo -E apt-get install -y libpng12-dev
+    fi
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+    # check installed Python version
+    if command -v python3.7 >/dev/null 2>&1; then
+        python_binary=python3.7
+        pip_binary=pip3.7
+    elif command -v python3.6 >/dev/null 2>&1; then
+        python_binary=python3.6
+        pip_binary=pip3.6
+    elif command -v python3.5 >/dev/null 2>&1; then
+        python_binary=python3.5
+        pip_binary=pip3.5
+    else
+        python_binary=python3
+        pip_binary=pip3
+    fi
+fi
+
+if ! command -v $python_binary &>/dev/null; then
+    printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. ${run_again}"
+    exit 1
+fi
+
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    $pip_binary install -r $ROOT_DIR/../open_model_zoo/tools/downloader/requirements.in
+else
+    sudo -E $pip_binary install -r $ROOT_DIR/../open_model_zoo/tools/downloader/requirements.in
+fi
+
+downloader_dir="${INTEL_OPENVINO_DIR}/deployment_tools/open_model_zoo/tools/downloader"
+
+model_dir=$("$python_binary" "$downloader_dir/info_dumper.py" --name "$model_name" |
+    "$python_binary" -c 'import sys, json; print(json.load(sys.stdin)[0]["subdirectory"])')
+
+downloader_path="$downloader_dir/downloader.py"
+
+print_and_run "$python_binary" "$downloader_path" --name "$model_name" --output_dir "${models_path}" --cache_dir "${models_cache}"
+
+ir_dir="${irs_path}/${model_dir}/${target_precision}"
+
+if [ ! -e "$ir_dir" ]; then
+    # Step 2. Configure Model Optimizer
+    printf "${dashes}"
+    printf "Install Model Optimizer dependencies\n\n"
+    cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites"
+    . ./install_prerequisites.sh caffe
+    cd $cur_path
+
+    # Step 3. Convert a model with Model Optimizer
+    printf "${dashes}"
+    printf "Convert a model with Model Optimizer\n\n"
+
+    mo_path="${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/mo.py"
+
+    export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
+    print_and_run "$python_binary" "$downloader_dir/converter.py" --mo "$mo_path" --name "$model_name" -d "$models_path" -o "$irs_path" --precisions "$target_precision"
+else
+    printf "\n\nTarget folder ${ir_dir} already exists. Skipping IR generation with Model Optimizer."
+    printf "If you want to convert a model again, remove the entire ${ir_dir} folder. ${run_again}"
+fi
+
+# Step 4. Build samples
+printf "${dashes}"
+printf "Build Inference Engine samples\n\n"
+
+OS_PATH=$(uname -m)
+NUM_THREADS="-j2"
+
+if [ $OS_PATH == "x86_64" ]; then
+  OS_PATH="intel64"
+  NUM_THREADS="-j8"
+fi
+
+samples_path="${INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/samples/cpp"
+build_dir="$HOME/inference_engine_samples_build"
+binaries_dir="${build_dir}/${OS_PATH}/Release"
+
+if [ -e $build_dir/CMakeCache.txt ]; then
+       rm -rf $build_dir/CMakeCache.txt
+fi
+mkdir -p $build_dir
+cd $build_dir
+cmake -DCMAKE_BUILD_TYPE=Release $samples_path
+
+make $NUM_THREADS benchmark_app
+
+# Step 5. Run samples
+printf "${dashes}"
+printf "Run Inference Engine benchmark app\n\n"
+
+cd $binaries_dir
+
+cp -f $ROOT_DIR/${model_name}.labels ${ir_dir}/
+
+print_and_run ./benchmark_app -d "$target" -i "$target_image_path" -m "${ir_dir}/${model_name}.xml" -pc ${sampleoptions}
+
+printf "${dashes}"
+
+printf "Inference Engine benchmark app completed successfully.\n\n"
diff --git a/scripts/demo/demo_security_barrier_camera.bat b/scripts/demo/demo_security_barrier_camera.bat
new file mode 100644 (file)
index 0000000..648dae5
--- /dev/null
@@ -0,0 +1,213 @@
+:: Copyright (C) 2018-2019 Intel Corporation
+:: SPDX-License-Identifier: Apache-2.0
+
+@echo off
+setlocal enabledelayedexpansion
+
+set TARGET=CPU
+set SAMPLE_OPTIONS=
+set BUILD_FOLDER=%USERPROFILE%\Documents\Intel\OpenVINO
+
+:: command line arguments parsing
+:input_arguments_loop
+if not "%1"=="" (
+    if "%1"=="-d" (
+        set TARGET=%2
+        echo target = !TARGET!
+        shift
+    )
+    if "%1"=="-sample-options" (
+        set SAMPLE_OPTIONS=%2 %3 %4 %5 %6
+        echo sample_options = !SAMPLE_OPTIONS!
+        shift
+    )
+    if "%1"=="-help" (
+        echo %~n0%~x0 is a security barrier camera demo that showcases three models coming with the product
+        echo.
+        echo Options:
+        echo -d name     Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD are acceptable. The sample will look for a suitable plugin for the specified device
+        exit /b
+    )
+    shift
+    goto :input_arguments_loop
+)
+
+set ROOT_DIR=%~dp0
+
+set target_image_path=%ROOT_DIR%car_1.bmp
+
+
+set TARGET_PRECISION=FP16
+
+
+echo target_precision = !TARGET_PRECISION!
+
+if exist "%ROOT_DIR%..\..\bin\setupvars.bat" (
+    call "%ROOT_DIR%..\..\bin\setupvars.bat"
+) else (
+    echo setupvars.bat is not found, INTEL_OPENVINO_DIR can't be set
+    goto errorHandling
+)
+
+echo INTEL_OPENVINO_DIR is set to %INTEL_OPENVINO_DIR%
+
+:: Check if Python is installed
+python --version 2>NUL
+if errorlevel 1 (
+   echo Error^: Python is not installed. Please install Python 3.5 ^(64-bit^) or higher from https://www.python.org/downloads/
+   goto errorHandling
+)
+
+:: Check if the Python version is 3.5 or higher
+for /F "tokens=* USEBACKQ" %%F IN (`python --version 2^>^&1`) DO (
+   set version=%%F
+)
+echo %version%
+
+for /F "tokens=1,2,3 delims=. " %%a in ("%version%") do (
+   set Major=%%b
+   set Minor=%%c
+)
+
+if "%Major%" geq "3" (
+   if "%Minor%" geq "5" (
+       set python_ver=okay
+   )
+)
+if not "%python_ver%"=="okay" (
+   echo Unsupported Python version. Please install Python 3.5 ^(64-bit^) or higher from https://www.python.org/downloads/
+   goto errorHandling
+)
+
+:: install yaml python modules required for downloader.py
+pip3 install --user -r "%ROOT_DIR%..\open_model_zoo\tools\downloader\requirements.in"
+if ERRORLEVEL 1 GOTO errorHandling
+
+
+set models_path=%BUILD_FOLDER%\openvino_models\ir
+set models_cache=%BUILD_FOLDER%\openvino_models\cache
+
+if not exist %models_cache% (
+  mkdir %models_cache%
+)
+
+set downloader_dir=%INTEL_OPENVINO_DIR%\deployment_tools\open_model_zoo\tools\downloader
+
+for /F "tokens=1,2 usebackq" %%a in ("%ROOT_DIR%demo_security_barrier_camera.conf") do (
+   echo python "%downloader_dir%\downloader.py" --name "%%b" --output_dir "%models_path%" --cache_dir "%models_cache%"
+   python "%downloader_dir%\downloader.py" --name "%%b" --output_dir "%models_path%" --cache_dir "%models_cache%"
+
+   for /F "tokens=* usebackq" %%d in (
+      `python "%downloader_dir%\info_dumper.py" --name "%%b" ^|
+         python -c "import sys, json; print(json.load(sys.stdin)[0]['subdirectory'])"`
+   ) do (
+      set model_args=!model_args! %%a "%models_path%\%%d\%target_precision%\%%b.xml"
+   )
+)
+
+echo.
+echo ###############^|^| Generate VS solution for Inference Engine demos using cmake ^|^|###############
+echo.
+timeout 3
+
+if "%PROCESSOR_ARCHITECTURE%" == "AMD64" (
+   set "PLATFORM=x64"
+) else (
+   set "PLATFORM=Win32"
+)
+
+set VSWHERE="false"
+if exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
+   set VSWHERE="true"
+   cd "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer"
+) else if exist "%ProgramFiles%\Microsoft Visual Studio\Installer\vswhere.exe" (
+      set VSWHERE="true"
+      cd "%ProgramFiles%\Microsoft Visual Studio\Installer"
+) else (
+   echo "vswhere tool is not found"
+)
+
+set MSBUILD_BIN=
+set VS_PATH=
+
+if !VSWHERE! == "true" (
+   for /f "usebackq tokens=*" %%i in (`vswhere -latest -products * -requires Microsoft.Component.MSBuild -property installationPath`) do (
+      set VS_PATH=%%i
+   )
+   if exist "!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe"
+   )
+   if exist "!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe"
+   )
+   if exist "!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe"
+   )
+)
+
+if "!MSBUILD_BIN!" == "" (
+   if exist "C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=14 2015"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+) else (
+   if not "!MSBUILD_BIN:2019=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=16 2019"
+   if not "!MSBUILD_BIN:2017=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=15 2017"
+   if not "!MSBUILD_BIN:2015=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=14 2015"
+)
+
+if "!MSBUILD_BIN!" == "" (
+   echo Build tools for Visual Studio 2015 / 2017 / 2019 cannot be found. If you use Visual Studio 2017 / 2019, please download and install build tools from https://www.visualstudio.com/downloads/#build-tools-for-visual-studio-2017
+   GOTO errorHandling
+)
+
+set "SOLUTION_DIR64=%BUILD_FOLDER%\inference_engine_demos_build"
+
+echo Creating Visual Studio !MSBUILD_VERSION! %PLATFORM% files in %SOLUTION_DIR64%... && ^
+if exist "%SOLUTION_DIR64%\CMakeCache.txt" del "%SOLUTION_DIR64%\CMakeCache.txt"
+cd "%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\demos" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio !MSBUILD_VERSION!" -A %PLATFORM% "%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\demos"
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+echo.
+echo ###############^|^| Build Inference Engine demos using MS Visual Studio (MSBuild.exe) ^|^|###############
+echo.
+timeout 3
+echo "!MSBUILD_BIN!" Demos.sln /p:Configuration=Release /t:security_barrier_camera_demo /clp:ErrorsOnly /m
+"!MSBUILD_BIN!" Demos.sln /p:Configuration=Release /t:security_barrier_camera_demo /clp:ErrorsOnly /m
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+
+:runSample
+echo.
+echo ###############^|^| Run Inference Engine security barrier camera demo ^|^|###############
+echo.
+timeout 3
+cd "%SOLUTION_DIR64%\intel64\Release"
+echo "%SOLUTION_DIR64%\intel64\Release\security_barrier_camera_demo.exe" -i "%target_image_path%" %model_args% -d !TARGET! -d_va !TARGET! -d_lpr !TARGET! !SAMPLE_OPTIONS!
+security_barrier_camera_demo.exe -i "%target_image_path%" %model_args% ^
+                                 -d !TARGET! -d_va !TARGET! -d_lpr !TARGET! !SAMPLE_OPTIONS!
+if ERRORLEVEL 1 GOTO errorHandling
+
+echo.
+echo ###############^|^| Demo completed successfully ^|^|###############
+cd "%ROOT_DIR%"
+
+goto :eof
+
+:errorHandling
+echo Error
+cd "%ROOT_DIR%"
diff --git a/scripts/demo/demo_security_barrier_camera.conf b/scripts/demo/demo_security_barrier_camera.conf
new file mode 100644 (file)
index 0000000..c283abc
--- /dev/null
@@ -0,0 +1,3 @@
+-m     vehicle-license-plate-detection-barrier-0106
+-m_lpr license-plate-recognition-barrier-0001
+-m_va  vehicle-attributes-recognition-barrier-0039
diff --git a/scripts/demo/demo_security_barrier_camera.sh b/scripts/demo/demo_security_barrier_camera.sh
new file mode 100644 (file)
index 0000000..6f3f60e
--- /dev/null
@@ -0,0 +1,201 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+. "$ROOT_DIR/utils.sh"
+
+usage() {
+    echo "Security barrier camera demo that showcases three models coming with the product"
+    echo "-d name     specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD are acceptable. The sample will look for a suitable plugin for the specified device"
+    echo "-help            print help message"
+    exit 1
+}
+
+trap 'error ${LINENO}' ERR
+
+target="CPU"
+
+# parse command line options
+while [[ $# -gt 0 ]]
+do
+key="$1"
+
+case $key in
+    -h | -help | --help)
+    usage
+    ;;
+    -d)
+    target="$2"
+    echo target = "${target}"
+    shift
+    ;;
+    -sample-options)
+    sampleoptions="$2 $3 $4 $5 $6"
+    echo sample-options = "${sampleoptions}"
+    shift
+    ;;
+    *)
+    # unknown option
+    ;;
+esac
+shift
+done
+
+
+target_image_path="$ROOT_DIR/car_1.bmp"
+
+run_again="Then run the script again\n\n"
+dashes="\n\n###################################################\n\n"
+
+if [[ -f /etc/centos-release ]]; then
+    DISTRO="centos"
+elif [[ -f /etc/lsb-release ]]; then
+    DISTRO="ubuntu"
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+    DISTRO="macos"
+fi
+
+if [[ $DISTRO == "centos" ]]; then
+    sudo -E yum install -y centos-release-scl epel-release
+    sudo -E yum install -y gcc gcc-c++ make glibc-static glibc-devel libstdc++-static libstdc++-devel libstdc++ libgcc \
+                           glibc-static.i686 glibc-devel.i686 libstdc++-static.i686 libstdc++.i686 libgcc.i686 cmake
+
+    sudo -E rpm -Uvh http://li.nux.ro/download/nux/dextop/el7/x86_64/nux-dextop-release-0-1.el7.nux.noarch.rpm || true
+    sudo -E yum install -y epel-release
+    sudo -E yum install -y cmake ffmpeg gstreamer1 gstreamer1-plugins-base libusbx-devel
+
+    # check installed Python version
+    if command -v python3.5 >/dev/null 2>&1; then
+        python_binary=python3.5
+        pip_binary=pip3.5
+    fi
+    if command -v python3.6 >/dev/null 2>&1; then
+        python_binary=python3.6
+        pip_binary=pip3.6
+    fi
+    if [ -z "$python_binary" ]; then
+        sudo -E yum install -y rh-python36 || true
+        . scl_source enable rh-python36
+        python_binary=python3.6
+        pip_binary=pip3.6
+    fi
+elif [[ $DISTRO == "ubuntu" ]]; then
+    sudo -E apt update
+    print_and_run sudo -E apt -y install build-essential python3-pip virtualenv cmake libcairo2-dev libpango1.0-dev libglib2.0-dev libgtk2.0-dev libswscale-dev libavcodec-dev libavformat-dev libgstreamer1.0-0 gstreamer1.0-plugins-base
+    python_binary=python3
+    pip_binary=pip3
+
+    system_ver=`cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2`
+    if [ $system_ver = "18.04" ]; then
+        sudo -E apt-get install -y libpng-dev
+    else
+        sudo -E apt-get install -y libpng12-dev
+    fi
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+    # check installed Python version
+    if command -v python3.7 >/dev/null 2>&1; then
+        python_binary=python3.7
+        pip_binary=pip3.7
+    elif command -v python3.6 >/dev/null 2>&1; then
+        python_binary=python3.6
+        pip_binary=pip3.6
+    elif command -v python3.5 >/dev/null 2>&1; then
+        python_binary=python3.5
+        pip_binary=pip3.5
+    else
+        python_binary=python3
+        pip_binary=pip3
+    fi
+fi
+
+if ! command -v $python_binary &>/dev/null; then
+    printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. ${run_again}"
+    exit 1
+fi
+
+if [[ $DISTRO == "macos" ]]; then
+    $pip_binary install -r $ROOT_DIR/../open_model_zoo/tools/downloader/requirements.in
+else
+    sudo -E $pip_binary install -r $ROOT_DIR/../open_model_zoo/tools/downloader/requirements.in
+fi
+
+if [ -e "$ROOT_DIR/../../bin/setupvars.sh" ]; then
+    setupvars_path="$ROOT_DIR/../../bin/setupvars.sh"
+else
+    printf "Error: setupvars.sh is not found\n"
+fi
+if ! . $setupvars_path ; then
+    printf "Unable to run ./setupvars.sh. Please check its presence. ${run_again}"
+    exit 1
+fi
+
+# Step 1. Downloading Intel models
+printf "${dashes}"
+printf "Downloading Intel models\n\n"
+
+
+target_precision="FP16"
+
+printf "target_precision = ${target_precision}\n"
+
+downloader_dir="${INTEL_OPENVINO_DIR}/deployment_tools/open_model_zoo/tools/downloader"
+
+downloader_path="$downloader_dir/downloader.py"
+models_path="$HOME/openvino_models/ir"
+models_cache="$HOME/openvino_models/cache"
+
+declare -a model_args
+
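+# For each "<option> <model_name>" pair listed in demo_security_barrier_camera.conf, download the model and collect the matching command-line arguments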
+while read -r model_opt model_name; do
+    model_subdir=$("$python_binary" "$downloader_dir/info_dumper.py" --name "$model_name" |
+        "$python_binary" -c 'import sys, json; print(json.load(sys.stdin)[0]["subdirectory"])')
+
+    model_path="$models_path/$model_subdir/$target_precision/$model_name"
+
+    print_and_run "$python_binary" "$downloader_path" --name "$model_name" --output_dir "$models_path" --cache_dir "$models_cache"
+
+    model_args+=("$model_opt" "${model_path}.xml")
+done < "$ROOT_DIR/demo_security_barrier_camera.conf"
+
+# Step 2. Build samples
+printf "${dashes}"
+printf "Build Inference Engine demos\n\n"
+
+demos_path="${INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/demos"
+
+if ! command -v cmake &>/dev/null; then
+    printf "\n\nCMAKE is not installed. It is required to build Inference Engine demos. Please install it. ${run_again}"
+    exit 1
+fi
+
+OS_PATH=$(uname -m)
+NUM_THREADS="-j2"
+
+if [ $OS_PATH == "x86_64" ]; then
+  OS_PATH="intel64"
+  NUM_THREADS="-j8"
+fi
+
+build_dir="$HOME/inference_engine_demos_build"
+if [ -e $build_dir/CMakeCache.txt ]; then
+       rm -rf $build_dir/CMakeCache.txt
+fi
+mkdir -p $build_dir
+cd $build_dir
+cmake -DCMAKE_BUILD_TYPE=Release $demos_path
+make $NUM_THREADS security_barrier_camera_demo
+
+# Step 3. Run samples
+printf "${dashes}"
+printf "Run Inference Engine security_barrier_camera demo\n\n"
+
+binaries_dir="${build_dir}/${OS_PATH}/Release"
+cd $binaries_dir
+
+print_and_run ./security_barrier_camera_demo -d "$target" -d_va "$target" -d_lpr "$target" -i "$target_image_path" "${model_args[@]}" ${sampleoptions}
+
+printf "${dashes}"
+printf "Demo completed successfully.\n\n"
diff --git a/scripts/demo/demo_squeezenet_download_convert_run.bat b/scripts/demo/demo_squeezenet_download_convert_run.bat
new file mode 100644 (file)
index 0000000..f9dd0e2
--- /dev/null
@@ -0,0 +1,248 @@
+:: Copyright (C) 2018-2019 Intel Corporation
+:: SPDX-License-Identifier: Apache-2.0
+
+@echo off
+setlocal enabledelayedexpansion
+
+set TARGET=CPU
+set BUILD_FOLDER=%USERPROFILE%\Documents\Intel\OpenVINO
+
+:: command line arguments parsing
+:input_arguments_loop
+if not "%1"=="" (
+    if "%1"=="-d" (
+        set TARGET=%2
+        echo target = !TARGET!
+        shift
+    )
+    if "%1"=="-sample-options" (
+        set SAMPLE_OPTIONS=%2 %3 %4 %5 %6
+        echo sample_options = !SAMPLE_OPTIONS!
+        shift
+    )
+    if "%1"=="-help" (
+        echo %~n0%~x0 is a classification demo using the public SqueezeNet topology
+        echo.
+        echo Options:
+        echo -d name     Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD are acceptable. The sample will look for a suitable plugin for the specified device
+        exit /b
+    )
+    shift
+    goto :input_arguments_loop
+)
+
+set ROOT_DIR=%~dp0
+
+set TARGET_PRECISION=FP16
+
+echo target_precision = !TARGET_PRECISION!
+
+set models_path=%BUILD_FOLDER%\openvino_models\models
+set models_cache=%BUILD_FOLDER%\openvino_models\cache
+set irs_path=%BUILD_FOLDER%\openvino_models\ir
+
+set model_name=squeezenet1.1
+
+set target_image_path=%ROOT_DIR%car.png
+
+if exist "%ROOT_DIR%..\..\bin\setupvars.bat" (
+    call "%ROOT_DIR%..\..\bin\setupvars.bat"
+) else (
+    echo setupvars.bat is not found, INTEL_OPENVINO_DIR can't be set
+    goto errorHandling
+)
+
+echo INTEL_OPENVINO_DIR is set to %INTEL_OPENVINO_DIR%
+
+:: Check if Python is installed
+python --version 2>NUL
+if errorlevel 1 (
+   echo Error^: Python is not installed. Please install Python 3.5 ^(64-bit^) or higher from https://www.python.org/downloads/
+   goto errorHandling
+)
+
+:: Check if the Python version is 3.5 or higher
+for /F "tokens=* USEBACKQ" %%F IN (`python --version 2^>^&1`) DO (
+   set version=%%F
+)
+echo %version%
+
+for /F "tokens=1,2,3 delims=. " %%a in ("%version%") do (
+   set Major=%%b
+   set Minor=%%c
+)
+
+if "%Major%" geq "3" (
+   if "%Minor%" geq "5" (
+       set python_ver=okay
+   )
+)
+if not "%python_ver%"=="okay" (
+   echo Unsupported Python version. Please install Python 3.5 ^(64-bit^) or higher from https://www.python.org/downloads/
+   goto errorHandling
+)
+
+:: install yaml python modules required for downloader.py
+pip3 install --user -r "%ROOT_DIR%..\open_model_zoo\tools\downloader\requirements.in"
+if ERRORLEVEL 1 GOTO errorHandling
+
+set downloader_dir=%INTEL_OPENVINO_DIR%\deployment_tools\open_model_zoo\tools\downloader
+
+for /F "tokens=* usebackq" %%d in (
+    `python "%downloader_dir%\info_dumper.py" --name "%model_name%" ^|
+        python -c "import sys, json; print(json.load(sys.stdin)[0]['subdirectory'])"`
+) do (
+    set model_dir=%%d
+)
+
+set ir_dir=%irs_path%\%model_dir%\%target_precision%
+
+echo Download public %model_name% model
+echo python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+python "%downloader_dir%\downloader.py" --name %model_name% --output_dir %models_path% --cache_dir %models_cache%
+echo %model_name% model downloading completed
+
+timeout 7
+
+if exist %ir_dir% (
+    echo.
+    echo Target folder %ir_dir% already exists. Skipping IR generation with Model Optimizer.
+    echo If you want to convert a model again, remove the entire %ir_dir% folder.
+    timeout 7
+    GOTO buildSample
+)
+
+echo.
+echo ###############^|^| Install Model Optimizer prerequisites ^|^|###############
+echo.
+timeout 3
+cd "%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer\install_prerequisites"
+call install_prerequisites_caffe.bat
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+echo.
+echo ###############^|^| Run Model Optimizer ^|^|###############
+echo.
+timeout 3
+
+::set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
+echo python "%downloader_dir%\converter.py" --mo "%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer\mo.py" --name "%model_name%" -d "%models_path%" -o "%irs_path%" --precisions "%TARGET_PRECISION%"
+python "%downloader_dir%\converter.py" --mo "%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer\mo.py" --name "%model_name%" -d "%models_path%" -o "%irs_path%" --precisions "%TARGET_PRECISION%"
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+
+:buildSample
+echo.
+echo ###############^|^| Generate VS solution for Inference Engine samples using cmake ^|^|###############
+echo.
+timeout 3
+
+if "%PROCESSOR_ARCHITECTURE%" == "AMD64" (
+   set "PLATFORM=x64"
+) else (
+   set "PLATFORM=Win32"
+)
+
+set VSWHERE="false"
+if exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
+   set VSWHERE="true"
+   cd "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer"
+) else if exist "%ProgramFiles%\Microsoft Visual Studio\Installer\vswhere.exe" (
+      set VSWHERE="true"
+      cd "%ProgramFiles%\Microsoft Visual Studio\Installer"
+) else (
+   echo "vswhere tool is not found"
+)
+
+set MSBUILD_BIN=
+set VS_PATH=
+
+if !VSWHERE! == "true" (
+   for /f "usebackq tokens=*" %%i in (`vswhere -latest -products * -requires Microsoft.Component.MSBuild -property installationPath`) do (
+      set VS_PATH=%%i
+   )
+   if exist "!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe"
+   )
+   if exist "!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe"
+   )
+   if exist "!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe"
+   )
+)
+
+if "!MSBUILD_BIN!" == "" (
+   if exist "C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=14 2015"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+   if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" (
+      set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe"
+      set "MSBUILD_VERSION=15 2017"
+   )
+) else (
+   if not "!MSBUILD_BIN:2019=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=16 2019"
+   if not "!MSBUILD_BIN:2017=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=15 2017"
+   if not "!MSBUILD_BIN:2015=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=14 2015"
+)
+
+if "!MSBUILD_BIN!" == "" (
+   echo Build tools for Visual Studio 2015 / 2017 / 2019 cannot be found. If you use Visual Studio 2017, please download and install build tools from https://www.visualstudio.com/downloads/#build-tools-for-visual-studio-2017
+   GOTO errorHandling
+)
+
+set "SOLUTION_DIR64=%BUILD_FOLDER%\inference_engine_samples_build"
+
+echo Creating Visual Studio !MSBUILD_VERSION! %PLATFORM% files in %SOLUTION_DIR64%... && ^
+if exist "%SOLUTION_DIR64%\CMakeCache.txt" del "%SOLUTION_DIR64%\CMakeCache.txt"
+cd "%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\samples\cpp" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio !MSBUILD_VERSION!" -A %PLATFORM% "%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\samples\cpp"
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+
+echo.
+echo ###############^|^| Build Inference Engine samples using MS Visual Studio (MSBuild.exe) ^|^|###############
+echo.
+timeout 3
+echo "!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
+"!MSBUILD_BIN!" Samples.sln /p:Configuration=Release /t:classification_sample_async /clp:ErrorsOnly /m
+
+if ERRORLEVEL 1 GOTO errorHandling
+
+timeout 7
+
+:runSample
+echo.
+echo ###############^|^| Run Inference Engine classification sample ^|^|###############
+echo.
+timeout 3
+copy /Y "%ROOT_DIR%%model_name%.labels" "%ir_dir%"
+cd "%SOLUTION_DIR64%\intel64\Release"
+
+echo classification_sample_async.exe -i "%target_image_path%" -m "%ir_dir%\%model_name%.xml" -d !TARGET! !SAMPLE_OPTIONS!
+classification_sample_async.exe -i "%target_image_path%" -m "%ir_dir%\%model_name%.xml" -d !TARGET! !SAMPLE_OPTIONS!
+
+if ERRORLEVEL 1 GOTO errorHandling
+
+echo.
+echo ###############^|^| Classification demo completed successfully ^|^|###############
+
+timeout 10
+cd "%ROOT_DIR%"
+
+goto :eof
+
+:errorHandling
+echo Error
+cd "%ROOT_DIR%"
diff --git a/scripts/demo/demo_squeezenet_download_convert_run.sh b/scripts/demo/demo_squeezenet_download_convert_run.sh
new file mode 100644 (file)
index 0000000..b2acf1a
--- /dev/null
@@ -0,0 +1,221 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+. "$ROOT_DIR/utils.sh"
+
+usage() {
+    echo "Classification demo using public SqueezeNet topology"
+    echo "-d name     specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD are acceptable. The sample will look for a suitable plugin for the specified device"
+    echo "-help            print help message"
+    exit 1
+}
+
+trap 'error ${LINENO}' ERR
+
+target="CPU"
+
+# parse command line options
+while [[ $# -gt 0 ]]
+do
+key="$1"
+
+case $key in
+    -h | -help | --help)
+    usage
+    ;;
+    -d)
+    target="$2"
+    echo target = "${target}"
+    shift
+    ;;
+    -sample-options)
+    sampleoptions="$2 $3 $4 $5 $6"
+    echo sample-options = "${sampleoptions}"
+    shift
+    ;;
+    *)
+    # unknown option
+    ;;
+esac
+shift
+done
+
+target_precision="FP16"
+
+printf "target_precision = ${target_precision}\n"
+
+models_path="$HOME/openvino_models/models"
+models_cache="$HOME/openvino_models/cache"
+irs_path="$HOME/openvino_models/ir"
+
+model_name="squeezenet1.1"
+
+target_image_path="$ROOT_DIR/car.png"
+
+run_again="Then run the script again\n\n"
+dashes="\n\n###################################################\n\n"
+
+
+if [ -e "$ROOT_DIR/../../bin/setupvars.sh" ]; then
+    setupvars_path="$ROOT_DIR/../../bin/setupvars.sh"
+else
+    printf "Error: setupvars.sh is not found\n"
+fi
+
+if ! . $setupvars_path ; then
+    printf "Unable to run ./setupvars.sh. Please check its presence. ${run_again}"
+    exit 1
+fi
+
+# Step 1. Download the Caffe model and the prototxt of the model
+printf "${dashes}"
+printf "\n\nDownloading the Caffe model and the prototxt"
+
+cur_path=$PWD
+
+printf "\nInstalling dependencies\n"
+
+if [[ -f /etc/centos-release ]]; then
+    DISTRO="centos"
+elif [[ -f /etc/lsb-release ]]; then
+    DISTRO="ubuntu"
+fi
+
+if [[ $DISTRO == "centos" ]]; then
+    sudo -E yum install -y centos-release-scl epel-release
+    sudo -E yum install -y gcc gcc-c++ make glibc-static glibc-devel libstdc++-static libstdc++-devel libstdc++ libgcc \
+                           glibc-static.i686 glibc-devel.i686 libstdc++-static.i686 libstdc++.i686 libgcc.i686 cmake
+
+    sudo -E rpm -Uvh http://li.nux.ro/download/nux/dextop/el7/x86_64/nux-dextop-release-0-1.el7.nux.noarch.rpm || true
+    sudo -E yum install -y epel-release
+    sudo -E yum install -y cmake ffmpeg gstreamer1 gstreamer1-plugins-base libusbx-devel
+
+    # check installed Python version
+    if command -v python3.5 >/dev/null 2>&1; then
+        python_binary=python3.5
+        pip_binary=pip3.5
+    fi
+    if command -v python3.6 >/dev/null 2>&1; then
+        python_binary=python3.6
+        pip_binary=pip3.6
+    fi
+    if [ -z "$python_binary" ]; then
+        sudo -E yum install -y rh-python36 || true
+        . scl_source enable rh-python36
+        python_binary=python3.6
+        pip_binary=pip3.6
+    fi
+elif [[ $DISTRO == "ubuntu" ]]; then
+    sudo -E apt update
+    print_and_run sudo -E apt -y install build-essential python3-pip virtualenv cmake libcairo2-dev libpango1.0-dev libglib2.0-dev libgtk2.0-dev libswscale-dev libavcodec-dev libavformat-dev libgstreamer1.0-0 gstreamer1.0-plugins-base
+    python_binary=python3
+    pip_binary=pip3
+
+    system_ver=`cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2`
+    if [ $system_ver = "18.04" ]; then
+        sudo -E apt-get install -y libpng-dev
+    else
+        sudo -E apt-get install -y libpng12-dev
+    fi
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+    # check installed Python version
+    if command -v python3.7 >/dev/null 2>&1; then
+        python_binary=python3.7
+        pip_binary=pip3.7
+    elif command -v python3.6 >/dev/null 2>&1; then
+        python_binary=python3.6
+        pip_binary=pip3.6
+    elif command -v python3.5 >/dev/null 2>&1; then
+        python_binary=python3.5
+        pip_binary=pip3.5
+    else
+        python_binary=python3
+        pip_binary=pip3
+    fi
+fi
+
+if ! command -v $python_binary &>/dev/null; then
+    printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. ${run_again}"
+    exit 1
+fi
+
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    $pip_binary install -r $ROOT_DIR/../open_model_zoo/tools/downloader/requirements.in
+else
+    sudo -E $pip_binary install -r $ROOT_DIR/../open_model_zoo/tools/downloader/requirements.in
+fi
+
+downloader_dir="${INTEL_OPENVINO_DIR}/deployment_tools/open_model_zoo/tools/downloader"
+
+model_dir=$("$python_binary" "$downloader_dir/info_dumper.py" --name "$model_name" |
+    "$python_binary" -c 'import sys, json; print(json.load(sys.stdin)[0]["subdirectory"])')
+
+downloader_path="$downloader_dir/downloader.py"
+
+print_and_run "$python_binary" "$downloader_path" --name "$model_name" --output_dir "${models_path}" --cache_dir "${models_cache}"
+
+ir_dir="${irs_path}/${model_dir}/${target_precision}"
+
+if [ ! -e "$ir_dir" ]; then
+    # Step 2. Configure Model Optimizer
+    printf "${dashes}"
+    printf "Install Model Optimizer dependencies\n\n"
+    cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites"
+    . ./install_prerequisites.sh caffe
+    cd $cur_path
+
+    # Step 3. Convert a model with Model Optimizer
+    printf "${dashes}"
+    printf "Convert a model with Model Optimizer\n\n"
+
+    mo_path="${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/mo.py"
+
+    export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp
+    print_and_run "$python_binary" "$downloader_dir/converter.py" --mo "$mo_path" --name "$model_name" -d "$models_path" -o "$irs_path" --precisions "$target_precision"
+else
+    printf "\n\nTarget folder ${ir_dir} already exists. Skipping IR generation with Model Optimizer."
+    printf "If you want to convert a model again, remove the entire ${ir_dir} folder. ${run_again}"
+fi
+
+# Step 4. Build samples
+printf "${dashes}"
+printf "Build Inference Engine samples\n\n"
+
+OS_PATH=$(uname -m)
+NUM_THREADS="-j2"
+
+if [ $OS_PATH == "x86_64" ]; then
+  OS_PATH="intel64"
+  NUM_THREADS="-j8"
+fi
+
+samples_path="${INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/samples/cpp"
+build_dir="$HOME/inference_engine_samples_build"
+binaries_dir="${build_dir}/${OS_PATH}/Release"
+
+if [ -e $build_dir/CMakeCache.txt ]; then
+       rm -rf $build_dir/CMakeCache.txt
+fi
+mkdir -p $build_dir
+cd $build_dir
+cmake -DCMAKE_BUILD_TYPE=Release $samples_path
+
+make $NUM_THREADS classification_sample_async
+
+# Step 5. Run samples
+printf "${dashes}"
+printf "Run Inference Engine classification sample\n\n"
+
+cd $binaries_dir
+
+cp -f $ROOT_DIR/${model_name}.labels ${ir_dir}/
+
+print_and_run ./classification_sample_async -d "$target" -i "$target_image_path" -m "${ir_dir}/${model_name}.xml" ${sampleoptions}
+
+printf "${dashes}"
+
+printf "Demo completed successfully.\n\n"
diff --git a/scripts/demo/squeezenet1.1.labels b/scripts/demo/squeezenet1.1.labels
new file mode 100644 (file)
index 0000000..a509c00
--- /dev/null
@@ -0,0 +1,1000 @@
+tench, Tinca tinca
+goldfish, Carassius auratus
+great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
+tiger shark, Galeocerdo cuvieri
+hammerhead, hammerhead shark
+electric ray, crampfish, numbfish, torpedo
+stingray
+cock
+hen
+ostrich, Struthio camelus
+brambling, Fringilla montifringilla
+goldfinch, Carduelis carduelis
+house finch, linnet, Carpodacus mexicanus
+junco, snowbird
+indigo bunting, indigo finch, indigo bird, Passerina cyanea
+robin, American robin, Turdus migratorius
+bulbul
+jay
+magpie
+chickadee
+water ouzel, dipper
+kite
+bald eagle, American eagle, Haliaeetus leucocephalus
+vulture
+great grey owl, great gray owl, Strix nebulosa
+European fire salamander, Salamandra salamandra
+common newt, Triturus vulgaris
+eft
+spotted salamander, Ambystoma maculatum
+axolotl, mud puppy, Ambystoma mexicanum
+bullfrog, Rana catesbeiana
+tree frog, tree-frog
+tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui
+loggerhead, loggerhead turtle, Caretta caretta
+leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
+mud turtle
+terrapin
+box turtle, box tortoise
+banded gecko
+common iguana, iguana, Iguana iguana
+American chameleon, anole, Anolis carolinensis
+whiptail, whiptail lizard
+agama
+frilled lizard, Chlamydosaurus kingi
+alligator lizard
+Gila monster, Heloderma suspectum
+green lizard, Lacerta viridis
+African chameleon, Chamaeleo chamaeleon
+Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis
+African crocodile, Nile crocodile, Crocodylus niloticus
+American alligator, Alligator mississipiensis
+triceratops
+thunder snake, worm snake, Carphophis amoenus
+ringneck snake, ring-necked snake, ring snake
+hognose snake, puff adder, sand viper
+green snake, grass snake
+king snake, kingsnake
+garter snake, grass snake
+water snake
+vine snake
+night snake, Hypsiglena torquata
+boa constrictor, Constrictor constrictor
+rock python, rock snake, Python sebae
+Indian cobra, Naja naja
+green mamba
+sea snake
+horned viper, cerastes, sand viper, horned asp, Cerastes cornutus
+diamondback, diamondback rattlesnake, Crotalus adamanteus
+sidewinder, horned rattlesnake, Crotalus cerastes
+trilobite
+harvestman, daddy longlegs, Phalangium opilio
+scorpion
+black and gold garden spider, Argiope aurantia
+barn spider, Araneus cavaticus
+garden spider, Aranea diademata
+black widow, Latrodectus mactans
+tarantula
+wolf spider, hunting spider
+tick
+centipede
+black grouse
+ptarmigan
+ruffed grouse, partridge, Bonasa umbellus
+prairie chicken, prairie grouse, prairie fowl
+peacock
+quail
+partridge
+African grey, African gray, Psittacus erithacus
+macaw
+sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita
+lorikeet
+coucal
+bee eater
+hornbill
+hummingbird
+jacamar
+toucan
+drake
+red-breasted merganser, Mergus serrator
+goose
+black swan, Cygnus atratus
+tusker
+echidna, spiny anteater, anteater
+platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus
+wallaby, brush kangaroo
+koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus
+wombat
+jellyfish
+sea anemone, anemone
+brain coral
+flatworm, platyhelminth
+nematode, nematode worm, roundworm
+conch
+snail
+slug
+sea slug, nudibranch
+chiton, coat-of-mail shell, sea cradle, polyplacophore
+chambered nautilus, pearly nautilus, nautilus
+Dungeness crab, Cancer magister
+rock crab, Cancer irroratus
+fiddler crab
+king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica
+American lobster, Northern lobster, Maine lobster, Homarus americanus
+spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish
+crayfish, crawfish, crawdad, crawdaddy
+hermit crab
+isopod
+white stork, Ciconia ciconia
+black stork, Ciconia nigra
+spoonbill
+flamingo
+little blue heron, Egretta caerulea
+American egret, great white heron, Egretta albus
+bittern
+crane
+limpkin, Aramus pictus
+European gallinule, Porphyrio porphyrio
+American coot, marsh hen, mud hen, water hen, Fulica americana
+bustard
+ruddy turnstone, Arenaria interpres
+red-backed sandpiper, dunlin, Erolia alpina
+redshank, Tringa totanus
+dowitcher
+oystercatcher, oyster catcher
+pelican
+king penguin, Aptenodytes patagonica
+albatross, mollymawk
+grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus
+killer whale, killer, orca, grampus, sea wolf, Orcinus orca
+dugong, Dugong dugon
+sea lion
+Chihuahua
+Japanese spaniel
+Maltese dog, Maltese terrier, Maltese
+Pekinese, Pekingese, Peke
+Shih-Tzu
+Blenheim spaniel
+papillon
+toy terrier
+Rhodesian ridgeback
+Afghan hound, Afghan
+basset, basset hound
+beagle
+bloodhound, sleuthhound
+bluetick
+black-and-tan coonhound
+Walker hound, Walker foxhound
+English foxhound
+redbone
+borzoi, Russian wolfhound
+Irish wolfhound
+Italian greyhound
+whippet
+Ibizan hound, Ibizan Podenco
+Norwegian elkhound, elkhound
+otterhound, otter hound
+Saluki, gazelle hound
+Scottish deerhound, deerhound
+Weimaraner
+Staffordshire bullterrier, Staffordshire bull terrier
+American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier
+Bedlington terrier
+Border terrier
+Kerry blue terrier
+Irish terrier
+Norfolk terrier
+Norwich terrier
+Yorkshire terrier
+wire-haired fox terrier
+Lakeland terrier
+Sealyham terrier, Sealyham
+Airedale, Airedale terrier
+cairn, cairn terrier
+Australian terrier
+Dandie Dinmont, Dandie Dinmont terrier
+Boston bull, Boston terrier
+miniature schnauzer
+giant schnauzer
+standard schnauzer
+Scotch terrier, Scottish terrier, Scottie
+Tibetan terrier, chrysanthemum dog
+silky terrier, Sydney silky
+soft-coated wheaten terrier
+West Highland white terrier
+Lhasa, Lhasa apso
+flat-coated retriever
+curly-coated retriever
+golden retriever
+Labrador retriever
+Chesapeake Bay retriever
+German short-haired pointer
+vizsla, Hungarian pointer
+English setter
+Irish setter, red setter
+Gordon setter
+Brittany spaniel
+clumber, clumber spaniel
+English springer, English springer spaniel
+Welsh springer spaniel
+cocker spaniel, English cocker spaniel, cocker
+Sussex spaniel
+Irish water spaniel
+kuvasz
+schipperke
+groenendael
+malinois
+briard
+kelpie
+komondor
+Old English sheepdog, bobtail
+Shetland sheepdog, Shetland sheep dog, Shetland
+collie
+Border collie
+Bouvier des Flandres, Bouviers des Flandres
+Rottweiler
+German shepherd, German shepherd dog, German police dog, alsatian
+Doberman, Doberman pinscher
+miniature pinscher
+Greater Swiss Mountain dog
+Bernese mountain dog
+Appenzeller
+EntleBucher
+boxer
+bull mastiff
+Tibetan mastiff
+French bulldog
+Great Dane
+Saint Bernard, St Bernard
+Eskimo dog, husky
+malamute, malemute, Alaskan malamute
+Siberian husky
+dalmatian, coach dog, carriage dog
+affenpinscher, monkey pinscher, monkey dog
+basenji
+pug, pug-dog
+Leonberg
+Newfoundland, Newfoundland dog
+Great Pyrenees
+Samoyed, Samoyede
+Pomeranian
+chow, chow chow
+keeshond
+Brabancon griffon
+Pembroke, Pembroke Welsh corgi
+Cardigan, Cardigan Welsh corgi
+toy poodle
+miniature poodle
+standard poodle
+Mexican hairless
+timber wolf, grey wolf, gray wolf, Canis lupus
+white wolf, Arctic wolf, Canis lupus tundrarum
+red wolf, maned wolf, Canis rufus, Canis niger
+coyote, prairie wolf, brush wolf, Canis latrans
+dingo, warrigal, warragal, Canis dingo
+dhole, Cuon alpinus
+African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus
+hyena, hyaena
+red fox, Vulpes vulpes
+kit fox, Vulpes macrotis
+Arctic fox, white fox, Alopex lagopus
+grey fox, gray fox, Urocyon cinereoargenteus
+tabby, tabby cat
+tiger cat
+Persian cat
+Siamese cat, Siamese
+Egyptian cat
+cougar, puma, catamount, mountain lion, painter, panther, Felis concolor
+lynx, catamount
+leopard, Panthera pardus
+snow leopard, ounce, Panthera uncia
+jaguar, panther, Panthera onca, Felis onca
+lion, king of beasts, Panthera leo
+tiger, Panthera tigris
+cheetah, chetah, Acinonyx jubatus
+brown bear, bruin, Ursus arctos
+American black bear, black bear, Ursus americanus, Euarctos americanus
+ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus
+sloth bear, Melursus ursinus, Ursus ursinus
+mongoose
+meerkat, mierkat
+tiger beetle
+ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle
+ground beetle, carabid beetle
+long-horned beetle, longicorn, longicorn beetle
+leaf beetle, chrysomelid
+dung beetle
+rhinoceros beetle
+weevil
+fly
+bee
+ant, emmet, pismire
+grasshopper, hopper
+cricket
+walking stick, walkingstick, stick insect
+cockroach, roach
+mantis, mantid
+cicada, cicala
+leafhopper
+lacewing, lacewing fly
+dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk
+damselfly
+admiral
+ringlet, ringlet butterfly
+monarch, monarch butterfly, milkweed butterfly, Danaus plexippus
+cabbage butterfly
+sulphur butterfly, sulfur butterfly
+lycaenid, lycaenid butterfly
+starfish, sea star
+sea urchin
+sea cucumber, holothurian
+wood rabbit, cottontail, cottontail rabbit
+hare
+Angora, Angora rabbit
+hamster
+porcupine, hedgehog
+fox squirrel, eastern fox squirrel, Sciurus niger
+marmot
+beaver
+guinea pig, Cavia cobaya
+sorrel
+zebra
+hog, pig, grunter, squealer, Sus scrofa
+wild boar, boar, Sus scrofa
+warthog
+hippopotamus, hippo, river horse, Hippopotamus amphibius
+ox
+water buffalo, water ox, Asiatic buffalo, Bubalus bubalis
+bison
+ram, tup
+bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis
+ibex, Capra ibex
+hartebeest
+impala, Aepyceros melampus
+gazelle
+Arabian camel, dromedary, Camelus dromedarius
+llama
+weasel
+mink
+polecat, fitch, foulmart, foumart, Mustela putorius
+black-footed ferret, ferret, Mustela nigripes
+otter
+skunk, polecat, wood pussy
+badger
+armadillo
+three-toed sloth, ai, Bradypus tridactylus
+orangutan, orang, orangutang, Pongo pygmaeus
+gorilla, Gorilla gorilla
+chimpanzee, chimp, Pan troglodytes
+gibbon, Hylobates lar
+siamang, Hylobates syndactylus, Symphalangus syndactylus
+guenon, guenon monkey
+patas, hussar monkey, Erythrocebus patas
+baboon
+macaque
+langur
+colobus, colobus monkey
+proboscis monkey, Nasalis larvatus
+marmoset
+capuchin, ringtail, Cebus capucinus
+howler monkey, howler
+titi, titi monkey
+spider monkey, Ateles geoffroyi
+squirrel monkey, Saimiri sciureus
+Madagascar cat, ring-tailed lemur, Lemur catta
+indri, indris, Indri indri, Indri brevicaudatus
+Indian elephant, Elephas maximus
+African elephant, Loxodonta africana
+lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens
+giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca
+barracouta, snoek
+eel
+coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch
+rock beauty, Holocanthus tricolor
+anemone fish
+sturgeon
+gar, garfish, garpike, billfish, Lepisosteus osseus
+lionfish
+puffer, pufferfish, blowfish, globefish
+abacus
+abaya
+academic gown, academic robe, judge's robe
+accordion, piano accordion, squeeze box
+acoustic guitar
+aircraft carrier, carrier, flattop, attack aircraft carrier
+airliner
+airship, dirigible
+altar
+ambulance
+amphibian, amphibious vehicle
+analog clock
+apiary, bee house
+apron
+ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin
+assault rifle, assault gun
+backpack, back pack, knapsack, packsack, rucksack, haversack
+bakery, bakeshop, bakehouse
+balance beam, beam
+balloon
+ballpoint, ballpoint pen, ballpen, Biro
+Band Aid
+banjo
+bannister, banister, balustrade, balusters, handrail
+barbell
+barber chair
+barbershop
+barn
+barometer
+barrel, cask
+barrow, garden cart, lawn cart, wheelbarrow
+baseball
+basketball
+bassinet
+bassoon
+bathing cap, swimming cap
+bath towel
+bathtub, bathing tub, bath, tub
+beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon
+beacon, lighthouse, beacon light, pharos
+beaker
+bearskin, busby, shako
+beer bottle
+beer glass
+bell cote, bell cot
+bib
+bicycle-built-for-two, tandem bicycle, tandem
+bikini, two-piece
+binder, ring-binder
+binoculars, field glasses, opera glasses
+birdhouse
+boathouse
+bobsled, bobsleigh, bob
+bolo tie, bolo, bola tie, bola
+bonnet, poke bonnet
+bookcase
+bookshop, bookstore, bookstall
+bottlecap
+bow
+bow tie, bow-tie, bowtie
+brass, memorial tablet, plaque
+brassiere, bra, bandeau
+breakwater, groin, groyne, mole, bulwark, seawall, jetty
+breastplate, aegis, egis
+broom
+bucket, pail
+buckle
+bulletproof vest
+bullet train, bullet
+butcher shop, meat market
+cab, hack, taxi, taxicab
+caldron, cauldron
+candle, taper, wax light
+cannon
+canoe
+can opener, tin opener
+cardigan
+car mirror
+carousel, carrousel, merry-go-round, roundabout, whirligig
+carpenter's kit, tool kit
+carton
+car wheel
+cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM
+cassette
+cassette player
+castle
+catamaran
+CD player
+cello, violoncello
+cellular telephone, cellular phone, cellphone, cell, mobile phone
+chain
+chainlink fence
+chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour
+chain saw, chainsaw
+chest
+chiffonier, commode
+chime, bell, gong
+china cabinet, china closet
+Christmas stocking
+church, church building
+cinema, movie theater, movie theatre, movie house, picture palace
+cleaver, meat cleaver, chopper
+cliff dwelling
+cloak
+clog, geta, patten, sabot
+cocktail shaker
+coffee mug
+coffeepot
+coil, spiral, volute, whorl, helix
+combination lock
+computer keyboard, keypad
+confectionery, confectionary, candy store
+container ship, containership, container vessel
+convertible
+corkscrew, bottle screw
+cornet, horn, trumpet, trump
+cowboy boot
+cowboy hat, ten-gallon hat
+cradle
+crane
+crash helmet
+crate
+crib, cot
+Crock Pot
+croquet ball
+crutch
+cuirass
+dam, dike, dyke
+desk
+desktop computer
+dial telephone, dial phone
+diaper, nappy, napkin
+digital clock
+digital watch
+dining table, board
+dishrag, dishcloth
+dishwasher, dish washer, dishwashing machine
+disk brake, disc brake
+dock, dockage, docking facility
+dogsled, dog sled, dog sleigh
+dome
+doormat, welcome mat
+drilling platform, offshore rig
+drum, membranophone, tympan
+drumstick
+dumbbell
+Dutch oven
+electric fan, blower
+electric guitar
+electric locomotive
+entertainment center
+envelope
+espresso maker
+face powder
+feather boa, boa
+file, file cabinet, filing cabinet
+fireboat
+fire engine, fire truck
+fire screen, fireguard
+flagpole, flagstaff
+flute, transverse flute
+folding chair
+football helmet
+forklift
+fountain
+fountain pen
+four-poster
+freight car
+French horn, horn
+frying pan, frypan, skillet
+fur coat
+garbage truck, dustcart
+gasmask, respirator, gas helmet
+gas pump, gasoline pump, petrol pump, island dispenser
+goblet
+go-kart
+golf ball
+golfcart, golf cart
+gondola
+gong, tam-tam
+gown
+grand piano, grand
+greenhouse, nursery, glasshouse
+grille, radiator grille
+grocery store, grocery, food market, market
+guillotine
+hair slide
+hair spray
+half track
+hammer
+hamper
+hand blower, blow dryer, blow drier, hair dryer, hair drier
+hand-held computer, hand-held microcomputer
+handkerchief, hankie, hanky, hankey
+hard disc, hard disk, fixed disk
+harmonica, mouth organ, harp, mouth harp
+harp
+harvester, reaper
+hatchet
+holster
+home theater, home theatre
+honeycomb
+hook, claw
+hoopskirt, crinoline
+horizontal bar, high bar
+horse cart, horse-cart
+hourglass
+iPod
+iron, smoothing iron
+jack-o'-lantern
+jean, blue jean, denim
+jeep, landrover
+jersey, T-shirt, tee shirt
+jigsaw puzzle
+jinrikisha, ricksha, rickshaw
+joystick
+kimono
+knee pad
+knot
+lab coat, laboratory coat
+ladle
+lampshade, lamp shade
+laptop, laptop computer
+lawn mower, mower
+lens cap, lens cover
+letter opener, paper knife, paperknife
+library
+lifeboat
+lighter, light, igniter, ignitor
+limousine, limo
+liner, ocean liner
+lipstick, lip rouge
+Loafer
+lotion
+loudspeaker, speaker, speaker unit, loudspeaker system, speaker system
+loupe, jeweler's loupe
+lumbermill, sawmill
+magnetic compass
+mailbag, postbag
+mailbox, letter box
+maillot
+maillot, tank suit
+manhole cover
+maraca
+marimba, xylophone
+mask
+matchstick
+maypole
+maze, labyrinth
+measuring cup
+medicine chest, medicine cabinet
+megalith, megalithic structure
+microphone, mike
+microwave, microwave oven
+military uniform
+milk can
+minibus
+miniskirt, mini
+minivan
+missile
+mitten
+mixing bowl
+mobile home, manufactured home
+Model T
+modem
+monastery
+monitor
+moped
+mortar
+mortarboard
+mosque
+mosquito net
+motor scooter, scooter
+mountain bike, all-terrain bike, off-roader
+mountain tent
+mouse, computer mouse
+mousetrap
+moving van
+muzzle
+nail
+neck brace
+necklace
+nipple
+notebook, notebook computer
+obelisk
+oboe, hautboy, hautbois
+ocarina, sweet potato
+odometer, hodometer, mileometer, milometer
+oil filter
+organ, pipe organ
+oscilloscope, scope, cathode-ray oscilloscope, CRO
+overskirt
+oxcart
+oxygen mask
+packet
+paddle, boat paddle
+paddlewheel, paddle wheel
+padlock
+paintbrush
+pajama, pyjama, pj's, jammies
+palace
+panpipe, pandean pipe, syrinx
+paper towel
+parachute, chute
+parallel bars, bars
+park bench
+parking meter
+passenger car, coach, carriage
+patio, terrace
+pay-phone, pay-station
+pedestal, plinth, footstall
+pencil box, pencil case
+pencil sharpener
+perfume, essence
+Petri dish
+photocopier
+pick, plectrum, plectron
+pickelhaube
+picket fence, paling
+pickup, pickup truck
+pier
+piggy bank, penny bank
+pill bottle
+pillow
+ping-pong ball
+pinwheel
+pirate, pirate ship
+pitcher, ewer
+plane, carpenter's plane, woodworking plane
+planetarium
+plastic bag
+plate rack
+plow, plough
+plunger, plumber's helper
+Polaroid camera, Polaroid Land camera
+pole
+police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria
+poncho
+pool table, billiard table, snooker table
+pop bottle, soda bottle
+pot, flowerpot
+potter's wheel
+power drill
+prayer rug, prayer mat
+printer
+prison, prison house
+projectile, missile
+projector
+puck, hockey puck
+punching bag, punch bag, punching ball, punchball
+purse
+quill, quill pen
+quilt, comforter, comfort, puff
+racer, race car, racing car
+racket, racquet
+radiator
+radio, wireless
+radio telescope, radio reflector
+rain barrel
+recreational vehicle, RV, R.V.
+reel
+reflex camera
+refrigerator, icebox
+remote control, remote
+restaurant, eating house, eating place, eatery
+revolver, six-gun, six-shooter
+rifle
+rocking chair, rocker
+rotisserie
+rubber eraser, rubber, pencil eraser
+rugby ball
+rule, ruler
+running shoe
+safe
+safety pin
+saltshaker, salt shaker
+sandal
+sarong
+sax, saxophone
+scabbard
+scale, weighing machine
+school bus
+schooner
+scoreboard
+screen, CRT screen
+screw
+screwdriver
+seat belt, seatbelt
+sewing machine
+shield, buckler
+shoe shop, shoe-shop, shoe store
+shoji
+shopping basket
+shopping cart
+shovel
+shower cap
+shower curtain
+ski
+ski mask
+sleeping bag
+slide rule, slipstick
+sliding door
+slot, one-armed bandit
+snorkel
+snowmobile
+snowplow, snowplough
+soap dispenser
+soccer ball
+sock
+solar dish, solar collector, solar furnace
+sombrero
+soup bowl
+space bar
+space heater
+space shuttle
+spatula
+speedboat
+spider web, spider's web
+spindle
+sports car, sport car
+spotlight, spot
+stage
+steam locomotive
+steel arch bridge
+steel drum
+stethoscope
+stole
+stone wall
+stopwatch, stop watch
+stove
+strainer
+streetcar, tram, tramcar, trolley, trolley car
+stretcher
+studio couch, day bed
+stupa, tope
+submarine, pigboat, sub, U-boat
+suit, suit of clothes
+sundial
+sunglass
+sunglasses, dark glasses, shades
+sunscreen, sunblock, sun blocker
+suspension bridge
+swab, swob, mop
+sweatshirt
+swimming trunks, bathing trunks
+swing
+switch, electric switch, electrical switch
+syringe
+table lamp
+tank, army tank, armored combat vehicle, armoured combat vehicle
+tape player
+teapot
+teddy, teddy bear
+television, television system
+tennis ball
+thatch, thatched roof
+theater curtain, theatre curtain
+thimble
+thresher, thrasher, threshing machine
+throne
+tile roof
+toaster
+tobacco shop, tobacconist shop, tobacconist
+toilet seat
+torch
+totem pole
+tow truck, tow car, wrecker
+toyshop
+tractor
+trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
+tray
+trench coat
+tricycle, trike, velocipede
+trimaran
+tripod
+triumphal arch
+trolleybus, trolley coach, trackless trolley
+trombone
+tub, vat
+turnstile
+typewriter keyboard
+umbrella
+unicycle, monocycle
+upright, upright piano
+vacuum, vacuum cleaner
+vase
+vault
+velvet
+vending machine
+vestment
+viaduct
+violin, fiddle
+volleyball
+waffle iron
+wall clock
+wallet, billfold, notecase, pocketbook
+wardrobe, closet, press
+warplane, military plane
+washbasin, handbasin, washbowl, lavabo, wash-hand basin
+washer, automatic washer, washing machine
+water bottle
+water jug
+water tower
+whiskey jug
+whistle
+wig
+window screen
+window shade
+Windsor tie
+wine bottle
+wing
+wok
+wooden spoon
+wool, woolen, woollen
+worm fence, snake fence, snake-rail fence, Virginia fence
+wreck
+yawl
+yurt
+web site, website, internet site, site
+comic book
+crossword puzzle, crossword
+street sign
+traffic light, traffic signal, stoplight
+book jacket, dust cover, dust jacket, dust wrapper
+menu
+plate
+guacamole
+consomme
+hot pot, hotpot
+trifle
+ice cream, icecream
+ice lolly, lolly, lollipop, popsicle
+French loaf
+bagel, beigel
+pretzel
+cheeseburger
+hotdog, hot dog, red hot
+mashed potato
+head cabbage
+broccoli
+cauliflower
+zucchini, courgette
+spaghetti squash
+acorn squash
+butternut squash
+cucumber, cuke
+artichoke, globe artichoke
+bell pepper
+cardoon
+mushroom
+Granny Smith
+strawberry
+orange
+lemon
+fig
+pineapple, ananas
+banana
+jackfruit, jak, jack
+custard apple
+pomegranate
+hay
+carbonara
+chocolate sauce, chocolate syrup
+dough
+meat loaf, meatloaf
+pizza, pizza pie
+potpie
+burrito
+red wine
+espresso
+cup
+eggnog
+alp
+bubble
+cliff, drop, drop-off
+coral reef
+geyser
+lakeside, lakeshore
+promontory, headland, head, foreland
+sandbar, sand bar
+seashore, coast, seacoast, sea-coast
+valley, vale
+volcano
+ballplayer, baseball player
+groom, bridegroom
+scuba diver
+rapeseed
+daisy
+yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum
+corn
+acorn
+hip, rose hip, rosehip
+buckeye, horse chestnut, conker
+coral fungus
+agaric
+gyromitra
+stinkhorn, carrion fungus
+earthstar
+hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa
+bolete
+ear, spike, capitulum
+toilet tissue, toilet paper, bathroom tissue
diff --git a/scripts/demo/utils.sh b/scripts/demo/utils.sh
new file mode 100644 (file)
index 0000000..f4d0ea9
--- /dev/null
@@ -0,0 +1,16 @@
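+# error LINE [MESSAGE] [CODE]: report the failing line and exit with CODE (default 1).
+# Callers are assumed to wire this up with something like: trap 'error ${LINENO}' ERR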
+error() {
+    local code="${3:-1}"
+    if [[ -n "$2" ]]; then
+        echo "Error on or near line $1: $2; exiting with status ${code}"
+    else
+        echo "Error on or near line $1; exiting with status ${code}"
+    fi
+    exit "${code}"
+}
+
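+# print_and_run: print the exact command with every argument shell-quoted (printf %q), then execute it.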
+print_and_run() {
+    printf 'Run'
+    printf ' %q' "$@"
+    printf '\n\n'
+    "$@"
+}
diff --git a/scripts/install_dependencies/install_4_14_kernel.sh b/scripts/install_dependencies/install_4_14_kernel.sh
new file mode 100644 (file)
index 0000000..cbab52c
--- /dev/null
@@ -0,0 +1,44 @@
+#!/bin/bash -x
+
+# Copyright (c) 2018-2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script installs Linux kernel 4.14 required for Intel NEO OpenCL driver on Ubuntu and CentOS
+
+if grep -i "rhel" /etc/os-release &>/dev/null; then
+       # Cent OS
+       echo "install kernel build dependencies"
+       sudo -E yum install -y git gcc gcc-c++ ncurses-devel openssl-devel bc xz elfutils-libelf-devel xorg-x11-drv-nouveau rpm-build
+
+       echo "download 4.14.20 kernel"
+       if [ ! -f ./linux-4.14.20.tar.xz ]; then
+               wget https://www.kernel.org/pub/linux/kernel/v4.x/linux-4.14.20.tar.xz
+       fi
+
+       tar -xJf linux-4.14.20.tar.xz
+       cd linux-4.14.20
+       echo "build 4.14.20 kernel"
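+       # olddefconfig updates the kernel .config, accepting the default value for every option that is not already set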
+       make olddefconfig
+
+       make -j 8 binrpm-pkg
+       cd ~/rpmbuild/RPMS/x86_64
+       sudo -E yum -y localinstall *.rpm
+       sudo -E grub2-set-default 0
+
+elif grep -i "ubuntu" /etc/os-release &>/dev/null; then
+       # Ubuntu
+       sudo -E add-apt-repository ppa:teejee2008/ppa
+       sudo -E apt-get update && sudo apt-get install -y ukuu
+       sudo -E ukuu --install v4.14.20
+fi
\ No newline at end of file
diff --git a/scripts/install_dependencies/install_GST_dependencies.sh b/scripts/install_dependencies/install_GST_dependencies.sh
new file mode 100644 (file)
index 0000000..994bd15
--- /dev/null
@@ -0,0 +1,189 @@
+#!/bin/bash
+
+# Copyright (c) 2020 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+if [ $EUID -ne 0 ]; then
+    echo "ERROR: this script must be run as root to install 3rd party packages." >&2
+    echo "Please try again with \"sudo -E $0\", or as root." >&2
+    exit 1
+fi
+
+params=$@
+
+yes_or_no() {
+    if [ "$params" == "-y" ]; then
+        return 0
+    fi
+
+    while true; do
+        read -p "Add third-party repositories and install GStreamer Plugins (y/n): " yn
+        case $yn in
+            [Yy]*) return 0  ;;
+            [Nn]*) return  1 ;;
+        esac
+    done
+}
+
+echo
+echo "This script installs the following GStreamer 3rd-party dependencies:"
+echo "  1. build dependencies for GStreamer plugin bad"
+echo "  2. build dependencies for GStreamer plugin ugly"
+echo "  3. build dependencies for GStreamer plugin vaapi"
+echo
+
+if [ -f /etc/lsb-release ]; then
+    # Ubuntu
+    PKGS=(
+        libbluetooth-dev
+        libusb-1.0.0-dev
+        libass-dev
+        libbs2b-dev
+        libchromaprint-dev
+        liblcms2-dev
+        libssh2-1-dev
+        libdc1394-22-dev
+        libdirectfb-dev
+        libssh-dev
+        libdca-dev
+        libfaac-dev
+        libfaad-dev
+        libfdk-aac-dev
+        flite1-dev
+        libfluidsynth-dev
+        libgme-dev
+        libgsm1-dev
+        nettle-dev
+        libkate-dev
+        liblrdf0-dev
+        libde265-dev
+        libmjpegtools-dev
+        libmms-dev
+        libmodplug-dev
+        libmpcdec-dev
+        libneon27-dev
+        libofa0-dev
+        libopenal-dev
+        libopenexr-dev
+        libopenjp2-7-dev
+        libopenmpt-dev
+        libopenni2-dev
+        libdvdnav-dev
+        librtmp-dev
+        librsvg2-dev
+        libsbc-dev
+        libsndfile1-dev
+        libsoundtouch-dev
+        libspandsp-dev
+        libsrtp2-dev
+        libzvbi-dev
+        libvo-aacenc-dev
+        libvo-amrwbenc-dev
+        libwebrtc-audio-processing-dev
+        libwebp-dev
+        libwildmidi-dev
+        libzbar-dev
+        libnice-dev
+        libx265-dev
+        libxkbcommon-dev
+        libx264-dev
+        libmpeg2-4-dev
+        libdvdread-dev
+        libcdio-dev
+        libopencore-amrnb-dev
+        libopencore-amrwb-dev
+        liba52-0.7.4-dev
+        libsidplay1-dev
+        libva-dev
+        libxrandr-dev
+        libudev-dev
+        python-gi-dev
+        python3-dev
+    )
+    apt update
+    apt install -y ${PKGS[@]}
+else
+    # CentOS
+    PKGS=(
+        bluez-libs-devel
+        libusb-devel
+        libass-devel
+        libbs2b-devel
+        libchromaprint-devel
+        lcms2-devel
+        libssh2-devel
+        libdc1394-devel
+        libXext-devel
+        libssh-devel
+        libdca-devel
+        faac-devel
+        faad2-devel
+        fdk-aac-devel
+        flite-devel
+        fluidsynth-devel
+        game-music-emu-devel
+        gsm-devel
+        nettle-devel
+        kate-devel
+        liblrdf-devel
+        libde265-devel
+        mjpegtools-devel
+        libmms-devel
+        libmodplug-devel
+        libmpcdec-devel
+        neon-devel
+        libofa-devel
+        openal-soft-devel
+        OpenEXR-devel
+        openjpeg2-devel
+        openni-devel
+        libdvdnav-devel
+        librtmp-devel
+        librsvg2-devel
+        sbc-devel
+        libsndfile-devel
+        soundtouch-devel
+        spandsp-devel
+        libsrtp-devel
+        zvbi-devel
+        vo-amrwbenc-devel
+        webrtc-audio-processing-devel
+        wildmidi-devel
+        zbar-devel
+        libnice-devel
+        x265-devel
+        libxkbcommon-devel
+        x264-devel
+        libmpeg2-devel
+        libcdio-devel
+        opencore-amr-devel
+        libva-devel
+        python36-gobject-devel
+        python3-devel
+    )
+    if yes_or_no; then
+        rpm --import http://li.nux.ro/download/nux/RPM-GPG-KEY-nux.ro
+        yum install -y epel-release
+        rpm -Uvh http://li.nux.ro/download/nux/dextop/el7/x86_64/nux-dextop-release-0-5.el7.nux.noarch.rpm
+        yum install -y ${PKGS[@]}
+    else
+        echo
+        echo "Plugins cannot be installed without adding repositories:"
+        echo "     PM-GPG-KEY-nux, epel-release, nux-dextop-release-0-5."
+        echo
+    fi
+    exit
+fi
diff --git a/scripts/install_dependencies/install_NCS_udev_rules.sh b/scripts/install_dependencies/install_NCS_udev_rules.sh
new file mode 100644 (file)
index 0000000..029e12d
--- /dev/null
@@ -0,0 +1,35 @@
+# Copyright (c) 2018-2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Updating udev rules..."
+
+if [ -z "$INTEL_OPENVINO_DIR" ]; then
+    echo "Please set up your environment. Run 'source <OPENVINO_INSTALLDIR>/bin/setupvars.sh'."
+    exit -1
+fi
+
+if [ -f "$INTEL_OPENVINO_DIR/deployment_tools/inference_engine/external/97-myriad-usbboot.rules" ]; then
+    sudo usermod -a -G users "$(whoami)"
+
+    sudo cp "$INTEL_OPENVINO_DIR/deployment_tools/inference_engine/external/97-myriad-usbboot.rules" /etc/udev/rules.d/
+    sudo udevadm control --reload-rules
+    sudo udevadm trigger
+    sudo ldconfig
+    echo "Udev rules have been successfully installed."
+else
+    echo "File '97-myriad-usbboot.rules' is missing. Please make sure you installed 'Inference Engine Runtime for Intel® Movidiusâ„¢ VPU'."
+    exit -1
+fi 
+
+
diff --git a/scripts/install_dependencies/install_NEO_OCL_driver.sh b/scripts/install_dependencies/install_NEO_OCL_driver.sh
new file mode 100644 (file)
index 0000000..c196d27
--- /dev/null
@@ -0,0 +1,311 @@
+#!/bin/bash
+
+# Copyright (c) 2018 - 2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Installs the Graphics Driver for OpenCL on Linux.
+#
+# Usage: sudo -E ./install_NEO_OCL_driver.sh
+#
+# Supported platforms:
+#     6th, 7th, 8th or 9th generation Intel® processor with Intel(R)
+#     Processor Graphics Technology not previously disabled by the BIOS
+#     or motherboard settings
+#
+EXIT_FAILURE=1
+UBUNTU_VERSION=
+DISTRO=
+
+
+params=$@
+yes_or_no() {
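+    # Note the inverted convention: returns 1 when the answer is yes (or -y was passed) and 0 when the answer is no.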
+    if [ "$params" == "-y" ]; then
+        return 1
+    fi
+
+    while true; do
+        read -p "Do you want to continue: " yn
+        case $yn in
+            [Yy]*) return 1 ;;
+            [Nn]*) return 0 ;;
+        esac
+    done
+}
+
+
+_install_prerequisites_centos()
+{
+    # yum doesn't accept timeout in seconds as parameter
+    echo
+    echo "Note: if yum becomes non-responsive, try aborting the script and run:"
+    echo "      sudo -E $0"
+    echo
+
+    CMDS=("yum -y install tar libpciaccess numactl-libs"
+          "yum -y groupinstall 'Development Tools'"
+          "yum -y install rpmdevtools openssl openssl-devel bc numactl ocl-icd ocl-icd-devel")
+
+    for cmd in "${CMDS[@]}"; do
+        echo $cmd
+        eval $cmd
+        if [[ $? -ne 0 ]]; then
+            echo ERROR: failed to run $cmd >&2
+            echo "Problem (or disk space)?" >&2
+            echo "Verify that you have enough disk space, and run the script again." >&2
+            exit $EXIT_FAILURE
+        fi
+    done
+
+}
+
+_install_prerequisites_ubuntu()
+{
+    CMDS=("apt-get -y update"
+          "apt-get -y install libnuma1 ocl-icd-libopencl1")
+
+    for cmd in "${CMDS[@]}"; do
+        echo $cmd
+        eval $cmd
+        if [[ $? -ne 0 ]]; then
+            echo ERROR: failed to run $cmd >&2
+            echo "Problem (or disk space)?" >&2
+            echo "1. Try to run the script again:" >&2
+            echo "                sudo -E $0" >&2
+            echo "2. Verify that you have enough disk space, and run the script again." >&2
+            exit $EXIT_FAILURE
+        fi
+    done
+}
+
+install_prerequisites()
+{
+    if [[ $DISTRO == "centos" ]]; then
+        echo Installing prerequisites...
+        _install_prerequisites_centos
+    elif [[ $DISTRO == "ubuntu" ]]; then
+        echo Installing prerequisites...
+        _install_prerequisites_ubuntu
+    else
+        echo Unknown OS
+    fi
+}
+
+_deploy_rpm()
+{
+    # On a CentOS 7.2 machine with Intel Parallel Composer XE 2017
+    # installed we got conflicts when trying to deploy these rpms.
+    # If that happens to you too, try again with:
+    # IGFX_RPM_FLAGS="--force" sudo -E ./install_NEO_OCL_driver.sh install
+    #
+    cmd="rpm $IGFX_RPM_FLAGS -ivh --nodeps --force $1"
+    echo $cmd
+    eval $cmd
+}
+
+_deploy_deb()
+{
+    cmd="dpkg -i $1"
+    echo $cmd
+    eval $cmd
+}
+
+_install_user_mode_centos()
+{
+    _deploy_rpm "intel*.rpm"
+    if [[ $? -ne 0 ]]; then
+        echo "ERROR: failed to install rpm packages ($cmd failed)." >&2
+        echo Make sure you have enough disk space or fix the problem manually and try again. >&2
+        exit $EXIT_FAILURE
+    fi
+}
+
+_install_user_mode_ubuntu()
+{
+    _deploy_deb "intel*.deb"
+    if [[ $? -ne 0 ]]; then
+        echo "ERROR: failed to install deb packages ($cmd failed)." >&2
+        echo Make sure you have enough disk space or fix the problem manually and try again. >&2
+        exit $EXIT_FAILURE
+    fi
+}
+
+install_user_mode()
+{
+    echo Installing user mode driver...
+
+    if [[ $DISTRO == "centos" ]]; then
+        _install_user_mode_centos
+    else
+        _install_user_mode_ubuntu
+    fi
+
+}
+
+_uninstall_user_mode_centos()
+{
+    echo Looking for previously installed user-mode driver...
+    PACKAGES=("intel-opencl"
+           "intel-ocloc"
+           "intel-gmmlib"
+           "intel-igc-core"
+           "intel-igc-opencl")
+    for package in "${PACKAGES[@]}"; do      
+        echo "rpm -qa | grep $package"
+        found_package=$(rpm -qa | grep $package)
+        if [[ $? -eq 0 ]]; then
+            echo Found installed user-mode driver, performing uninstall...
+            cmd="rpm -e --nodeps ${found_package}"
+            echo $cmd
+            eval $cmd
+            if [[ $? -ne 0 ]]; then
+                echo ERROR: failed to uninstall existing user-mode driver. >&2
+                echo Please try again manually and run the script again. >&2
+                exit $EXIT_FAILURE
+            fi
+        fi
+    done
+}
+
+_uninstall_user_mode_ubuntu()
+{
+    echo Looking for previously installed user-mode driver...
+
+    PACKAGES=("intel-opencl"
+           "intel-ocloc"
+           "intel-gmmlib"
+           "intel-igc-core"
+           "intel-igc-opencl")
+
+    for package in "${PACKAGES[@]}"; do
+        found_package=$(dpkg-query -W -f='${binary:Package}\n' ${package})
+        if [[ $? -eq 0 ]]; then
+            echo Found $found_package installed, uninstalling...
+            dpkg --purge $found_package
+            if [[ $? -ne 0 ]]; then
+                echo "ERROR: unable to remove $found_package" >&2
+                echo "       please resolve it manually and try to launch the script again." >&2
+                exit $EXIT_FAILURE
+            fi
+        fi
+    done
+}
+
+uninstall_user_mode()
+{
+    if [[ $DISTRO == "centos" ]]; then
+        _uninstall_user_mode_centos
+    else
+        _uninstall_user_mode_ubuntu
+    fi
+}
+
+version_gt() {
+    # check if first version is greater than second version
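+    # e.g. version_gt "4.14" "4.13.0" succeeds while version_gt "4.13" "4.14.20" fails; sort -V does the version-aware comparison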
+    test "$(printf '%s\n' "$@" | sort -V | head -n 1)" != "$1";
+}
+
+summary()
+{
+    kernel_version=$(uname -r)
+
+    echo
+    echo Installation completed successfully.
+    echo
+    echo Next steps:
+    echo "Add OpenCL users to the video group: 'sudo usermod -a -G video USERNAME'"
+    echo "   e.g. if the user running OpenCL host applications is foo, run: sudo usermod -a -G video foo"
+    echo "   Current user has been already added to the video group"
+    echo
+
+    # ask to install kernel 4.14 if current kernel version < 4.13 (GPU NEO driver supports only kernels 4.13.x and higher)
+    if version_gt "4.13" "$kernel_version" ; then
+        echo "Install 4.14 kernel using install_4_14_kernel.sh script and reboot into this kernel"
+        echo
+    fi
+
+    echo "If you use 8th Generation Intel® Coreâ„¢ processor, you will need to add:"
+    echo "   i915.alpha_support=1"
+    echo "   to the 4.14 kernel command line, in order to enable OpenCL functionality for this platform."
+    echo
+}
+
+check_root_access()
+{
+    if [[ $EUID -ne 0 ]]; then
+        echo "ERROR: you must run this script as root." >&2
+        echo "Please try again with "sudo -E $0", or as root." >&2
+        exit $EXIT_FAILURE
+    fi
+}
+
+add_user_to_video_group()
+{
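+    # Resolve the invoking (non-root) user: prefer logname, then fall back to SUDO_USER, then USER.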
+    local real_user=$(logname 2>/dev/null || echo ${SUDO_USER:-${USER}})
+    echo
+    echo Adding $real_user to the video group...
+    usermod -a -G video $real_user
+    if [[ $? -ne 0 ]]; then
+        echo WARNING: unable to add $real_user to the video group >&2
+    fi
+}
+
+_check_distro_version()
+{
+    if [[ $DISTRO == centos ]]; then
+        if ! grep -q 'CentOS Linux release 7\.' /etc/centos-release; then
+            echo ERROR: this script is supported only on CentOS 7 >&2
+            exit $EXIT_FAILURE
+        fi
+    elif [[ $DISTRO == ubuntu ]]; then
+        grep -q -E "18.04" /etc/lsb-release && UBUNTU_VERSION="18.04"
+        if [[ -z $UBUNTU_VERSION ]]; then
+            echo "Warning: The driver was validated only on Ubuntu 18.04 LTS with stock kernel. \nMore info https://github.com/intel/compute-runtime/releases" >&2
+            if [ ! yes_or_no ]; then
+                echo "Installation of GFX driver interrupted"
+                exit $EXIT_FAILURE
+            fi
+        fi
+    fi
+}
+
+distro_init()
+{
+    if [[ -f /etc/centos-release ]]; then
+        DISTRO="centos"
+    elif [[ -f /etc/lsb-release ]]; then
+        DISTRO="ubuntu"
+    fi
+
+    _check_distro_version
+}
+
+install()
+{
+    uninstall_user_mode
+    install_prerequisites
+    install_user_mode
+    add_user_to_video_group
+}
+
+main()
+{
+    echo "Intel OpenCL graphics driver installer"
+    distro_init
+    check_root_access
+    install
+    summary
+}
+
+[[ "$0" == "$BASH_SOURCE" ]] && main "$@"
diff --git a/scripts/install_dependencies/install_guide.html b/scripts/install_dependencies/install_guide.html
new file mode 100644 (file)
index 0000000..5ddb7c0
--- /dev/null
@@ -0,0 +1 @@
+<meta http-equiv="REFRESH" content="0;URL=http://docs.openvinotoolkit.org/2019_R1/_docs_install_guides_installing_openvino_linux.html#set-the-environment-variables">
\ No newline at end of file
diff --git a/scripts/install_dependencies/install_openvino_dependencies.sh b/scripts/install_dependencies/install_openvino_dependencies.sh
new file mode 100644 (file)
index 0000000..38e855a
--- /dev/null
@@ -0,0 +1,351 @@
+#!/bin/bash
+
+# Copyright (c) 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+if [ $EUID -ne 0 ]; then
+    echo "ERROR: this script must be run as root to install 3rd party packages." >&2
+    echo "Please try again with \"sudo -E $0\", or as root." >&2
+    exit 1
+fi
+
+params=$@
+
+yes_or_no_ffmpeg() {
+    if [ "$params" == "-y" ]; then
+        return 0
+    fi
+
+    while true; do
+        read -p "Add third-party RPM Fusion repository and install FFmpeg package (y/n): " yn
+        case $yn in
+            [Yy]*) return 0  ;;
+            [Nn]*) return  1 ;;
+        esac
+    done
+}
+
+yes_or_no_gst_bad_ugly() {
+    if [ "$params" == "-y" ]; then
+        return 0
+    fi
+
+    while true; do
+        read -p "Add third-party RPM Epel, Nux, Fusion, Forensics repositories and install dependencies for GStreamer Bad & Ugly Plugins (y/n): " yn
+        case $yn in
+            [Yy]*) return 0  ;;
+            [Nn]*) return  1 ;;
+        esac
+    done
+}
+
+if [ -f /etc/lsb-release ]; then
+    # Ubuntu
+    echo
+    echo "This script installs the following OpenVINO 3rd-party dependencies:"
+    echo "  1. GTK+, FFmpeg and GStreamer libraries used by OpenCV"
+    echo "  2. libusb library required for Myriad plugin for Inference Engine"
+    echo "  3. build dependencies for OpenVINO samples"
+    echo "  4. build dependencies for GStreamer Plugins"
+    echo
+    PKGS=(
+        cpio
+        build-essential
+        cmake
+        libusb-1.0-0-dev
+        libdrm-dev
+        libgstreamer1.0-0
+        gstreamer1.0-plugins-base
+        gstreamer1.0-plugins-good
+        gstreamer1.0-plugins-bad
+        ffmpeg
+    )
+    system_ver=$(cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2)
+    if [ "$system_ver" = "16.04" ]; then
+        PKGS+=( libgtk2.0-0 )
+    else
+        PKGS+=( libgtk-3-0
+                libglib2.0-0
+                flex
+                bison
+                libgmp10
+                libgsl23
+                gobject-introspection
+                libcap2
+                libcap2-bin
+                gettext
+                libgirepository-1.0-1
+                libx11-6
+                iso-codes
+                libgl1-mesa-dri
+                libgles2
+                libgl-dev
+                gudev-1.0
+                libtheora0
+                libcdparanoia0
+                libpango-1.0-0
+                libgbm1
+                libasound2
+                libjpeg8
+                libvisual-0.4-0
+                libxv1
+                libopus0
+                libgraphene-1.0-0
+                libvorbis0a
+                libbz2-1.0
+                libv4l-0
+                libaa1
+                libflac8
+                libgdk-pixbuf2.0-0
+                libmp3lame0
+                libcaca0
+                libdv4
+                libmpg123-0
+                libraw1394-11
+                libavc1394-0
+                libiec61883-0
+                libpulse0
+                libsoup2.4-1
+                libspeex1
+                libtag-extras1
+                libtwolame0
+                libwavpack1
+                libbluetooth3
+                libusb-1.0.0-dev
+                libass9
+                libbs2b0
+                libchromaprint1
+                liblcms2-2
+                libssh2-1
+                libdc1394-22
+                libdirectfb-1.7-7
+                libssh-4
+                libdca0
+                libfaac0
+                libfdk-aac1
+                libflite1
+                libfluidsynth1
+                libgme0
+                libgsm1
+                libnettle6
+                libkate1
+                liblrdf0
+                libde265-0
+                libmjpegtools-dev
+                libmms0
+                libmodplug1
+                libmpcdec6
+                libneon27
+                libopenal1
+                libopenexr22
+                libopenjp2-7
+                libopenmpt0
+                libopenni2-0
+                libdvdnav4
+                librtmp1
+                librsvg2-2
+                libsbc1
+                libsndfile1
+                libsoundtouch1
+                libspandsp2
+                libsrtp2-1
+                libzvbi0
+                libvo-aacenc0
+                libvo-amrwbenc0
+                libwebrtc-audio-processing1
+                libwebp6
+                libwildmidi2
+                libzbar0
+                libnice10
+                libxkbcommon0
+                libmpeg2-4
+                libopencore-amrnb0
+                libopencore-amrwb0
+                liba52-0.7.4
+                libva2
+                libxrandr2
+                libudev1
+                python3.6
+                libpython3.6
+                python3-gi
+        )
+    fi
+    apt update
+    apt install -y ${PKGS[@]}
+else
+    # CentOS
+    echo
+    echo "This script installs the following OpenVINO 3rd-party dependencies:"
+    echo "  1. GTK+ and GStreamer libraries used by OpenCV"
+    echo "  2. libusb library required for Myriad plugin for Inference Engine"
+    echo "  3. Python 3.6 for Model Optimizer"
+    echo "  4. gcc 4.8.5 and other build dependencies for OpenVINO samples"
+    echo "  5. build dependencies for GStreamer Plugins"
+    echo
+    PKGS=(
+        libusbx-devel
+        gtk2
+        gstreamer1
+        gstreamer1-plugins-good
+        gstreamer1-plugins-bad-free
+        gcc
+        gcc-c++
+        make
+        glibc-static
+        glibc
+        libstdc++-static
+        libstdc++
+        libstdc++
+        libgcc
+        cmake
+        python36
+        python36-pip
+        glib2-devel
+        flex
+        bison
+        gmp
+        gsl
+        gobject-introspection
+        libcap
+        libcap
+        gettext
+        libXrandr
+        libX11
+        iso-codes
+        mesa-libEGL
+        mesa-libGLES
+        mesa-libGL
+        libgudev1
+        libtheora
+        cdparanoia
+        pango
+        mesa-libgbm
+        alsa-lib
+        libjpeg-turbo
+        libvisual
+        libXv
+        opus
+        libvorbis
+        patch
+        bzip2
+        libv4l
+        flac
+        gdk-pixbuf2
+        libdv
+        mpg123
+        libraw1394
+        libavc1394
+        libiec61883
+        pulseaudio-libs
+        libsoup
+        speex
+        wavpack
+        boost-regex-1.53.0
+    )
+    yum install -y ${PKGS[@]}
+
+    # Thirdparty repositories for installing GStreamer Bad & Ugly Plugins dependencies.
+    if yes_or_no_gst_bad_ugly; then
+        GST_BAD_UGLY_PKGS=(
+            bluez-libs
+            libusb
+            libass
+            libbs2b
+            libchromaprint
+            lcms2
+            libssh2
+            libdc1394
+            libXext
+            libssh
+            libdca
+            faac
+            fdk-aac
+            flite
+            fluidsynth
+            game-music-emu
+            gsm
+            nettle
+            kate
+            liblrdf
+            libde265
+            mjpegtools
+            libmms
+            libmodplug
+            libmpcdec
+            neon
+            openal-soft
+            OpenEXR
+            openjpeg2
+            openni
+            libdvdnav
+            librtmp
+            librsvg2
+            sbc
+            libsndfile
+            soundtouch
+            spandsp
+            libsrtp
+            zvbi
+            vo-amrwbenc
+            webrtc-audio-processing
+            wildmidi
+            zbar
+            libnice
+            libxkbcommon
+            libmpeg2
+            libcdio
+            opencore-amr
+            libva
+            python36-gobject
+            python3-devel
+        )
+        yum install -y epel-release
+        rpm -Uvh https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm
+        RPMFUSION_IS_INSTALLED=1
+        yum install -y https://forensics.cert.org/cert-forensics-tools-release-el7.rpm
+        yum install -y ${GST_BAD_UGLY_PKGS[@]}
+    else
+        echo "Dependencies for GStreamer Ugly & Bad plugins installation skipped."
+        echo
+    fi
+
+    echo
+    echo "Intel(R) Distribution of OpenVINO(TM) toolkit can use FFmpeg for processing video streams with OpenCV. Please select your preferred method for installing FFmpeg:"
+    echo
+    echo "Option 1: Allow installer script to add a third party repository, RPM Fusion (https://rpmfusion.org/), which contains FFmpeg. FFmpeg rpm package will be installed from this repository. "
+    echo "WARNING: This repository is NOT PROVIDED OR SUPPORTED by Intel or CentOS. Neither Intel nor CentOS has control over this repository. Terms governing your use of FFmpeg can be found here: https://www.ffmpeg.org/legal.html "
+    echo "Once added, this repository will be enabled on your operating system and can thus receive updates to all packages installed from it. "
+    echo
+    echo "Consider the following ways to prevent unintended 'updates' from this third party repository from over-writing some core part of CentOS:"
+    echo "a) Only enable these archives from time to time, and generally leave them disabled. See: man yum"
+    echo "b) Use the exclude= and includepkgs= options on a per sub-archive basis, in the matching .conf file found in /etc/yum.repos.d/ See: man yum.conf"
+    echo "c) The yum Priorities plug-in can prevent a 3rd party repository from replacing base packages, or prevent base/updates from replacing a 3rd party package."
+    echo
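+    # Illustrative (hypothetical) /etc/yum.repos.d stanza for option b, keeping the repo disabled except for specific packages:
+    #   [rpmfusion-free-updates]
+    #   enabled=0
+    #   includepkgs=ffmpeg*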
+    echo "Option 2: Skip FFmpeg installation."
+    echo
+
+    if yes_or_no_ffmpeg; then
+        if [[ -z $RPMFUSION_IS_INSTALLED ]]; then
+            yum install -y epel-release
+            rpm -Uvh https://download1.rpmfusion.org/free/el/rpmfusion-free-release-7.noarch.rpm
+        fi
+        yum install -y ffmpeg
+    else
+        echo "FFmpeg installation skipped. You may build FFmpeg from sources as described here: https://trac.ffmpeg.org/wiki/CompilationGuide/Centos"
+        echo
+    fi
+    exit
+fi
diff --git a/scripts/setupvars/setupvars.bat b/scripts/setupvars/setupvars.bat
new file mode 100644 (file)
index 0000000..1f08c14
--- /dev/null
@@ -0,0 +1,111 @@
+@echo off
+
+:: Copyright (c) 2018-2019 Intel Corporation
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+::      http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+
+set ROOT=%~dp0
+call :GetFullPath "%ROOT%\.." ROOT
+set SCRIPT_NAME=%~nx0
+
+set "INTEL_OPENVINO_DIR=%ROOT%"
+set "INTEL_CVSDK_DIR=%INTEL_OPENVINO_DIR%"
+
+where /q libmmd.dll || echo Warning: libmmd.dll couldn't be found in %%PATH%%. Please check if the redistributable package for Intel(R) C++ Compiler is installed and the library path is added to the PATH environment variable. System reboot can be required to update the system environment.
+
+:: OpenCV
+if exist "%INTEL_OPENVINO_DIR%\opencv\setupvars.bat" (
+call "%INTEL_OPENVINO_DIR%\opencv\setupvars.bat"
+) else (
+set "OpenCV_DIR=%INTEL_OPENVINO_DIR%\opencv\x64\vc14\lib"
+set "PATH=%INTEL_OPENVINO_DIR%\opencv\x64\vc14\bin;%PATH%"
+)
+
+:: Model Optimizer
+if exist %INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer (
+set PYTHONPATH=%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer;%PYTHONPATH%
+set "PATH=%INTEL_OPENVINO_DIR%\deployment_tools\model_optimizer;%PATH%"
+)
+
+:: Inference Engine
+set "InferenceEngine_DIR=%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\share"
+set "HDDL_INSTALL_DIR=%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\hddl"
+set "PATH=%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\tbb\bin;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Release;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Debug;%HDDL_INSTALL_DIR%\bin;%PATH%"
+if exist %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\arch_descriptions (
+set ARCH_ROOT_DIR=%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\arch_descriptions
+)
+
+:: nGraph
+if exist %INTEL_OPENVINO_DIR%\deployment_tools\ngraph (
+set "PATH=%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib;%PATH%"
+set "ngraph_DIR=%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\cmake"
+)
+
+:: Check if Python is installed
+python --version 2>NUL
+if errorlevel 1 (
+   echo Error^: Python is not installed. Please install Python 3.5 or 3.6 ^(64-bit^) from https://www.python.org/downloads/
+   exit /B 1
+)
+
+:: Check Python version
+for /F "tokens=* USEBACKQ" %%F IN (`python --version 2^>^&1`) DO (
+   set version=%%F
+)
+
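+:: "python --version" prints e.g. "Python 3.6.8"; splitting on "." and space leaves the major version in %%b and the minor in %%c.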
+for /F "tokens=1,2,3 delims=. " %%a in ("%version%") do (
+   set Major=%%b
+   set Minor=%%c
+)
+
+if "%Major%" geq "3" (
+   if "%Minor%" geq "5" (
+      set python_ver=okay
+   )
+)
+
+if not "%python_ver%"=="okay" (
+   echo Unsupported Python version. Please install Python 3.5 or 3.6 ^(64-bit^) from https://www.python.org/downloads/
+   exit /B 1
+)
+
+:: Check Python bitness
+python -c "import sys; print(64 if sys.maxsize > 2**32 else 32)" 2 > NUL
+if errorlevel 1 (
+   echo Error^: Error during installed Python bitness detection
+   exit /B 1
+)
+
+for /F "tokens=* USEBACKQ" %%F IN (`python -c "import sys; print(64 if sys.maxsize > 2**32 else 32)" 2^>^&1`) DO (
+   set bitness=%%F
+)
+
+if not "%bitness%"=="64" (
+   echo Unsupported Python bitness. Please install Python 3.5 or 3.6 ^(64-bit^) from https://www.python.org/downloads/
+   exit /B 1
+)
+
+set PYTHONPATH=%INTEL_OPENVINO_DIR%\python\python%Major%.%Minor%;%INTEL_OPENVINO_DIR%\python\python3;%PYTHONPATH%
+
+if exist %INTEL_OPENVINO_DIR%\deployment_tools\open_model_zoo\tools\accuracy_checker (
+    set PYTHONPATH=%INTEL_OPENVINO_DIR%\deployment_tools\open_model_zoo\tools\accuracy_checker;%PYTHONPATH%
+)
+
+echo [setupvars.bat] OpenVINO environment initialized
+
+exit /B 0
+
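+:: GetFullPath: resolve the path in %1 to an absolute path and store it in the variable named by %2.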
+:GetFullPath
+SET %2=%~f1
+
+GOTO :EOF
diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh
new file mode 100644 (file)
index 0000000..3ce0d70
--- /dev/null
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+# Copyright (c) 2018-2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+INSTALLDIR="${INTEL_OPENVINO_DIR:-<INSTALLDIR>}"
+if [[ ! -d "${INSTALLDIR}" ]]; then
+  # Script has not been processed by installer, so INSTALLDIR is not valid
+  # Using autodetection assuming:
+  # - current shell is "bash"
+  # - location of the current script is in "INSTALLDIR/bin"
+  SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+  BASE_DIR="$( dirname "$SCRIPT_DIR" )"
+
+  INSTALLDIR="${BASE_DIR}"
+fi
+
+export INTEL_OPENVINO_DIR="$INSTALLDIR"
+export INTEL_CVSDK_DIR="$INTEL_OPENVINO_DIR"
+
+# parse command line options
+while [[ $# -gt 0 ]]
+do
+key="$1"
+case $key in
+    -pyver)
+    python_version=$2
+    echo python_version = "${python_version}"
+    shift
+    ;;
+    *)
+    # unknown option
+    ;;
+esac
+shift
+done
+
+if [ -e $INSTALLDIR/deployment_tools/inference_engine ]; then
+    export InferenceEngine_DIR=$INTEL_OPENVINO_DIR/deployment_tools/inference_engine/share
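+    # \ls bypasses any shell alias; the lib/ directory is expected to contain a single architecture subfolder (e.g. intel64) used as the plugin path.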
+    system_type=$(\ls $INTEL_OPENVINO_DIR/deployment_tools/inference_engine/lib/)
+    IE_PLUGINS_PATH=$INTEL_OPENVINO_DIR/deployment_tools/inference_engine/lib/$system_type
+
+    if [[ -e ${IE_PLUGINS_PATH}/arch_descriptions ]]; then
+        export ARCH_ROOT_DIR=${IE_PLUGINS_PATH}/arch_descriptions
+    fi
+
+    export HDDL_INSTALL_DIR=$INSTALLDIR/deployment_tools/inference_engine/external/hddl
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+        export DYLD_LIBRARY_PATH=$INSTALLDIR/deployment_tools/inference_engine/external/mkltiny_mac/lib:$INSTALLDIR/deployment_tools/inference_engine/external/tbb/lib:$IE_PLUGINS_PATH:$DYLD_LIBRARY_PATH
+        export LD_LIBRARY_PATH=$INSTALLDIR/deployment_tools/inference_engine/external/mkltiny_mac/lib:$INSTALLDIR/deployment_tools/inference_engine/external/tbb/lib:$IE_PLUGINS_PATH:$LD_LIBRARY_PATH
+    else
+        export LD_LIBRARY_PATH=$HDDL_INSTALL_DIR/lib:$INSTALLDIR/deployment_tools/inference_engine/external/gna/lib:$INSTALLDIR/deployment_tools/inference_engine/external/mkltiny_lnx/lib:$INSTALLDIR/deployment_tools/inference_engine/external/tbb/lib:$IE_PLUGINS_PATH:$LD_LIBRARY_PATH
+    fi
+fi
+
+if [ -e $INSTALLDIR/deployment_tools/ngraph ]; then
+    export LD_LIBRARY_PATH=$INSTALLDIR/deployment_tools/ngraph/lib:$LD_LIBRARY_PATH
+    export ngraph_DIR=$INSTALLDIR/deployment_tools/ngraph/cmake
+fi
+
+if [ -e "$INSTALLDIR/opencv" ]; then
+    if [ -f "$INSTALLDIR/opencv/setupvars.sh" ]; then
+        source "$INSTALLDIR/opencv/setupvars.sh"
+    else
+        export OpenCV_DIR="$INSTALLDIR/opencv/share/OpenCV"
+        export LD_LIBRARY_PATH="$INSTALLDIR/opencv/lib:$LD_LIBRARY_PATH"
+        export LD_LIBRARY_PATH="$INSTALLDIR/opencv/share/OpenCV/3rdparty/lib:$LD_LIBRARY_PATH"
+    fi
+fi
+
+
+if [ -f "$INTEL_OPENVINO_DIR/data_processing/dl_streamer/bin/setupvars.sh" ]; then
+    source "$INTEL_OPENVINO_DIR/data_processing/dl_streamer/bin/setupvars.sh"
+fi
+
+export PATH="$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer:$PATH"
+export PYTHONPATH="$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer:$PYTHONPATH"
+
+if [ -e $INTEL_OPENVINO_DIR/deployment_tools/open_model_zoo/tools/accuracy_checker ]; then
+    export PYTHONPATH="$INTEL_OPENVINO_DIR/deployment_tools/open_model_zoo/tools/accuracy_checker:$PYTHONPATH"
+fi
+
+if [ -z "$python_version" ]; then
+    if command -v python3.7 >/dev/null 2>&1; then
+        python_version=3.7
+        python_bitness=$(python3.7 -c 'import sys; print(64 if sys.maxsize > 2**32 else 32)')
+    elif command -v python3.6 >/dev/null 2>&1; then
+        python_version=3.6
+        python_bitness=$(python3.6 -c 'import sys; print(64 if sys.maxsize > 2**32 else 32)')
+    elif command -v python3.5 >/dev/null 2>&1; then
+        python_version=3.5
+        python_bitness=$(python3.5 -c 'import sys; print(64 if sys.maxsize > 2**32 else 32)')
+    elif command -v python3.4 >/dev/null 2>&1; then
+        python_version=3.4
+        python_bitness=$(python3.4 -c 'import sys; print(64 if sys.maxsize > 2**32 else 32)')
+    elif command -v python2.7 >/dev/null 2>&1; then
+        python_version=2.7
+    elif command -v python >/dev/null 2>&1; then
+        python_version=$(python -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
+    fi
+fi
+
+OS_NAME=""
+if command -v lsb_release >/dev/null 2>&1; then
+    OS_NAME=$(lsb_release -i -s)
+fi
+
+if [ "$python_bitness" != "" ] && [ "$python_bitness" != "64" ] && [ "$OS_NAME" != "Raspbian" ]; then
+    echo "[setupvars.sh] 64 bitness for Python" $python_version "is requred"
+fi
+
+if [ ! -z "$python_version" ]; then
+    if [ "$python_version" != "2.7" ]; then
+        # add path to OpenCV API for Python 3.x
+        export PYTHONPATH="$INTEL_OPENVINO_DIR/python/python3:$PYTHONPATH"
+    fi
+    # add path to Inference Engine Python API
+    export PYTHONPATH="$INTEL_OPENVINO_DIR/python/python$python_version:$PYTHONPATH"
+fi
+
+echo "[setupvars.sh] OpenVINO environment initialized"
diff --git a/scripts/utils/create_package.py b/scripts/utils/create_package.py
new file mode 100644 (file)
index 0000000..6eb53ad
--- /dev/null
@@ -0,0 +1,15 @@
+import argparse
+import os
+from shutil import rmtree
+
+from utils import Automation
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--build_number", type=int, help="Build number to be added to package version", default=0, )
+args = parser.parse_args()
+
+auto = Automation()
+base_dir = os.path.dirname(__file__)
+bom_path = os.path.join(base_dir, "package_BOM.txt")
+bom = auto.parse_bom(bom_path=bom_path)
+dir_to_tar = auto.copy_files_from_bom(root_path=os.path.join(os.path.dirname(__file__), ".."), bom=bom)
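create_package.py stops after staging the BOM files into a directory; the remaining Automation helpers defined in scripts/utils/utils.py (add_version_txt, make_tarfile) are not called here. A hypothetical continuation, shown only to illustrate how those helpers fit together; the archive name format and the "0" short-hash placeholder are assumptions, not values taken from this snapshot:

    # Hypothetical continuation (not part of the diff above).
    # Passing git_hash_short="0" makes add_version_txt look up the short hash itself.
    version = auto.add_version_txt(dst_path=dir_to_tar,
                                   build_number=args.build_number,
                                   git_hash_short="0")
    auto.make_tarfile(out_file_name="tools_package_{}.tar.gz".format(version),
                      source_dir=dir_to_tar)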
diff --git a/scripts/utils/utils.py b/scripts/utils/utils.py
new file mode 100644 (file)
index 0000000..7d33e9e
--- /dev/null
@@ -0,0 +1,56 @@
+import os
+import subprocess
+import tarfile
+from datetime import datetime
+from shutil import copyfile, copytree, rmtree
+
+major_version = 0
+minor_version = 3
+
+
+class Automation:
+    @staticmethod
+    def parse_bom(bom_path):
+        # Return the BOM entries; each keeps its trailing newline, which
+        # copy_files_from_bom() strips before building paths.
+        with open(bom_path) as bom_file:
+            return [line for line in bom_file]
+
+    @staticmethod
+    def copy_files_from_bom(root_path, bom):
+        target_dir = os.path.join(os.path.dirname(__file__), "tools_package")
+        if os.path.exists(target_dir):
+            rmtree(target_dir)
+        os.makedirs(target_dir)
+        for file in bom:
+            src = os.path.join(root_path, file.strip('\n'))
+            dst = os.path.join(target_dir, file.strip('\n'))
+            if not os.path.exists(os.path.dirname(dst)):
+                os.makedirs(os.path.dirname(dst))
+            if os.path.isdir(src):
+                copytree(src, dst)
+            else:
+                copyfile(src, dst)
+        return target_dir
+
+    @staticmethod
+    def add_version_txt(dst_path, build_number, git_hash_short):
+        git_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip("\n")
+        if git_hash_short == "0":
+            git_hash_short = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("utf-8").strip(
+                "\n")
+        verson = "{0}.{1}.{2}.{3}".format(major_version, minor_version, build_number, git_hash_short)
+        timestamp = datetime.now().strftime("%I:%M%p %B %d, %Y")
+        with open(os.path.join(dst_path, "version.txt"), 'w') as f:
+            f.write(timestamp + '\n')
+            f.write(verson + '\n')
+            f.write(git_hash + '\n')
+        return verson
+
+    @staticmethod
+    def make_tarfile(out_file_name, source_dir):
+        # Build the archive next to this script, replacing any previous copy.
+        archive_path = os.path.join(os.path.dirname(__file__), out_file_name)
+        if os.path.exists(archive_path):
+            os.remove(archive_path)
+        with tarfile.open(archive_path, "w:gz") as tar:
+            tar.add(source_dir, arcname=os.path.basename(source_dir))
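For clarity, the version string written by Automation.add_version_txt follows a {major}.{minor}.{build}.{short_hash} layout; the build number and hash below are made-up example values:

    major_version, minor_version = 0, 3           # module-level constants from utils.py
    build_number, git_hash_short = 42, "abc1234"  # example values, not taken from the repo
    print("{0}.{1}.{2}.{3}".format(major_version, minor_version, build_number, git_hash_short))
    # -> 0.3.42.abc1234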
diff --git a/tests/stress_tests/.automation/env_config.xml b/tests/stress_tests/.automation/env_config.xml
new file mode 100644 (file)
index 0000000..7d356d0
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<attributes>
+    <irs_path>
+        <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/master_04d6f112132f92cab563ae7655747e0359687dc9/</value>
+    </irs_path>
+</attributes>
diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_references_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_references_config.xml
new file mode 100644 (file)
index 0000000..82a6c6c
--- /dev/null
@@ -0,0 +1,533 @@
+<?xml version="1.0"?>
+<attributes>
+    <models>
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="753847" vmpeak="1528832" vmrss="14005" vmhwm="814655" />
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="580025" vmpeak="1743759" vmrss="234704" vmhwm="1462062" />
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1339971" vmpeak="1528828" vmrss="555262" vmhwm="814805" />
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1389159" vmpeak="1741154" vmrss="1036169" vmhwm="1460052" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="753843" vmpeak="1545451" vmrss="14234" vmhwm="821334" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="602206" vmpeak="1511325" vmrss="257501" vmhwm="1230284" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1368206" vmpeak="1545456" vmrss="576774" vmhwm="821739" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1423096" vmpeak="1511373" vmrss="1074752" vmhwm="1230732" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="772626" vmpeak="985754" vmrss="95260" vmhwm="151496" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1044604" vmpeak="1154709" vmrss="699168" vmhwm="811104" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="985525" vmpeak="1057614" vmrss="159306" vmhwm="159306" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1163289" vmpeak="1235379" vmrss="812961" vmhwm="812961" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="762770" vmpeak="1212248" vmrss="93570" vmhwm="426817" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1127847" vmpeak="1586310" vmrss="782029" vmhwm="1304679" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1351816" vmpeak="1423906" vmrss="353738" vmhwm="427644" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1660304" vmpeak="1660304" vmrss="1309215" vmhwm="1309215" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="791863" vmpeak="998329" vmrss="123059" vmhwm="240160" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1309598" vmpeak="1428944" vmrss="964066" vmhwm="1086751" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1060303" vmpeak="1132392" vmrss="238924" vmhwm="240416" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1435214" vmpeak="1507303" vmrss="1084969" vmhwm="1084969" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="864639" vmpeak="1153900" vmrss="147906" vmhwm="322590" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1541161" vmpeak="1686282" vmrss="1195972" vmhwm="1337595" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1181479" vmpeak="1253568" vmrss="315581" vmhwm="322700" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1706760" vmpeak="1778849" vmrss="1356533" vmhwm="1356533" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="754428" vmpeak="3004311" vmrss="17613" vmhwm="1856210" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="710569" vmpeak="3363879" vmrss="365380" vmhwm="3081751" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2487130" vmpeak="3004311" vmrss="1687936" vmhwm="1856448" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="2951748" vmpeak="3363804" vmrss="2597940" vmhwm="3080968" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="767157" vmpeak="1369376" vmrss="63338" vmhwm="540166" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1155101" vmpeak="1701180" vmrss="809938" vmhwm="1420152" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1299262" vmpeak="1373882" vmrss="431758" vmhwm="540214" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1647738" vmpeak="1719828" vmrss="1296350" vmhwm="1419092" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="1642832" vmrss="14014" vmhwm="789109" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="595430" vmpeak="1690484" vmrss="250496" vmhwm="1409205" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1494464" vmpeak="1642832" vmrss="679214" vmhwm="789412" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1450746" vmpeak="1693172" vmrss="1097681" vmhwm="1412254" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="919740" vmpeak="1521955" vmrss="234520" vmhwm="792022" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="1666363" vmpeak="2175012" vmrss="1321245" vmhwm="1893936" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1436982" vmpeak="1521955" vmrss="643614" vmhwm="793218" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2138818" vmpeak="2210907" vmrss="1786162" vmhwm="1893760" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="757262" vmpeak="978832" vmrss="81408" vmhwm="124238" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="810590" vmpeak="929139" vmrss="464868" vmhwm="503813" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="928637" vmpeak="1000727" vmrss="130719" vmhwm="130719" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="859478" vmpeak="931568" vmrss="507540" vmhwm="507540" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="766726" vmpeak="925245" vmrss="33382" vmhwm="180268" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="775117" vmpeak="913347" vmrss="430157" vmhwm="605598" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="927163" vmpeak="999253" vmrss="141869" vmhwm="181156" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="924752" vmpeak="996842" vmrss="571590" vmhwm="602839" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="767003" vmpeak="1090526" vmrss="34900" vmhwm="348172" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="948046" vmpeak="1182082" vmrss="602624" vmhwm="900169" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1051481" vmpeak="1123570" vmrss="257219" vmhwm="348541" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1187106" vmpeak="1259196" vmrss="834438" vmhwm="902800" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="764315" vmpeak="1326938" vmrss="63725" vmhwm="603213" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1183410" vmpeak="1680448" vmrss="837953" vmhwm="1398870" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1227798" vmpeak="1326908" vmrss="438160" vmhwm="602434" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1633997" vmpeak="1706086" vmrss="1281693" vmhwm="1395878" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="753605" vmpeak="876330" vmrss="15571" vmhwm="29106" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="566693" vmpeak="658486" vmrss="220783" vmhwm="232452" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="808486" vmpeak="880576" vmrss="29084" vmhwm="29084" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="586401" vmpeak="658490" vmrss="232764" vmhwm="232764" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="754864" vmpeak="893692" vmrss="54617" vmhwm="81584" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="642527" vmpeak="750424" vmrss="296678" vmhwm="362300" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="831336" vmpeak="903425" vmrss="85654" vmhwm="85654" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="716047" vmpeak="788136" vmrss="364434" vmhwm="364434" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="756813" vmpeak="819698" vmrss="54410" vmhwm="78289" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="758705" vmpeak="862466" vmrss="412966" vmhwm="437131" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="840967" vmpeak="840967" vmrss="82860" vmhwm="82860" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="787182" vmpeak="859271" vmrss="436801" vmhwm="436801" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="753715" vmpeak="876299" vmrss="17512" vmhwm="28402" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="583092" vmpeak="674744" vmrss="238220" vmhwm="249722" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="808209" vmpeak="808209" vmrss="27865" vmhwm="27865" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="600714" vmpeak="672804" vmrss="246967" vmhwm="246967" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="763677" vmpeak="874535" vmrss="13318" vmhwm="35327" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="570521" vmpeak="662182" vmrss="224774" vmhwm="351410" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="901260" vmpeak="973350" vmrss="108037" vmhwm="108037" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="685115" vmpeak="757204" vmrss="331421" vmhwm="351529" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="803228" vmrss="14806" vmhwm="25911" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="577280" vmpeak="667673" vmrss="232029" vmhwm="242580" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="806102" vmpeak="806102" vmrss="25352" vmhwm="25352" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="593340" vmpeak="665429" vmrss="240200" vmhwm="240200" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="764711" vmpeak="1279238" vmrss="23544" vmhwm="528431" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="890428" vmpeak="1316884" vmrss="544882" vmhwm="1035192" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1187529" vmpeak="1279207" vmrss="398512" vmhwm="528730" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1288707" vmpeak="1360796" vmrss="935778" vmhwm="1038888" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="755634" vmpeak="1259024" vmrss="23342" vmhwm="507980" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="845886" vmpeak="1297898" vmrss="500957" vmhwm="1016822" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1327246" vmpeak="1327246" vmrss="384634" vmhwm="507522" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1277117" vmpeak="1300490" vmrss="923674" vmhwm="1018956" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="757556" vmpeak="1471373" vmrss="32780" vmhwm="716861" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1153103" vmpeak="1684306" vmrss="807426" vmhwm="1402513" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1397686" vmpeak="1471373" vmrss="528620" vmhwm="717728" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597785" vmpeak="1680465" vmrss="1244672" vmhwm="1399217" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="1485853" vmrss="14330" vmhwm="773766" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="604573" vmpeak="1684861" vmrss="259556" vmhwm="1403600" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1311107" vmpeak="1485862" vmrss="528448" vmhwm="773656" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1346840" vmpeak="1684896" vmrss="993942" vmhwm="1403886" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="757187" vmpeak="831362" vmrss="78795" vmhwm="113814" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="805270" vmpeak="920321" vmrss="460319" vmhwm="495638" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="852781" vmpeak="852781" vmrss="119033" vmhwm="119033" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="847052" vmpeak="919142" vmrss="494916" vmhwm="494916" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="754248" vmpeak="925443" vmrss="16878" vmhwm="177663" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="657659" vmpeak="799510" vmrss="312070" vmhwm="466153" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="920163" vmpeak="920163" vmrss="131859" vmhwm="176726" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="775350" vmpeak="847440" vmrss="422919" vmhwm="467610" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="760584" vmpeak="1338202" vmrss="43243" vmhwm="616928" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1104862" vmpeak="1557006" vmrss="759030" vmhwm="1275071" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1224172" vmpeak="1338172" vmrss="434944" vmhwm="616849" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1452145" vmpeak="1558106" vmrss="1099428" vmhwm="1276787" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="764878" vmpeak="1551919" vmrss="58638" vmhwm="828383" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="1315120" vmpeak="1977250" vmrss="968858" vmhwm="1694796" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1526166" vmpeak="1598256" vmrss="582401" vmhwm="829598" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1804748" vmpeak="1975855" vmrss="1451397" vmhwm="1693419" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="927665" vmpeak="2236845" vmrss="224034" vmhwm="1396458" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="1988676" vmpeak="3156291" vmrss="1643919" vmhwm="2874946" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2016999" vmpeak="2236955" vmrss="1117754" vmhwm="1396128" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2845849" vmpeak="3165219" vmrss="2493550" vmhwm="2883091" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="766101" vmpeak="1079971" vmrss="27359" vmhwm="362142" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="834856" vmpeak="1080094" vmrss="490089" vmhwm="799312" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1046381" vmpeak="1118471" vmrss="260528" vmhwm="362203" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1060109" vmpeak="1132199" vmrss="707876" vmhwm="804108" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="758516" vmpeak="930397" vmrss="40572" vmhwm="194062" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="873061" vmpeak="1013430" vmrss="528167" vmhwm="692564" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="957620" vmpeak="1029710" vmrss="152754" vmhwm="194656" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="1014305" vmpeak="1086395" vmrss="662525" vmhwm="694821" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="759382" vmpeak="1174707" vmrss="39265" vmhwm="401856" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="983083" vmpeak="1257471" vmrss="637335" vmhwm="975444" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1140730" vmpeak="1174672" vmrss="315977" vmhwm="401508" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1251214" vmpeak="1323304" vmrss="899034" vmhwm="976474" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="754890" vmpeak="815095" vmrss="28833" vmhwm="43881" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="651974" vmpeak="746719" vmrss="306455" vmhwm="321345" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="824942" vmpeak="897032" vmrss="48567" vmhwm="48567" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="676328" vmpeak="748418" vmrss="324860" vmhwm="324860" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="758212" vmpeak="813208" vmrss="29691" vmhwm="44220" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="611789" vmpeak="706534" vmrss="266244" vmhwm="324007" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="818549" vmpeak="890639" vmrss="47141" vmhwm="47141" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677705" vmpeak="749795" vmrss="326163" vmhwm="326163" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="757534" vmpeak="911495" vmrss="36445" vmhwm="182050" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="835683" vmpeak="973280" vmrss="490613" vmhwm="658640" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="941076" vmpeak="1013166" vmrss="148222" vmhwm="183185" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="989608" vmpeak="1061698" vmrss="637709" vmhwm="661746" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="757174" vmpeak="901648" vmrss="73409" vmhwm="106537" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="801644" vmpeak="915186" vmrss="456517" vmhwm="490520" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="847932" vmpeak="847932" vmrss="116410" vmhwm="116410" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="843022" vmpeak="915112" vmrss="490864" vmhwm="490864" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="765393" vmpeak="900402" vmrss="71544" vmhwm="105032" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="759668" vmpeak="872762" vmrss="414493" vmhwm="497701" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="848438" vmpeak="900754" vmrss="113590" vmhwm="113590" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="847620" vmpeak="919710" vmrss="495730" vmhwm="495730" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="755374" vmpeak="1146156" vmrss="22026" vmhwm="370176" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="768451" vmpeak="1074730" vmrss="423662" vmhwm="794266" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1113609" vmpeak="1185698" vmrss="313513" vmhwm="370035" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1134227" vmpeak="1206317" vmrss="783006" vmhwm="795000" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="755796" vmpeak="1267802" vmrss="23746" vmhwm="383983" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="794565" vmpeak="1272634" vmrss="449394" vmhwm="991632" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1234050" vmpeak="1306140" vmrss="421194" vmhwm="421194" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1348960" vmpeak="1421050" vmrss="999050" vmhwm="999050" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754006" vmpeak="2548497" vmrss="15598" vmhwm="1808624" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="668602" vmpeak="3326708" vmrss="323791" vmhwm="3045328" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2027181" vmpeak="2548497" vmrss="1242560" vmhwm="1808730" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2441076" vmpeak="3326708" vmrss="2088055" vmhwm="3045050" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754212" vmpeak="2618030" vmrss="15510" vmhwm="1877383" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="739222" vmpeak="3397112" vmrss="393866" vmhwm="3115085" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2073794" vmpeak="2618030" vmrss="1289741" vmhwm="1878289" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2518340" vmpeak="3397081" vmrss="2165196" vmhwm="3114975" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="764940" vmpeak="947157" vmrss="27988" vmhwm="223726" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="789223" vmpeak="941683" vmrss="443788" vmhwm="641476" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="962187" vmpeak="1034277" vmrss="177848" vmhwm="224180" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="969069" vmpeak="1041158" vmrss="616990" vmhwm="641977" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="755651" vmpeak="1654985" vmrss="24921" vmhwm="920400" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="936892" vmpeak="1838610" vmrss="590994" vmhwm="1556526" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1433352" vmpeak="1654989" vmrss="639456" vmhwm="918693" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1613176" vmpeak="1824922" vmrss="1259940" vmhwm="1543031" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="754692" vmpeak="4259393" vmrss="18013" vmhwm="3532412" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="719105" vmpeak="5906194" vmrss="373648" vmhwm="5623600" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3167040" vmpeak="4259380" vmrss="2378362" vmhwm="3531237" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4165801" vmpeak="5903801" vmrss="3812393" vmhwm="5621585" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="753860" vmpeak="1101161" vmrss="14599" vmhwm="375399" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="577640" vmpeak="1037480" vmrss="232443" vmhwm="755972" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1059828" vmpeak="1131917" vmrss="272879" vmhwm="374721" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="957453" vmpeak="1037445" vmrss="605026" vmhwm="756606" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="754344" vmpeak="1422647" vmrss="16790" vmhwm="680072" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="678964" vmpeak="1435790" vmrss="334017" vmhwm="1154573" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1279823" vmpeak="1422647" vmrss="490692" vmhwm="680526" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1325156" vmpeak="1438571" vmrss="972140" vmhwm="1157138" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="753733" vmpeak="954430" vmrss="14278" vmhwm="229913" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="568880" vmpeak="814976" vmrss="223907" vmhwm="533808" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1032882" vmpeak="1032882" vmrss="174631" vmhwm="230243" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="810031" vmpeak="816178" vmrss="456856" vmhwm="534503" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="756852" vmpeak="1587154" vmrss="31460" vmhwm="837570" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1159840" vmpeak="1822444" vmrss="813969" vmhwm="1540343" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1554462" vmpeak="1626552" vmrss="609677" vmhwm="836655" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1735610" vmpeak="1821749" vmrss="1383285" vmhwm="1540598" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="753856" vmpeak="1528538" vmrss="14414" vmhwm="815491" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="580030" vmpeak="1741062" vmrss="235624" vmhwm="1460386" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1339681" vmpeak="1528538" vmrss="556146" vmhwm="815262" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1389097" vmpeak="1741093" vmrss="1036178" vmhwm="1460060" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="772622" vmpeak="985749" vmrss="95431" vmhwm="151087" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1141962" vmpeak="1252068" vmrss="796734" vmhwm="827217" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="985239" vmpeak="1057328" vmrss="158532" vmhwm="158532" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1171425" vmpeak="1243514" vmrss="818624" vmhwm="818624" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="762731" vmpeak="1211720" vmrss="93486" vmhwm="426896" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1312801" vmpeak="1592839" vmrss="967252" vmhwm="1311569" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1198124" vmpeak="1270214" vmrss="353051" vmhwm="427319" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1657339" vmpeak="1729428" vmrss="1304820" vmhwm="1304820" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="796360" vmpeak="1002408" vmrss="123094" vmhwm="239945" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1352916" vmpeak="1472262" vmrss="1007630" vmhwm="1084727" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1059880" vmpeak="1059880" vmrss="239307" vmhwm="241753" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1437656" vmpeak="1509745" vmrss="1084828" vmhwm="1084828" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="864635" vmpeak="1154040" vmrss="148830" vmhwm="322528" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1505042" vmpeak="1650162" vmrss="1159906" vmhwm="1343711" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1181056" vmpeak="1253146" vmrss="315048" vmhwm="322282" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1719256" vmpeak="1791345" vmrss="1366767" vmhwm="1366767" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="767976" vmpeak="1370195" vmrss="63456" vmhwm="539897" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1313452" vmpeak="1701664" vmrss="968145" vmhwm="1420434" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1295571" vmpeak="1370195" vmrss="430610" vmhwm="539536" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1651421" vmpeak="1723510" vmrss="1299738" vmhwm="1422326" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754212" vmpeak="3124338" vmrss="17362" vmhwm="1770388" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="669583" vmpeak="3628222" vmrss="324363" vmhwm="3347071" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2705824" vmpeak="3124338" vmrss="1906933" vmhwm="1906933" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="3710449" vmpeak="3782539" vmrss="3356861" vmhwm="3356861" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="756870" vmpeak="1192276" vmrss="32300" vmhwm="470417" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="772970" vmpeak="1363872" vmrss="428054" vmhwm="1079412" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1123746" vmpeak="1195836" vmrss="335288" vmhwm="470162" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1219618" vmpeak="1362376" vmrss="875415" vmhwm="1077560" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="848157" vmpeak="1522730" vmrss="178424" vmhwm="792470" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="1549574" vmpeak="2182501" vmrss="1203804" vmhwm="1900742" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1437730" vmpeak="1522730" vmrss="644402" vmhwm="794024" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2145426" vmpeak="2217516" vmrss="1793162" vmhwm="1899854" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="756584" vmpeak="925636" vmrss="32982" vmhwm="182529" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="769230" vmpeak="907847" vmrss="423874" vmhwm="604982" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="928659" vmpeak="928659" vmrss="142304" vmhwm="182353" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="926103" vmpeak="998192" vmrss="572985" vmhwm="603592" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="757851" vmpeak="1078682" vmrss="34751" vmhwm="348154" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="911473" vmpeak="1183102" vmrss="565549" vmhwm="900992" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1051652" vmpeak="1123742" vmrss="258231" vmhwm="349131" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1182570" vmpeak="1254660" vmrss="829659" vmhwm="899540" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="764319" vmpeak="1327506" vmrss="61375" vmhwm="601048" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1206559" vmpeak="1676272" vmrss="860362" vmhwm="1393906" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1228396" vmpeak="1327475" vmrss="441135" vmhwm="603394" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1637486" vmpeak="1709576" vmrss="1285376" vmhwm="1398377" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="761046" vmpeak="1754029" vmrss="43916" vmhwm="1002368" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1026110" vmpeak="2108686" vmrss="680191" vmhwm="1826792" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1512095" vmpeak="1753998" vmrss="701483" vmhwm="1002333" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1880973" vmpeak="2110306" vmrss="1532348" vmhwm="1828952" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="759695" vmpeak="1636430" vmrss="38011" vmhwm="883225" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="1118880" vmpeak="1994964" vmrss="773102" vmhwm="1713034" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1430871" vmpeak="1636434" vmrss="617078" vmhwm="882886" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1804484" vmpeak="1993530" vmrss="1450724" vmhwm="1711340" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="754872" vmpeak="821893" vmrss="55070" vmhwm="82354" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="626304" vmpeak="734201" vmrss="280918" vmhwm="362925" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="831344" vmpeak="903434" vmrss="86495" vmhwm="86495" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="718357" vmpeak="790446" vmrss="367096" vmhwm="367096" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="756826" vmpeak="819711" vmrss="53961" vmhwm="77206" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="758023" vmpeak="861784" vmrss="412702" vmhwm="436805" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="836470" vmpeak="891765" vmrss="83050" vmhwm="83050" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="788986" vmpeak="861075" vmrss="437646" vmhwm="437646" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="762731" vmpeak="804491" vmrss="17490" vmhwm="28454" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="578894" vmpeak="670546" vmrss="233547" vmhwm="245172" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="808209" vmpeak="808209" vmrss="28314" vmhwm="28314" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="600507" vmpeak="672597" vmrss="247596" vmhwm="247596" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="753530" vmpeak="881588" vmrss="13208" vmhwm="35261" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="570042" vmpeak="661702" vmrss="224870" vmhwm="353003" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="901260" vmpeak="901260" vmrss="107390" vmhwm="107390" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="686408" vmpeak="758498" vmrss="332895" vmhwm="351907" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="803228" vmrss="14546" vmhwm="25586" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="577288" vmpeak="667682" vmrss="231642" vmhwm="242167" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="806102" vmpeak="806102" vmrss="24468" vmhwm="24468" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="595588" vmpeak="667678" vmrss="242246" vmhwm="242246" />
+        <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="753838" vmpeak="907420" vmrss="80674" vmhwm="122086" />
+        <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="675633" vmpeak="798283" vmrss="330184" vmhwm="372754" />
+        <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="841390" vmpeak="913479" vmrss="123776" vmhwm="123776" />
+        <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="726066" vmpeak="798155" vmrss="390764" vmhwm="390764" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754080" vmpeak="884950" vmrss="35930" vmhwm="56368" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="613082" vmpeak="713020" vmrss="267753" vmhwm="358019" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="847726" vmpeak="919815" vmrss="83300" vmhwm="83300" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="710754" vmpeak="782843" vmrss="357442" vmhwm="357442" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="760821" vmpeak="1370292" vmrss="44242" vmhwm="618965" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1077643" vmpeak="1594964" vmrss="731733" vmhwm="1313127" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1256200" vmpeak="1370261" vmrss="444043" vmhwm="617852" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1494732" vmpeak="1596218" vmrss="1141690" vmhwm="1314187" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="765322" vmpeak="1593790" vmrss="61120" vmhwm="831661" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="1339184" vmpeak="2040148" vmrss="993968" vmhwm="1758746" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1414652" vmpeak="1593754" vmrss="594426" vmhwm="832220" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1871271" vmpeak="2037904" vmrss="1518501" vmhwm="1756343" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="760650" vmpeak="1369557" vmrss="43384" vmhwm="618015" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1022863" vmpeak="1592206" vmrss="676698" vmhwm="1309880" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1255557" vmpeak="1369522" vmrss="445350" vmhwm="618750" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1490077" vmpeak="1591563" vmrss="1137444" vmhwm="1309910" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="765204" vmpeak="1593108" vmrss="61124" vmhwm="831353" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="1340754" vmpeak="2034586" vmrss="995636" vmhwm="1753100" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1413992" vmpeak="1593077" vmrss="592710" vmhwm="831098" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1867096" vmpeak="2036610" vmrss="1514532" vmhwm="1755089" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="766911" vmpeak="1356080" vmrss="64389" vmhwm="623026" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1105068" vmpeak="1552320" vmrss="759990" vmhwm="1271340" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1258699" vmpeak="1356084" vmrss="468780" vmhwm="623788" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1478730" vmpeak="1553591" vmrss="1126364" vmhwm="1272167" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="761239" vmpeak="1894468" vmrss="40691" vmhwm="1139410" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="1418938" vmpeak="2248351" vmrss="1073886" vmhwm="1967262" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1618592" vmpeak="1894499" vmrss="810946" vmhwm="1140422" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="1996112" vmpeak="2247322" vmrss="1660700" vmhwm="1965405" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="754987" vmpeak="880664" vmrss="29475" vmhwm="43832" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="616360" vmpeak="711106" vmrss="270859" vmhwm="322498" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="818562" vmpeak="818562" vmrss="47141" vmhwm="47141" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674124" vmpeak="746213" vmrss="322731" vmhwm="322731" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="755224" vmpeak="1146433" vmrss="21806" vmhwm="370044" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="775324" vmpeak="1077709" vmrss="430342" vmhwm="796857" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1113904" vmpeak="1185993" vmrss="312527" vmhwm="370946" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1137391" vmpeak="1137391" vmrss="785391" vmhwm="793201" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754133" vmpeak="2548906" vmrss="14955" vmhwm="1807044" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="668619" vmpeak="3326725" vmrss="322691" vmhwm="3044404" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2027476" vmpeak="2548906" vmrss="1242678" vmhwm="1808470" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2438563" vmpeak="3326725" vmrss="2085028" vmhwm="3044505" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754226" vmpeak="2618325" vmrss="15708" vmhwm="1877977" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="741092" vmpeak="3397116" vmrss="396074" vmhwm="3115345" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2074089" vmpeak="2618325" vmrss="1290049" vmhwm="1878672" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2518436" vmpeak="3397178" vmrss="2165728" vmhwm="3115459" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="754701" vmpeak="4259684" vmrss="17626" vmhwm="3531853" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="747582" vmpeak="5921322" vmrss="402490" vmhwm="5639084" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3095241" vmpeak="4259670" vmrss="2379062" vmhwm="3530652" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4163667" vmpeak="5923566" vmrss="3810193" vmhwm="5640967" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="754023" vmpeak="1334414" vmrss="15254" vmhwm="608322" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="600701" vmpeak="1330978" vmrss="255912" vmhwm="1049844" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1215838" vmpeak="1334383" vmrss="428331" vmhwm="607442" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1199972" vmpeak="1330384" vmrss="847391" vmhwm="1049228" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="755387" vmpeak="1175570" vmrss="25374" vmhwm="306904" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="805222" vmpeak="1346307" vmrss="460781" vmhwm="1065873" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1188580" vmpeak="1260670" vmrss="336036" vmhwm="336036" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1449408" vmpeak="1521498" vmrss="1096792" vmhwm="1096792" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="756822" vmpeak="1181615" vmrss="28468" vmhwm="309716" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="819271" vmpeak="2432738" vmrss="474764" vmhwm="1101047" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1189117" vmpeak="1261207" vmrss="333788" vmhwm="333788" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2539222" vmpeak="2611312" vmrss="2191604" vmhwm="2191604" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="757878" vmpeak="1077934" vmrss="35261" vmhwm="348964" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="899610" vmpeak="1179116" vmrss="553863" vmhwm="896997" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1050878" vmpeak="1077876" vmrss="256506" vmhwm="347974" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1179239" vmpeak="1251329" vmrss="826553" vmhwm="897714" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="760456" vmpeak="1096708" vmrss="27315" vmhwm="361944" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="834275" vmpeak="1073569" vmrss="489086" vmhwm="792343" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1058622" vmpeak="1130712" vmrss="267682" vmhwm="362749" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1050852" vmpeak="1122941" vmrss="697576" vmhwm="791040" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="755950" vmpeak="1092203" vmrss="27640" vmhwm="362740" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="835951" vmpeak="1073516" vmrss="490674" vmhwm="792224" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1058626" vmpeak="1130716" vmrss="266516" vmhwm="361992" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1050218" vmpeak="1071435" vmrss="696669" vmhwm="789848" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="754872" vmpeak="880550" vmrss="29603" vmhwm="43212" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="648881" vmpeak="743626" vmrss="303424" vmhwm="318348" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="818246" vmpeak="818246" vmrss="46534" vmhwm="46534" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="674146" vmpeak="746235" vmrss="320315" vmhwm="320315" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="764755" vmpeak="2092574" vmrss="38016" vmhwm="1352450" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="1578328" vmpeak="3355976" vmrss="1233474" vmhwm="3074953" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="1802838" vmpeak="2092587" vmrss="994188" vmhwm="1352709" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2958472" vmpeak="3352694" vmrss="2607677" vmhwm="3072185" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="765124" vmpeak="2035453" vmrss="39745" vmhwm="1292420" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="1939801" vmpeak="3261715" vmrss="1594617" vmhwm="2980577" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="1750196" vmpeak="2039945" vmrss="935774" vmhwm="1291963" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2902235" vmpeak="3265460" vmrss="2551727" vmhwm="2984352" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="757587" vmpeak="1547678" vmrss="33004" vmhwm="718973" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1154670" vmpeak="1678943" vmrss="809811" vmhwm="1398284" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1553134" vmpeak="1553134" vmrss="606232" vmhwm="719791" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1753910" vmpeak="1826000" vmrss="1400234" vmhwm="1400234" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="757160" vmpeak="867486" vmrss="41307" vmhwm="62678" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="743283" vmpeak="841055" vmrss="398604" vmhwm="537209" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="888087" vmpeak="960176" vmrss="114166" vmhwm="114166" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="894339" vmpeak="966429" vmrss="541912" vmhwm="541912" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="772728" vmpeak="951218" vmrss="95840" vmhwm="151676" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1135195" vmpeak="1245301" vmrss="789848" vmhwm="820410" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="985450" vmpeak="1057540" vmrss="159046" vmhwm="159046" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1171152" vmpeak="1243242" vmrss="818598" vmhwm="818598" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="864168" vmpeak="998263" vmrss="126266" vmhwm="241604" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1353237" vmpeak="1472583" vmrss="1007978" vmhwm="1094614" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1060316" vmpeak="1132406" vmrss="238326" vmhwm="240724" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1447146" vmpeak="1519236" vmrss="1094759" vmhwm="1097835" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="757156" vmpeak="826843" vmrss="69031" vmhwm="100887" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="796250" vmpeak="906813" vmrss="451171" vmhwm="482077" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="849041" vmpeak="849041" vmrss="104464" vmhwm="104464" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="833984" vmpeak="906074" vmrss="481786" vmhwm="481786" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="760786" vmpeak="1139173" vmrss="66413" vmhwm="353346" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1055560" vmpeak="1255601" vmrss="710595" vmhwm="974815" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1097984" vmpeak="1170074" vmrss="281050" vmhwm="352228" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1259253" vmpeak="1331343" vmrss="906562" vmhwm="976483" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="920884" vmpeak="2443892" vmrss="237186" vmhwm="851215" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="1751376" vmpeak="4164239" vmrss="1406411" vmhwm="3883422" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="757323" vmpeak="986519" vmrss="35006" vmhwm="212911" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="862219" vmpeak="1179283" vmrss="516881" vmhwm="897930" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="761538" vmpeak="1491811" vmrss="45667" vmhwm="671554" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="1126884" vmpeak="1800550" vmrss="781739" vmhwm="1519302" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="766964" vmpeak="1233342" vmrss="29568" vmhwm="415509" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="897432" vmpeak="1347007" vmrss="553357" vmhwm="1067290" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="756562" vmpeak="1099533" vmrss="30078" vmhwm="245590" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="764170" vmpeak="1353149" vmrss="419267" vmhwm="1072244" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1478496" vmpeak="1478496" vmrss="332820" vmhwm="332820" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1423364" vmpeak="1495454" vmrss="1070973" vmhwm="1172441" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="755092" vmpeak="815298" vmrss="28811" vmhwm="43687" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="620734" vmpeak="715479" vmrss="274991" vmhwm="324935" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="825268" vmpeak="825268" vmrss="48439" vmhwm="48439" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="680592" vmpeak="752681" vmrss="326972" vmhwm="326972" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="765182" vmpeak="880712" vmrss="29827" vmhwm="44149" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="612620" vmpeak="707366" vmrss="266855" vmhwm="323734" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="818879" vmpeak="818879" vmrss="46534" vmhwm="46534" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="681010" vmpeak="753099" vmrss="326902" vmhwm="326902" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="848056" vmpeak="1522360" vmrss="147382" vmhwm="794481" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="1699992" vmpeak="2187231" vmrss="1354892" vmhwm="1906344" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1437365" vmpeak="1522364" vmrss="643724" vmhwm="793755" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2152515" vmpeak="2224604" vmrss="1800026" vmhwm="1900395" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="757526" vmpeak="905132" vmrss="83195" vmhwm="119653" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="815988" vmpeak="932663" vmrss="470742" vmhwm="507760" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1007820" vmpeak="1007820" vmrss="123926" vmhwm="123926" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="861520" vmpeak="933609" vmrss="507870" vmhwm="507870" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="756756" vmpeak="925425" vmrss="34007" vmhwm="180769" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="824168" vmpeak="962403" vmrss="478737" vmhwm="610280" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="927669" vmpeak="999759" vmrss="141772" vmhwm="181966" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="936755" vmpeak="1008845" vmrss="583963" vmhwm="611516" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="759013" vmpeak="1063559" vmrss="51255" vmhwm="349113" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="925958" vmpeak="1184101" vmrss="580056" vmhwm="902325" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1043583" vmpeak="1115672" vmrss="263520" vmhwm="349034" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1189548" vmpeak="1261638" vmrss="836646" vmhwm="903676" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="764574" vmpeak="1327493" vmrss="64108" vmhwm="603842" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1221717" vmpeak="1686643" vmrss="875617" vmhwm="1404475" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1381556" vmpeak="1403402" vmrss="440356" vmhwm="602751" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1641921" vmpeak="1714011" vmrss="1289340" vmhwm="1405430" />
+        <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="762119" vmpeak="2738828" vmrss="47203" vmhwm="947557" />
+        <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="1295483" vmpeak="4189812" vmrss="949788" vmhwm="3908550" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="763840" vmpeak="805556" vmrss="21938" vmhwm="33264" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="652572" vmpeak="744180" vmrss="306754" vmhwm="318432" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="814000" vmpeak="814000" vmrss="33391" vmhwm="33391" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="672144" vmpeak="744233" vmrss="319026" vmhwm="319026" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="754705" vmpeak="881188" vmrss="29282" vmhwm="44836" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="614209" vmpeak="709759" vmrss="268778" vmhwm="326845" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="818228" vmpeak="890318" vmrss="45513" vmhwm="45513" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="682484" vmpeak="754573" vmrss="328966" vmhwm="328966" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="754903" vmpeak="821928" vmrss="55237" vmhwm="82768" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="643887" vmpeak="751788" vmrss="298685" vmhwm="367602" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="831111" vmpeak="831111" vmrss="86732" vmhwm="86732" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="720979" vmpeak="793069" vmrss="367584" vmhwm="367584" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="756870" vmpeak="819759" vmrss="54586" vmhwm="78570" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="705724" vmpeak="809490" vmrss="360267" vmhwm="435512" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="835978" vmpeak="835978" vmrss="82583" vmhwm="82583" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="788902" vmpeak="860992" vmrss="435727" vmhwm="435727" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="756725" vmpeak="831080" vmrss="76414" vmhwm="111914" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="787058" vmpeak="902290" vmrss="441399" vmhwm="476911" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="847299" vmpeak="847299" vmrss="120969" vmhwm="120969" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="828920" vmpeak="901010" vmrss="475939" vmhwm="475939" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="760988" vmpeak="1018754" vmrss="14484" vmhwm="296612" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="600859" vmpeak="965967" vmrss="255569" vmhwm="685150" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1095155" vmpeak="1167245" vmrss="304607" vmhwm="304607" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1004577" vmpeak="1076666" vmrss="651943" vmhwm="689915" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="756096" vmpeak="1100136" vmrss="27812" vmhwm="362344" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="822830" vmpeak="1073947" vmrss="477193" vmhwm="792264" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1060571" vmpeak="1132661" vmrss="269808" vmhwm="362771" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1054684" vmpeak="1075272" vmrss="702310" vmhwm="794314" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="760764" vmpeak="1338383" vmrss="42706" vmhwm="617047" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1108602" vmpeak="1561885" vmrss="762616" vmhwm="1279700" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1279819" vmpeak="1338409" vmrss="435102" vmhwm="617865" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1455146" vmpeak="1561388" vmrss="1101755" vmhwm="1279845" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="765221" vmpeak="1552262" vmrss="59875" vmhwm="829250" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="1322098" vmpeak="1985359" vmrss="976223" vmhwm="1703319" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1373006" vmpeak="1552293" vmrss="581891" vmhwm="829848" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814348" vmpeak="1986380" vmrss="1461099" vmhwm="1704714" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="766088" vmpeak="1079958" vmrss="27324" vmhwm="362155" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="838965" vmpeak="1085884" vmrss="493407" vmhwm="804324" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1046157" vmpeak="1118246" vmrss="260515" vmhwm="362810" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1057223" vmpeak="1080772" vmrss="704066" vmhwm="799440" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="761754" vmpeak="1365104" vmrss="45179" vmhwm="620879" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1120737" vmpeak="1613546" vmrss="774637" vmhwm="1331308" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1251346" vmpeak="1365135" vmrss="446415" vmhwm="620241" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1515817" vmpeak="1613858" vmrss="1162572" vmhwm="1331968" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="839823" vmpeak="1569361" vmrss="155029" vmhwm="833157" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="1363960" vmpeak="2068752" vmrss="1018507" vmhwm="1787042" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1476041" vmpeak="1569392" vmrss="679918" vmhwm="833914" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1904799" vmpeak="2060317" vmrss="1551756" vmhwm="1778167" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="756602" vmpeak="1096774" vmrss="28393" vmhwm="363391" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="845226" vmpeak="1103374" vmrss="500051" vmhwm="821986" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1063304" vmpeak="1135393" vmrss="271220" vmhwm="364399" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1092159" vmpeak="1105997" vmrss="738276" vmhwm="823983" />
+        <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="838816" vmpeak="1561762" vmrss="116930" vmhwm="752906" />
+        <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="1674490" vmpeak="2318250" vmrss="1329842" vmhwm="2034986" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="755062" vmpeak="880739" vmrss="28415" vmhwm="43480" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="609298" vmpeak="704044" vmrss="263868" vmhwm="323488" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="825048" vmpeak="897138" vmrss="49108" vmhwm="49108" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="675844" vmpeak="747934" vmrss="322753" vmhwm="322753" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="756804" vmpeak="978252" vmrss="70514" vmhwm="120370" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="831318" vmpeak="949744" vmrss="485619" vmhwm="524550" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="925689" vmpeak="997779" vmrss="130244" vmhwm="130244" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="878099" vmpeak="950188" vmrss="525395" vmhwm="525395" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="759435" vmpeak="1442861" vmrss="34680" vmhwm="509454" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1012906" vmpeak="1460487" vmrss="667977" vmhwm="1179833" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1368043" vmpeak="1442861" vmrss="427737" vmhwm="509533" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1542648" vmpeak="1542648" vmrss="1195304" vmhwm="1195304" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="759558" vmpeak="1426185" vmrss="33862" vmhwm="507768" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1010358" vmpeak="1414454" vmrss="665451" vmhwm="1133941" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1350650" vmpeak="1426185" vmrss="421828" vmhwm="509168" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1493681" vmpeak="1565770" vmrss="1145416" vmhwm="1145416" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="761433" vmpeak="985784" vmrss="41514" vmhwm="254610" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="876933" vmpeak="1078919" vmrss="531814" vmhwm="798001" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="1028508" vmpeak="1064698" vmrss="201212" vmhwm="254390" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1091807" vmpeak="1163896" vmrss="739525" vmhwm="798023" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="754067" vmpeak="1169247" vmrss="15686" vmhwm="429523" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="682413" vmpeak="1130109" vmrss="337194" vmhwm="848733" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1106463" vmpeak="1178553" vmrss="321428" vmhwm="429871" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1083904" vmpeak="1155994" vmrss="730976" vmhwm="845882" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754010" vmpeak="2548502" vmrss="15452" vmhwm="1807863" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="686602" vmpeak="3327385" vmrss="340982" vmhwm="3045398" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2026776" vmpeak="2548502" vmrss="1241011" vmhwm="1808730" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2438568" vmpeak="3312188" vmrss="2084328" vmhwm="3029980" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754168" vmpeak="2617986" vmrss="16073" vmhwm="1877000" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="612194" vmpeak="3415310" vmrss="266732" vmhwm="3133363" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2145479" vmpeak="2617885" vmrss="1287272" vmhwm="1877568" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2521367" vmpeak="3415297" vmrss="2167426" vmhwm="3133059" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="754344" vmpeak="1426625" vmrss="17173" vmhwm="684173" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="684424" vmpeak="1460949" vmrss="339600" vmhwm="1180036" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1282802" vmpeak="1426625" vmrss="493737" vmhwm="684802" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1331783" vmpeak="1443006" vmrss="978560" vmhwm="1161124" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="753724" vmpeak="954421" vmrss="14414" vmhwm="229578" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="569179" vmpeak="816648" vmrss="224250" vmhwm="535449" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="960810" vmpeak="960810" vmrss="174231" vmhwm="229807" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="808627" vmpeak="880717" vmrss="455677" vmhwm="533002" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="754344" vmpeak="1422647" vmrss="17437" vmhwm="680666" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="686316" vmpeak="1436296" vmrss="340586" vmhwm="1154617" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1279797" vmpeak="1422616" vmrss="490982" vmhwm="680147" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1330780" vmpeak="1442570" vmrss="978392" vmhwm="1161490" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="756958" vmpeak="1587260" vmrss="31108" vmhwm="836506" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1163712" vmpeak="1824596" vmrss="819011" vmhwm="1543559" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1405879" vmpeak="1591766" vmrss="610302" vmhwm="836594" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1734233" vmpeak="1823470" vmrss="1381925" vmhwm="1542178" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="753975" vmpeak="895633" vmrss="15637" vmhwm="140927" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="599332" vmpeak="728939" vmrss="254029" vmhwm="412566" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="903469" vmpeak="975559" vmrss="116124" vmhwm="141182" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="741738" vmpeak="813828" vmrss="389259" vmhwm="413476" />
+    </models>
+</attributes>
\ No newline at end of file
diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_test_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_test_config.xml
new file mode 100644 (file)
index 0000000..1a50d72
--- /dev/null
@@ -0,0 +1,156 @@
+<?xml version="1.0"?>
+<attributes>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+        <value>caffe/FP32/caffenet/caffenet.xml</value>
+        <value>caffe/FP32/densenet_121/densenet_121.xml</value>
+        <value>caffe/FP32/densenet_161/densenet_161.xml</value>
+        <value>caffe/FP32/densenet_169/densenet_169.xml</value>
+        <value>caffe/FP32/densenet_201/densenet_201.xml</value>
+        <value>caffe/FP32/dpn_92/dpn_92.xml</value>
+        <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+        <value>caffe/FP32/inception_v1/inception_v1.xml</value>
+        <value>caffe/FP32/inception_v2/inception_v2.xml</value>
+        <value>caffe/FP32/inception_v3/inception_v3.xml</value>
+        <value>caffe/FP32/inception_v4/inception_v4.xml</value>
+        <value>caffe/FP32/lenet/lenet.xml</value>
+        <value>caffe/FP32/mobilenet/mobilenet.xml</value>
+        <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+        <value>caffe/FP32/resnet_18/resnet_18.xml</value>
+        <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+        <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+        <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+        <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
+        <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
+        <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
+        <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+        <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
+        <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
+        <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
+        <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+        <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
+        <value>caffe/FP32/vgg16/vgg16.xml</value>
+        <value>caffe/FP32/vgg19/vgg19.xml</value>
+        <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
+        <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+        <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+        <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
+        <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
+        <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
+        <value>caffe/FP32/dilation/dilation.xml</value>
+        <value>caffe/FP32/dssd/dssd.xml</value>
+        <value>caffe/FP32/fcn8/fcn8.xml</value>
+        <value>caffe/FP32/fcn32/fcn32.xml</value>
+        <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
+        <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
+        <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
+        <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
+        <value>caffe/FP32/openpose_face/openpose_face.xml</value>
+        <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
+        <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
+        <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
+        <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
+        <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
+        <value>caffe/FP32/vnect/vnect.xml</value>
+        <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
+        <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
+        <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
+        <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
+        <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
+        <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
+        <value>tf/1.14.0/FP32/east/east.xml</value>
+        <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
+        <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
+        <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
+        <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
+        <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
+        <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+        <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
+        <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+        <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
+        <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
+        <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
+        <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
+        <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+        <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
+        <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
+        <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
+        <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
+        <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
+        <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
+        <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
+        <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
+        <value>mxnet/FP32/caffenet/caffenet.xml</value>
+        <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
+        <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
+        <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
+        <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
+        <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
+        <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
+        <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
+        <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+        <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+        <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+        <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+        <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+        <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
+        <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+        <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
+        <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
+        <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
+        <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+        <value>mxnet/FP32/vgg16/vgg16.xml</value>
+        <value>mxnet/FP32/vgg19/vgg19.xml</value>
+        <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
+        <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
+        <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
+        <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+        <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
+        <value>mxnet/FP32/location_net/location_net.xml</value>
+        <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
+        <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
+        <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
+        <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
+        <value>mxnet/FP32/nin/nin.xml</value>
+        <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
+        <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
+        <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+        <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+        <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
+        <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
+        <value>onnx/FP32/retina_net/retina_net.xml</value>
+        <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
+        <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
+        <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
+        <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+    </models>
+</attributes>
\ No newline at end of file
diff --git a/tests/stress_tests/.automation/memcheck_tests/weekly_configs/desktop_references_config.xml b/tests/stress_tests/.automation/memcheck_tests/weekly_configs/desktop_references_config.xml
new file mode 100644 (file)
index 0000000..82a6c6c
--- /dev/null
@@ -0,0 +1,533 @@
+<?xml version="1.0"?>
+<attributes>
+    <models>
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="CPU" vmsize="753847" vmpeak="1528832" vmrss="14005" vmhwm="814655" />
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="create_exenetwork" device="GPU" vmsize="580025" vmpeak="1743759" vmrss="234704" vmhwm="1462062" />
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1339971" vmpeak="1528828" vmrss="555262" vmhwm="814805" />
+        <model path="caffe/FP32/alexnet/alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1389159" vmpeak="1741154" vmrss="1036169" vmhwm="1460052" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="753843" vmpeak="1545451" vmrss="14234" vmhwm="821334" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="602206" vmpeak="1511325" vmrss="257501" vmhwm="1230284" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1368206" vmpeak="1545456" vmrss="576774" vmhwm="821739" />
+        <model path="caffe/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1423096" vmpeak="1511373" vmrss="1074752" vmhwm="1230732" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="772626" vmpeak="985754" vmrss="95260" vmhwm="151496" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1044604" vmpeak="1154709" vmrss="699168" vmhwm="811104" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="985525" vmpeak="1057614" vmrss="159306" vmhwm="159306" />
+        <model path="caffe/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1163289" vmpeak="1235379" vmrss="812961" vmhwm="812961" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="762770" vmpeak="1212248" vmrss="93570" vmhwm="426817" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1127847" vmpeak="1586310" vmrss="782029" vmhwm="1304679" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1351816" vmpeak="1423906" vmrss="353738" vmhwm="427644" />
+        <model path="caffe/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1660304" vmpeak="1660304" vmrss="1309215" vmhwm="1309215" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="791863" vmpeak="998329" vmrss="123059" vmhwm="240160" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1309598" vmpeak="1428944" vmrss="964066" vmhwm="1086751" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1060303" vmpeak="1132392" vmrss="238924" vmhwm="240416" />
+        <model path="caffe/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1435214" vmpeak="1507303" vmrss="1084969" vmhwm="1084969" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="864639" vmpeak="1153900" vmrss="147906" vmhwm="322590" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1541161" vmpeak="1686282" vmrss="1195972" vmhwm="1337595" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1181479" vmpeak="1253568" vmrss="315581" vmhwm="322700" />
+        <model path="caffe/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1706760" vmpeak="1778849" vmrss="1356533" vmhwm="1356533" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="CPU" vmsize="754428" vmpeak="3004311" vmrss="17613" vmhwm="1856210" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="create_exenetwork" device="GPU" vmsize="710569" vmpeak="3363879" vmrss="365380" vmhwm="3081751" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="CPU" vmsize="2487130" vmpeak="3004311" vmrss="1687936" vmhwm="1856448" />
+        <model path="caffe/FP32/dilation/dilation.xml" test="infer_request_inference" device="GPU" vmsize="2951748" vmpeak="3363804" vmrss="2597940" vmhwm="3080968" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="767157" vmpeak="1369376" vmrss="63338" vmhwm="540166" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1155101" vmpeak="1701180" vmrss="809938" vmhwm="1420152" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1299262" vmpeak="1373882" vmrss="431758" vmhwm="540214" />
+        <model path="caffe/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1647738" vmpeak="1719828" vmrss="1296350" vmhwm="1419092" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="1642832" vmrss="14014" vmhwm="789109" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="595430" vmpeak="1690484" vmrss="250496" vmhwm="1409205" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1494464" vmpeak="1642832" vmrss="679214" vmhwm="789412" />
+        <model path="caffe/FP32/fcn_alexnet/fcn_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1450746" vmpeak="1693172" vmrss="1097681" vmhwm="1412254" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="919740" vmpeak="1521955" vmrss="234520" vmhwm="792022" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="1666363" vmpeak="2175012" vmrss="1321245" vmhwm="1893936" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1436982" vmpeak="1521955" vmrss="643614" vmhwm="793218" />
+        <model path="caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2138818" vmpeak="2210907" vmrss="1786162" vmhwm="1893760" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="757262" vmpeak="978832" vmrss="81408" vmhwm="124238" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="810590" vmpeak="929139" vmrss="464868" vmhwm="503813" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="928637" vmpeak="1000727" vmrss="130719" vmhwm="130719" />
+        <model path="caffe/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="859478" vmpeak="931568" vmrss="507540" vmhwm="507540" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="766726" vmpeak="925245" vmrss="33382" vmhwm="180268" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="775117" vmpeak="913347" vmrss="430157" vmhwm="605598" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="927163" vmpeak="999253" vmrss="141869" vmhwm="181156" />
+        <model path="caffe/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="924752" vmpeak="996842" vmrss="571590" vmhwm="602839" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="767003" vmpeak="1090526" vmrss="34900" vmhwm="348172" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="948046" vmpeak="1182082" vmrss="602624" vmhwm="900169" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1051481" vmpeak="1123570" vmrss="257219" vmhwm="348541" />
+        <model path="caffe/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1187106" vmpeak="1259196" vmrss="834438" vmhwm="902800" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="764315" vmpeak="1326938" vmrss="63725" vmhwm="603213" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1183410" vmpeak="1680448" vmrss="837953" vmhwm="1398870" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1227798" vmpeak="1326908" vmrss="438160" vmhwm="602434" />
+        <model path="caffe/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1633997" vmpeak="1706086" vmrss="1281693" vmhwm="1395878" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="CPU" vmsize="753605" vmpeak="876330" vmrss="15571" vmhwm="29106" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="create_exenetwork" device="GPU" vmsize="566693" vmpeak="658486" vmrss="220783" vmhwm="232452" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="CPU" vmsize="808486" vmpeak="880576" vmrss="29084" vmhwm="29084" />
+        <model path="caffe/FP32/lenet/lenet.xml" test="infer_request_inference" device="GPU" vmsize="586401" vmpeak="658490" vmrss="232764" vmhwm="232764" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="754864" vmpeak="893692" vmrss="54617" vmhwm="81584" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="642527" vmpeak="750424" vmrss="296678" vmhwm="362300" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="831336" vmpeak="903425" vmrss="85654" vmhwm="85654" />
+        <model path="caffe/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="716047" vmpeak="788136" vmrss="364434" vmhwm="364434" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="756813" vmpeak="819698" vmrss="54410" vmhwm="78289" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="758705" vmpeak="862466" vmrss="412966" vmhwm="437131" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="840967" vmpeak="840967" vmrss="82860" vmhwm="82860" />
+        <model path="caffe/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="787182" vmpeak="859271" vmrss="436801" vmhwm="436801" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="753715" vmpeak="876299" vmrss="17512" vmhwm="28402" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="583092" vmpeak="674744" vmrss="238220" vmhwm="249722" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="808209" vmpeak="808209" vmrss="27865" vmhwm="27865" />
+        <model path="caffe/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="600714" vmpeak="672804" vmrss="246967" vmhwm="246967" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="763677" vmpeak="874535" vmrss="13318" vmhwm="35327" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="570521" vmpeak="662182" vmrss="224774" vmhwm="351410" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="901260" vmpeak="973350" vmrss="108037" vmhwm="108037" />
+        <model path="caffe/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="685115" vmpeak="757204" vmrss="331421" vmhwm="351529" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="803228" vmrss="14806" vmhwm="25911" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="577280" vmpeak="667673" vmrss="232029" vmhwm="242580" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="806102" vmpeak="806102" vmrss="25352" vmhwm="25352" />
+        <model path="caffe/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="593340" vmpeak="665429" vmrss="240200" vmhwm="240200" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="CPU" vmsize="764711" vmpeak="1279238" vmrss="23544" vmhwm="528431" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="create_exenetwork" device="GPU" vmsize="890428" vmpeak="1316884" vmrss="544882" vmhwm="1035192" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="CPU" vmsize="1187529" vmpeak="1279207" vmrss="398512" vmhwm="528730" />
+        <model path="caffe/FP32/openpose_face/openpose_face.xml" test="infer_request_inference" device="GPU" vmsize="1288707" vmpeak="1360796" vmrss="935778" vmhwm="1038888" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="CPU" vmsize="755634" vmpeak="1259024" vmrss="23342" vmhwm="507980" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="create_exenetwork" device="GPU" vmsize="845886" vmpeak="1297898" vmrss="500957" vmhwm="1016822" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="CPU" vmsize="1327246" vmpeak="1327246" vmrss="384634" vmhwm="507522" />
+        <model path="caffe/FP32/openpose_hand/openpose_hand.xml" test="infer_request_inference" device="GPU" vmsize="1277117" vmpeak="1300490" vmrss="923674" vmhwm="1018956" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="CPU" vmsize="757556" vmpeak="1471373" vmrss="32780" vmhwm="716861" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="create_exenetwork" device="GPU" vmsize="1153103" vmpeak="1684306" vmrss="807426" vmhwm="1402513" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="CPU" vmsize="1397686" vmpeak="1471373" vmrss="528620" vmhwm="717728" />
+        <model path="caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml" test="infer_request_inference" device="GPU" vmsize="1597785" vmpeak="1680465" vmrss="1244672" vmhwm="1399217" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="1485853" vmrss="14330" vmhwm="773766" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="create_exenetwork" device="GPU" vmsize="604573" vmpeak="1684861" vmrss="259556" vmhwm="1403600" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="CPU" vmsize="1311107" vmpeak="1485862" vmrss="528448" vmhwm="773656" />
+        <model path="caffe/FP32/places205_alexnet/places205_alexnet.xml" test="infer_request_inference" device="GPU" vmsize="1346840" vmpeak="1684896" vmrss="993942" vmhwm="1403886" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="757187" vmpeak="831362" vmrss="78795" vmhwm="113814" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="805270" vmpeak="920321" vmrss="460319" vmhwm="495638" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="852781" vmpeak="852781" vmrss="119033" vmhwm="119033" />
+        <model path="caffe/FP32/places205_googlenet/places205_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="847052" vmpeak="919142" vmrss="494916" vmhwm="494916" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="CPU" vmsize="754248" vmpeak="925443" vmrss="16878" vmhwm="177663" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="create_exenetwork" device="GPU" vmsize="657659" vmpeak="799510" vmrss="312070" vmhwm="466153" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="CPU" vmsize="920163" vmpeak="920163" vmrss="131859" vmhwm="176726" />
+        <model path="caffe/FP32/resnet_18/resnet_18.xml" test="infer_request_inference" device="GPU" vmsize="775350" vmpeak="847440" vmrss="422919" vmhwm="467610" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="760584" vmpeak="1338202" vmrss="43243" vmhwm="616928" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1104862" vmpeak="1557006" vmrss="759030" vmhwm="1275071" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1224172" vmpeak="1338172" vmrss="434944" vmhwm="616849" />
+        <model path="caffe/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1452145" vmpeak="1558106" vmrss="1099428" vmhwm="1276787" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="764878" vmpeak="1551919" vmrss="58638" vmhwm="828383" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="1315120" vmpeak="1977250" vmrss="968858" vmhwm="1694796" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1526166" vmpeak="1598256" vmrss="582401" vmhwm="829598" />
+        <model path="caffe/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1804748" vmpeak="1975855" vmrss="1451397" vmhwm="1693419" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="CPU" vmsize="927665" vmpeak="2236845" vmrss="224034" vmhwm="1396458" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="create_exenetwork" device="GPU" vmsize="1988676" vmpeak="3156291" vmrss="1643919" vmhwm="2874946" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="CPU" vmsize="2016999" vmpeak="2236955" vmrss="1117754" vmhwm="1396128" />
+        <model path="caffe/FP32/resnet_v1_269/resnet_v1_269.xml" test="infer_request_inference" device="GPU" vmsize="2845849" vmpeak="3165219" vmrss="2493550" vmhwm="2883091" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="766101" vmpeak="1079971" vmrss="27359" vmhwm="362142" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="834856" vmpeak="1080094" vmrss="490089" vmhwm="799312" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1046381" vmpeak="1118471" vmrss="260528" vmhwm="362203" />
+        <model path="caffe/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1060109" vmpeak="1132199" vmrss="707876" vmhwm="804108" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="CPU" vmsize="758516" vmpeak="930397" vmrss="40572" vmhwm="194062" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="create_exenetwork" device="GPU" vmsize="873061" vmpeak="1013430" vmrss="528167" vmhwm="692564" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="CPU" vmsize="957620" vmpeak="1029710" vmrss="152754" vmhwm="194656" />
+        <model path="caffe/FP32/se_bn_inception/se_bn_inception.xml" test="infer_request_inference" device="GPU" vmsize="1014305" vmpeak="1086395" vmrss="662525" vmhwm="694821" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="CPU" vmsize="759382" vmpeak="1174707" vmrss="39265" vmhwm="401856" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="create_exenetwork" device="GPU" vmsize="983083" vmpeak="1257471" vmrss="637335" vmhwm="975444" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="CPU" vmsize="1140730" vmpeak="1174672" vmrss="315977" vmhwm="401508" />
+        <model path="caffe/FP32/se_resnext_50/se_resnext_50.xml" test="infer_request_inference" device="GPU" vmsize="1251214" vmpeak="1323304" vmrss="899034" vmhwm="976474" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="754890" vmpeak="815095" vmrss="28833" vmhwm="43881" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="651974" vmpeak="746719" vmrss="306455" vmhwm="321345" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="824942" vmpeak="897032" vmrss="48567" vmhwm="48567" />
+        <model path="caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="676328" vmpeak="748418" vmrss="324860" vmhwm="324860" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="758212" vmpeak="813208" vmrss="29691" vmhwm="44220" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="611789" vmpeak="706534" vmrss="266244" vmhwm="324007" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="818549" vmpeak="890639" vmrss="47141" vmhwm="47141" />
+        <model path="caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="677705" vmpeak="749795" vmrss="326163" vmhwm="326163" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="CPU" vmsize="757534" vmpeak="911495" vmrss="36445" vmhwm="182050" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="create_exenetwork" device="GPU" vmsize="835683" vmpeak="973280" vmrss="490613" vmhwm="658640" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="CPU" vmsize="941076" vmpeak="1013166" vmrss="148222" vmhwm="183185" />
+        <model path="caffe/FP32/ssd_googlenet/ssd_googlenet.xml" test="infer_request_inference" device="GPU" vmsize="989608" vmpeak="1061698" vmrss="637709" vmhwm="661746" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="757174" vmpeak="901648" vmrss="73409" vmhwm="106537" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="801644" vmpeak="915186" vmrss="456517" vmhwm="490520" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="847932" vmpeak="847932" vmrss="116410" vmhwm="116410" />
+        <model path="caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="843022" vmpeak="915112" vmrss="490864" vmhwm="490864" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="CPU" vmsize="765393" vmpeak="900402" vmrss="71544" vmhwm="105032" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="create_exenetwork" device="GPU" vmsize="759668" vmpeak="872762" vmrss="414493" vmhwm="497701" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="CPU" vmsize="848438" vmpeak="900754" vmrss="113590" vmhwm="113590" />
+        <model path="caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml" test="infer_request_inference" device="GPU" vmsize="847620" vmpeak="919710" vmrss="495730" vmhwm="495730" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="755374" vmpeak="1146156" vmrss="22026" vmhwm="370176" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="768451" vmpeak="1074730" vmrss="423662" vmhwm="794266" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1113609" vmpeak="1185698" vmrss="313513" vmhwm="370035" />
+        <model path="caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1134227" vmpeak="1206317" vmrss="783006" vmhwm="795000" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="CPU" vmsize="755796" vmpeak="1267802" vmrss="23746" vmhwm="383983" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="create_exenetwork" device="GPU" vmsize="794565" vmpeak="1272634" vmrss="449394" vmhwm="991632" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="CPU" vmsize="1234050" vmpeak="1306140" vmrss="421194" vmhwm="421194" />
+        <model path="caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml" test="infer_request_inference" device="GPU" vmsize="1348960" vmpeak="1421050" vmrss="999050" vmhwm="999050" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754006" vmpeak="2548497" vmrss="15598" vmhwm="1808624" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="668602" vmpeak="3326708" vmrss="323791" vmhwm="3045328" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2027181" vmpeak="2548497" vmrss="1242560" vmhwm="1808730" />
+        <model path="caffe/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2441076" vmpeak="3326708" vmrss="2088055" vmhwm="3045050" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754212" vmpeak="2618030" vmrss="15510" vmhwm="1877383" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="739222" vmpeak="3397112" vmrss="393866" vmhwm="3115085" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2073794" vmpeak="2618030" vmrss="1289741" vmhwm="1878289" />
+        <model path="caffe/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2518340" vmpeak="3397081" vmrss="2165196" vmhwm="3114975" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="CPU" vmsize="764940" vmpeak="947157" vmrss="27988" vmhwm="223726" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="create_exenetwork" device="GPU" vmsize="789223" vmpeak="941683" vmrss="443788" vmhwm="641476" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="CPU" vmsize="962187" vmpeak="1034277" vmrss="177848" vmhwm="224180" />
+        <model path="caffe/FP32/vnect/vnect.xml" test="infer_request_inference" device="GPU" vmsize="969069" vmpeak="1041158" vmrss="616990" vmhwm="641977" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="CPU" vmsize="755651" vmpeak="1654985" vmrss="24921" vmhwm="920400" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="create_exenetwork" device="GPU" vmsize="936892" vmpeak="1838610" vmrss="590994" vmhwm="1556526" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="CPU" vmsize="1433352" vmpeak="1654989" vmrss="639456" vmhwm="918693" />
+        <model path="caffe/FP32/wrn_50_2/wrn_50_2.xml" test="infer_request_inference" device="GPU" vmsize="1613176" vmpeak="1824922" vmrss="1259940" vmhwm="1543031" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="754692" vmpeak="4259393" vmrss="18013" vmhwm="3532412" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="719105" vmpeak="5906194" vmrss="373648" vmhwm="5623600" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3167040" vmpeak="4259380" vmrss="2378362" vmhwm="3531237" />
+        <model path="caffe/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4165801" vmpeak="5903801" vmrss="3812393" vmhwm="5621585" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="753860" vmpeak="1101161" vmrss="14599" vmhwm="375399" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="577640" vmpeak="1037480" vmrss="232443" vmhwm="755972" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1059828" vmpeak="1131917" vmrss="272879" vmhwm="374721" />
+        <model path="caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="957453" vmpeak="1037445" vmrss="605026" vmhwm="756606" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="754344" vmpeak="1422647" vmrss="16790" vmhwm="680072" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="678964" vmpeak="1435790" vmrss="334017" vmhwm="1154573" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1279823" vmpeak="1422647" vmrss="490692" vmhwm="680526" />
+        <model path="caffe/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1325156" vmpeak="1438571" vmrss="972140" vmhwm="1157138" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="CPU" vmsize="753733" vmpeak="954430" vmrss="14278" vmhwm="229913" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="create_exenetwork" device="GPU" vmsize="568880" vmpeak="814976" vmrss="223907" vmhwm="533808" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1032882" vmpeak="1032882" vmrss="174631" vmhwm="230243" />
+        <model path="caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml" test="infer_request_inference" device="GPU" vmsize="810031" vmpeak="816178" vmrss="456856" vmhwm="534503" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="756852" vmpeak="1587154" vmrss="31460" vmhwm="837570" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1159840" vmpeak="1822444" vmrss="813969" vmhwm="1540343" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1554462" vmpeak="1626552" vmrss="609677" vmhwm="836655" />
+        <model path="caffe/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1735610" vmpeak="1821749" vmrss="1383285" vmhwm="1540598" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="CPU" vmsize="753856" vmpeak="1528538" vmrss="14414" vmhwm="815491" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="create_exenetwork" device="GPU" vmsize="580030" vmpeak="1741062" vmrss="235624" vmhwm="1460386" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="CPU" vmsize="1339681" vmpeak="1528538" vmrss="556146" vmhwm="815262" />
+        <model path="mxnet/FP32/caffenet/caffenet.xml" test="infer_request_inference" device="GPU" vmsize="1389097" vmpeak="1741093" vmrss="1036178" vmhwm="1460060" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="772622" vmpeak="985749" vmrss="95431" vmhwm="151087" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1141962" vmpeak="1252068" vmrss="796734" vmhwm="827217" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="985239" vmpeak="1057328" vmrss="158532" vmhwm="158532" />
+        <model path="mxnet/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1171425" vmpeak="1243514" vmrss="818624" vmhwm="818624" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="CPU" vmsize="762731" vmpeak="1211720" vmrss="93486" vmhwm="426896" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="create_exenetwork" device="GPU" vmsize="1312801" vmpeak="1592839" vmrss="967252" vmhwm="1311569" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="CPU" vmsize="1198124" vmpeak="1270214" vmrss="353051" vmhwm="427319" />
+        <model path="mxnet/FP32/densenet_161/densenet_161.xml" test="infer_request_inference" device="GPU" vmsize="1657339" vmpeak="1729428" vmrss="1304820" vmhwm="1304820" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="796360" vmpeak="1002408" vmrss="123094" vmhwm="239945" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1352916" vmpeak="1472262" vmrss="1007630" vmhwm="1084727" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1059880" vmpeak="1059880" vmrss="239307" vmhwm="241753" />
+        <model path="mxnet/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1437656" vmpeak="1509745" vmrss="1084828" vmhwm="1084828" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="CPU" vmsize="864635" vmpeak="1154040" vmrss="148830" vmhwm="322528" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="create_exenetwork" device="GPU" vmsize="1505042" vmpeak="1650162" vmrss="1159906" vmhwm="1343711" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="CPU" vmsize="1181056" vmpeak="1253146" vmrss="315048" vmhwm="322282" />
+        <model path="mxnet/FP32/densenet_201/densenet_201.xml" test="infer_request_inference" device="GPU" vmsize="1719256" vmpeak="1791345" vmrss="1366767" vmhwm="1366767" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="CPU" vmsize="767976" vmpeak="1370195" vmrss="63456" vmhwm="539897" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="create_exenetwork" device="GPU" vmsize="1313452" vmpeak="1701664" vmrss="968145" vmhwm="1420434" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="CPU" vmsize="1295571" vmpeak="1370195" vmrss="430610" vmhwm="539536" />
+        <model path="mxnet/FP32/dpn_92/dpn_92.xml" test="infer_request_inference" device="GPU" vmsize="1651421" vmpeak="1723510" vmrss="1299738" vmhwm="1422326" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754212" vmpeak="3124338" vmrss="17362" vmhwm="1770388" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="create_exenetwork" device="GPU" vmsize="669583" vmpeak="3628222" vmrss="324363" vmhwm="3347071" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2705824" vmpeak="3124338" vmrss="1906933" vmhwm="1906933" />
+        <model path="mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml" test="infer_request_inference" device="GPU" vmsize="3710449" vmpeak="3782539" vmrss="3356861" vmhwm="3356861" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="CPU" vmsize="756870" vmpeak="1192276" vmrss="32300" vmhwm="470417" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="create_exenetwork" device="GPU" vmsize="772970" vmpeak="1363872" vmrss="428054" vmhwm="1079412" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="CPU" vmsize="1123746" vmpeak="1195836" vmrss="335288" vmhwm="470162" />
+        <model path="mxnet/FP32/full_imagenet_network/full_imagenet_network.xml" test="infer_request_inference" device="GPU" vmsize="1219618" vmpeak="1362376" vmrss="875415" vmhwm="1077560" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="848157" vmpeak="1522730" vmrss="178424" vmhwm="792470" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="1549574" vmpeak="2182501" vmrss="1203804" vmhwm="1900742" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1437730" vmpeak="1522730" vmrss="644402" vmhwm="794024" />
+        <model path="mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2145426" vmpeak="2217516" vmrss="1793162" vmhwm="1899854" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="756584" vmpeak="925636" vmrss="32982" vmhwm="182529" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="769230" vmpeak="907847" vmrss="423874" vmhwm="604982" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="928659" vmpeak="928659" vmrss="142304" vmhwm="182353" />
+        <model path="mxnet/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="926103" vmpeak="998192" vmrss="572985" vmhwm="603592" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="CPU" vmsize="757851" vmpeak="1078682" vmrss="34751" vmhwm="348154" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="create_exenetwork" device="GPU" vmsize="911473" vmpeak="1183102" vmrss="565549" vmhwm="900992" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="CPU" vmsize="1051652" vmpeak="1123742" vmrss="258231" vmhwm="349131" />
+        <model path="mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml" test="infer_request_inference" device="GPU" vmsize="1182570" vmpeak="1254660" vmrss="829659" vmhwm="899540" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="764319" vmpeak="1327506" vmrss="61375" vmhwm="601048" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1206559" vmpeak="1676272" vmrss="860362" vmhwm="1393906" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1228396" vmpeak="1327475" vmrss="441135" vmhwm="603394" />
+        <model path="mxnet/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1637486" vmpeak="1709576" vmrss="1285376" vmhwm="1398377" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="CPU" vmsize="761046" vmpeak="1754029" vmrss="43916" vmhwm="1002368" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="create_exenetwork" device="GPU" vmsize="1026110" vmpeak="2108686" vmrss="680191" vmhwm="1826792" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="CPU" vmsize="1512095" vmpeak="1753998" vmrss="701483" vmhwm="1002333" />
+        <model path="mxnet/FP32/location_net/location_net.xml" test="infer_request_inference" device="GPU" vmsize="1880973" vmpeak="2110306" vmrss="1532348" vmhwm="1828952" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="CPU" vmsize="759695" vmpeak="1636430" vmrss="38011" vmhwm="883225" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="create_exenetwork" device="GPU" vmsize="1118880" vmpeak="1994964" vmrss="773102" vmhwm="1713034" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="CPU" vmsize="1430871" vmpeak="1636434" vmrss="617078" vmhwm="882886" />
+        <model path="mxnet/FP32/lresnet100e/lresnet100e.xml" test="infer_request_inference" device="GPU" vmsize="1804484" vmpeak="1993530" vmrss="1450724" vmhwm="1711340" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="CPU" vmsize="754872" vmpeak="821893" vmrss="55070" vmhwm="82354" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="create_exenetwork" device="GPU" vmsize="626304" vmpeak="734201" vmrss="280918" vmhwm="362925" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="CPU" vmsize="831344" vmpeak="903434" vmrss="86495" vmhwm="86495" />
+        <model path="mxnet/FP32/mobilenet/mobilenet.xml" test="infer_request_inference" device="GPU" vmsize="718357" vmpeak="790446" vmrss="367096" vmhwm="367096" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="CPU" vmsize="756826" vmpeak="819711" vmrss="53961" vmhwm="77206" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="create_exenetwork" device="GPU" vmsize="758023" vmpeak="861784" vmrss="412702" vmhwm="436805" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="CPU" vmsize="836470" vmpeak="891765" vmrss="83050" vmhwm="83050" />
+        <model path="mxnet/FP32/mobilenet_v2/mobilenet_v2.xml" test="infer_request_inference" device="GPU" vmsize="788986" vmpeak="861075" vmrss="437646" vmhwm="437646" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="CPU" vmsize="762731" vmpeak="804491" vmrss="17490" vmhwm="28454" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="create_exenetwork" device="GPU" vmsize="578894" vmpeak="670546" vmrss="233547" vmhwm="245172" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="CPU" vmsize="808209" vmpeak="808209" vmrss="28314" vmhwm="28314" />
+        <model path="mxnet/FP32/mtcnn_o/mtcnn_o.xml" test="infer_request_inference" device="GPU" vmsize="600507" vmpeak="672597" vmrss="247596" vmhwm="247596" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="CPU" vmsize="753530" vmpeak="881588" vmrss="13208" vmhwm="35261" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="create_exenetwork" device="GPU" vmsize="570042" vmpeak="661702" vmrss="224870" vmhwm="353003" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="CPU" vmsize="901260" vmpeak="901260" vmrss="107390" vmhwm="107390" />
+        <model path="mxnet/FP32/mtcnn_p/mtcnn_p.xml" test="infer_request_inference" device="GPU" vmsize="686408" vmpeak="758498" vmrss="332895" vmhwm="351907" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="CPU" vmsize="753711" vmpeak="803228" vmrss="14546" vmhwm="25586" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="create_exenetwork" device="GPU" vmsize="577288" vmpeak="667682" vmrss="231642" vmhwm="242167" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="CPU" vmsize="806102" vmpeak="806102" vmrss="24468" vmhwm="24468" />
+        <model path="mxnet/FP32/mtcnn_r/mtcnn_r.xml" test="infer_request_inference" device="GPU" vmsize="595588" vmpeak="667678" vmrss="242246" vmhwm="242246" />
+        <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="CPU" vmsize="753838" vmpeak="907420" vmrss="80674" vmhwm="122086" />
+        <model path="mxnet/FP32/nin/nin.xml" test="create_exenetwork" device="GPU" vmsize="675633" vmpeak="798283" vmrss="330184" vmhwm="372754" />
+        <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="CPU" vmsize="841390" vmpeak="913479" vmrss="123776" vmhwm="123776" />
+        <model path="mxnet/FP32/nin/nin.xml" test="infer_request_inference" device="GPU" vmsize="726066" vmpeak="798155" vmrss="390764" vmhwm="390764" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754080" vmpeak="884950" vmrss="35930" vmhwm="56368" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="create_exenetwork" device="GPU" vmsize="613082" vmpeak="713020" vmrss="267753" vmhwm="358019" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="CPU" vmsize="847726" vmpeak="919815" vmrss="83300" vmhwm="83300" />
+        <model path="mxnet/FP32/nst_vgg19/nst_vgg19.xml" test="infer_request_inference" device="GPU" vmsize="710754" vmpeak="782843" vmrss="357442" vmhwm="357442" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="760821" vmpeak="1370292" vmrss="44242" vmhwm="618965" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1077643" vmpeak="1594964" vmrss="731733" vmhwm="1313127" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1256200" vmpeak="1370261" vmrss="444043" vmhwm="617852" />
+        <model path="mxnet/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1494732" vmpeak="1596218" vmrss="1141690" vmhwm="1314187" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="765322" vmpeak="1593790" vmrss="61120" vmhwm="831661" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="1339184" vmpeak="2040148" vmrss="993968" vmhwm="1758746" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1414652" vmpeak="1593754" vmrss="594426" vmhwm="832220" />
+        <model path="mxnet/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1871271" vmpeak="2037904" vmrss="1518501" vmhwm="1756343" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="760650" vmpeak="1369557" vmrss="43384" vmhwm="618015" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1022863" vmpeak="1592206" vmrss="676698" vmhwm="1309880" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1255557" vmpeak="1369522" vmrss="445350" vmhwm="618750" />
+        <model path="mxnet/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1490077" vmpeak="1591563" vmrss="1137444" vmhwm="1309910" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="765204" vmpeak="1593108" vmrss="61124" vmhwm="831353" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="1340754" vmpeak="2034586" vmrss="995636" vmhwm="1753100" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1413992" vmpeak="1593077" vmrss="592710" vmhwm="831098" />
+        <model path="mxnet/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1867096" vmpeak="2036610" vmrss="1514532" vmhwm="1755089" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="CPU" vmsize="766911" vmpeak="1356080" vmrss="64389" vmhwm="623026" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="create_exenetwork" device="GPU" vmsize="1105068" vmpeak="1552320" vmrss="759990" vmhwm="1271340" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="CPU" vmsize="1258699" vmpeak="1356084" vmrss="468780" vmhwm="623788" />
+        <model path="mxnet/FP32/resnext_101/resnext_101.xml" test="infer_request_inference" device="GPU" vmsize="1478730" vmpeak="1553591" vmrss="1126364" vmhwm="1272167" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="CPU" vmsize="761239" vmpeak="1894468" vmrss="40691" vmhwm="1139410" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="create_exenetwork" device="GPU" vmsize="1418938" vmpeak="2248351" vmrss="1073886" vmhwm="1967262" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="CPU" vmsize="1618592" vmpeak="1894499" vmrss="810946" vmhwm="1140422" />
+        <model path="mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml" test="infer_request_inference" device="GPU" vmsize="1996112" vmpeak="2247322" vmrss="1660700" vmhwm="1965405" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="754987" vmpeak="880664" vmrss="29475" vmhwm="43832" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="616360" vmpeak="711106" vmrss="270859" vmhwm="322498" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="818562" vmpeak="818562" vmrss="47141" vmhwm="47141" />
+        <model path="mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="674124" vmpeak="746213" vmrss="322731" vmhwm="322731" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="CPU" vmsize="755224" vmpeak="1146433" vmrss="21806" vmhwm="370044" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="create_exenetwork" device="GPU" vmsize="775324" vmpeak="1077709" vmrss="430342" vmhwm="796857" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="CPU" vmsize="1113904" vmpeak="1185993" vmrss="312527" vmhwm="370946" />
+        <model path="mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml" test="infer_request_inference" device="GPU" vmsize="1137391" vmpeak="1137391" vmrss="785391" vmhwm="793201" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754133" vmpeak="2548906" vmrss="14955" vmhwm="1807044" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="668619" vmpeak="3326725" vmrss="322691" vmhwm="3044404" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2027476" vmpeak="2548906" vmrss="1242678" vmhwm="1808470" />
+        <model path="mxnet/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2438563" vmpeak="3326725" vmrss="2085028" vmhwm="3044505" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754226" vmpeak="2618325" vmrss="15708" vmhwm="1877977" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="741092" vmpeak="3397116" vmrss="396074" vmhwm="3115345" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2074089" vmpeak="2618325" vmrss="1290049" vmhwm="1878672" />
+        <model path="mxnet/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2518436" vmpeak="3397178" vmrss="2165728" vmhwm="3115459" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="CPU" vmsize="754701" vmpeak="4259684" vmrss="17626" vmhwm="3531853" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="create_exenetwork" device="GPU" vmsize="747582" vmpeak="5921322" vmrss="402490" vmhwm="5639084" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="CPU" vmsize="3095241" vmpeak="4259670" vmrss="2379062" vmhwm="3530652" />
+        <model path="mxnet/FP32/yolo_v1_full/yolo_v1_full.xml" test="infer_request_inference" device="GPU" vmsize="4163667" vmpeak="5923566" vmrss="3810193" vmhwm="5640967" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="CPU" vmsize="754023" vmpeak="1334414" vmrss="15254" vmhwm="608322" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="create_exenetwork" device="GPU" vmsize="600701" vmpeak="1330978" vmrss="255912" vmhwm="1049844" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="CPU" vmsize="1215838" vmpeak="1334383" vmrss="428331" vmhwm="607442" />
+        <model path="mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml" test="infer_request_inference" device="GPU" vmsize="1199972" vmpeak="1330384" vmrss="847391" vmhwm="1049228" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="CPU" vmsize="755387" vmpeak="1175570" vmrss="25374" vmhwm="306904" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="create_exenetwork" device="GPU" vmsize="805222" vmpeak="1346307" vmrss="460781" vmhwm="1065873" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="CPU" vmsize="1188580" vmpeak="1260670" vmrss="336036" vmhwm="336036" />
+        <model path="onnx/FP32/ssd_resnet34/ssd_resnet34.xml" test="infer_request_inference" device="GPU" vmsize="1449408" vmpeak="1521498" vmrss="1096792" vmhwm="1096792" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="CPU" vmsize="756822" vmpeak="1181615" vmrss="28468" vmhwm="309716" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="create_exenetwork" device="GPU" vmsize="819271" vmpeak="2432738" vmrss="474764" vmhwm="1101047" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="CPU" vmsize="1189117" vmpeak="1261207" vmrss="333788" vmhwm="333788" />
+        <model path="onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml" test="infer_request_inference" device="GPU" vmsize="2539222" vmpeak="2611312" vmrss="2191604" vmhwm="2191604" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="757878" vmpeak="1077934" vmrss="35261" vmhwm="348964" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="899610" vmpeak="1179116" vmrss="553863" vmhwm="896997" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1050878" vmpeak="1077876" vmrss="256506" vmhwm="347974" />
+        <model path="pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1179239" vmpeak="1251329" vmrss="826553" vmhwm="897714" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="760456" vmpeak="1096708" vmrss="27315" vmhwm="361944" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="834275" vmpeak="1073569" vmrss="489086" vmhwm="792343" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="1058622" vmpeak="1130712" vmrss="267682" vmhwm="362749" />
+        <model path="pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="1050852" vmpeak="1122941" vmrss="697576" vmhwm="791040" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="CPU" vmsize="755950" vmpeak="1092203" vmrss="27640" vmhwm="362740" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="create_exenetwork" device="GPU" vmsize="835951" vmpeak="1073516" vmrss="490674" vmhwm="792224" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="CPU" vmsize="1058626" vmpeak="1130716" vmrss="266516" vmhwm="361992" />
+        <model path="pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml" test="infer_request_inference" device="GPU" vmsize="1050218" vmpeak="1071435" vmrss="696669" vmhwm="789848" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="CPU" vmsize="754872" vmpeak="880550" vmrss="29603" vmhwm="43212" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="create_exenetwork" device="GPU" vmsize="648881" vmpeak="743626" vmrss="303424" vmhwm="318348" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="CPU" vmsize="818246" vmpeak="818246" vmrss="46534" vmhwm="46534" />
+        <model path="pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml" test="infer_request_inference" device="GPU" vmsize="674146" vmpeak="746235" vmrss="320315" vmhwm="320315" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="CPU" vmsize="764755" vmpeak="2092574" vmrss="38016" vmhwm="1352450" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="create_exenetwork" device="GPU" vmsize="1578328" vmpeak="3355976" vmrss="1233474" vmhwm="3074953" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="CPU" vmsize="1802838" vmpeak="2092587" vmrss="994188" vmhwm="1352709" />
+        <model path="tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml" test="infer_request_inference" device="GPU" vmsize="2958472" vmpeak="3352694" vmrss="2607677" vmhwm="3072185" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="CPU" vmsize="765124" vmpeak="2035453" vmrss="39745" vmhwm="1292420" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="create_exenetwork" device="GPU" vmsize="1939801" vmpeak="3261715" vmrss="1594617" vmhwm="2980577" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="CPU" vmsize="1750196" vmpeak="2039945" vmrss="935774" vmhwm="1291963" />
+        <model path="tf/1.14.0/FP32/bert_xnli/bert_xnli.xml" test="infer_request_inference" device="GPU" vmsize="2902235" vmpeak="3265460" vmrss="2551727" vmhwm="2984352" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="CPU" vmsize="757587" vmpeak="1547678" vmrss="33004" vmhwm="718973" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="create_exenetwork" device="GPU" vmsize="1154670" vmpeak="1678943" vmrss="809811" vmhwm="1398284" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="CPU" vmsize="1553134" vmpeak="1553134" vmrss="606232" vmhwm="719791" />
+        <model path="tf/1.14.0/FP32/cmu/cmu.xml" test="infer_request_inference" device="GPU" vmsize="1753910" vmpeak="1826000" vmrss="1400234" vmhwm="1400234" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="CPU" vmsize="757160" vmpeak="867486" vmrss="41307" vmhwm="62678" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="create_exenetwork" device="GPU" vmsize="743283" vmpeak="841055" vmrss="398604" vmhwm="537209" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="CPU" vmsize="888087" vmpeak="960176" vmrss="114166" vmhwm="114166" />
+        <model path="tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml" test="infer_request_inference" device="GPU" vmsize="894339" vmpeak="966429" vmrss="541912" vmhwm="541912" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="CPU" vmsize="772728" vmpeak="951218" vmrss="95840" vmhwm="151676" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="create_exenetwork" device="GPU" vmsize="1135195" vmpeak="1245301" vmrss="789848" vmhwm="820410" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="CPU" vmsize="985450" vmpeak="1057540" vmrss="159046" vmhwm="159046" />
+        <model path="tf/1.14.0/FP32/densenet_121/densenet_121.xml" test="infer_request_inference" device="GPU" vmsize="1171152" vmpeak="1243242" vmrss="818598" vmhwm="818598" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="CPU" vmsize="864168" vmpeak="998263" vmrss="126266" vmhwm="241604" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="create_exenetwork" device="GPU" vmsize="1353237" vmpeak="1472583" vmrss="1007978" vmhwm="1094614" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="CPU" vmsize="1060316" vmpeak="1132406" vmrss="238326" vmhwm="240724" />
+        <model path="tf/1.14.0/FP32/densenet_169/densenet_169.xml" test="infer_request_inference" device="GPU" vmsize="1447146" vmpeak="1519236" vmrss="1094759" vmhwm="1097835" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="CPU" vmsize="757156" vmpeak="826843" vmrss="69031" vmhwm="100887" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="create_exenetwork" device="GPU" vmsize="796250" vmpeak="906813" vmrss="451171" vmhwm="482077" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="CPU" vmsize="849041" vmpeak="849041" vmrss="104464" vmhwm="104464" />
+        <model path="tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml" test="infer_request_inference" device="GPU" vmsize="833984" vmpeak="906074" vmrss="481786" vmhwm="481786" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="CPU" vmsize="760786" vmpeak="1139173" vmrss="66413" vmhwm="353346" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="create_exenetwork" device="GPU" vmsize="1055560" vmpeak="1255601" vmrss="710595" vmhwm="974815" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="CPU" vmsize="1097984" vmpeak="1170074" vmrss="281050" vmhwm="352228" />
+        <model path="tf/1.14.0/FP32/facenet/facenet.xml" test="infer_request_inference" device="GPU" vmsize="1259253" vmpeak="1331343" vmrss="906562" vmhwm="976483" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="920884" vmpeak="2443892" vmrss="237186" vmhwm="851215" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="1751376" vmpeak="4164239" vmrss="1406411" vmhwm="3883422" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="757323" vmpeak="986519" vmrss="35006" vmhwm="212911" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="862219" vmpeak="1179283" vmrss="516881" vmhwm="897930" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="761538" vmpeak="1491811" vmrss="45667" vmhwm="671554" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="1126884" vmpeak="1800550" vmrss="781739" vmhwm="1519302" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="CPU" vmsize="766964" vmpeak="1233342" vmrss="29568" vmhwm="415509" />
+        <model path="tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml" test="create_exenetwork" device="GPU" vmsize="897432" vmpeak="1347007" vmrss="553357" vmhwm="1067290" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="CPU" vmsize="756562" vmpeak="1099533" vmrss="30078" vmhwm="245590" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="create_exenetwork" device="GPU" vmsize="764170" vmpeak="1353149" vmrss="419267" vmhwm="1072244" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="CPU" vmsize="1478496" vmpeak="1478496" vmrss="332820" vmhwm="332820" />
+        <model path="tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml" test="infer_request_inference" device="GPU" vmsize="1423364" vmpeak="1495454" vmrss="1070973" vmhwm="1172441" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="CPU" vmsize="755092" vmpeak="815298" vmrss="28811" vmhwm="43687" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="create_exenetwork" device="GPU" vmsize="620734" vmpeak="715479" vmrss="274991" vmhwm="324935" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="CPU" vmsize="825268" vmpeak="825268" vmrss="48439" vmhwm="48439" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml" test="infer_request_inference" device="GPU" vmsize="680592" vmpeak="752681" vmrss="326972" vmhwm="326972" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="765182" vmpeak="880712" vmrss="29827" vmhwm="44149" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="612620" vmpeak="707366" vmrss="266855" vmhwm="323734" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="818879" vmpeak="818879" vmrss="46534" vmhwm="46534" />
+        <model path="tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="681010" vmpeak="753099" vmrss="326902" vmhwm="326902" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="CPU" vmsize="848056" vmpeak="1522360" vmrss="147382" vmhwm="794481" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="create_exenetwork" device="GPU" vmsize="1699992" vmpeak="2187231" vmrss="1354892" vmhwm="1906344" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="CPU" vmsize="1437365" vmpeak="1522364" vmrss="643724" vmhwm="793755" />
+        <model path="tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml" test="infer_request_inference" device="GPU" vmsize="2152515" vmpeak="2224604" vmrss="1800026" vmhwm="1900395" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="CPU" vmsize="757526" vmpeak="905132" vmrss="83195" vmhwm="119653" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="create_exenetwork" device="GPU" vmsize="815988" vmpeak="932663" vmrss="470742" vmhwm="507760" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="CPU" vmsize="1007820" vmpeak="1007820" vmrss="123926" vmhwm="123926" />
+        <model path="tf/1.14.0/FP32/inception_v1/inception_v1.xml" test="infer_request_inference" device="GPU" vmsize="861520" vmpeak="933609" vmrss="507870" vmhwm="507870" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="CPU" vmsize="756756" vmpeak="925425" vmrss="34007" vmhwm="180769" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="create_exenetwork" device="GPU" vmsize="824168" vmpeak="962403" vmrss="478737" vmhwm="610280" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="CPU" vmsize="927669" vmpeak="999759" vmrss="141772" vmhwm="181966" />
+        <model path="tf/1.14.0/FP32/inception_v2/inception_v2.xml" test="infer_request_inference" device="GPU" vmsize="936755" vmpeak="1008845" vmrss="583963" vmhwm="611516" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="CPU" vmsize="759013" vmpeak="1063559" vmrss="51255" vmhwm="349113" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="create_exenetwork" device="GPU" vmsize="925958" vmpeak="1184101" vmrss="580056" vmhwm="902325" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="CPU" vmsize="1043583" vmpeak="1115672" vmrss="263520" vmhwm="349034" />
+        <model path="tf/1.14.0/FP32/inception_v3/inception_v3.xml" test="infer_request_inference" device="GPU" vmsize="1189548" vmpeak="1261638" vmrss="836646" vmhwm="903676" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="CPU" vmsize="764574" vmpeak="1327493" vmrss="64108" vmhwm="603842" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="create_exenetwork" device="GPU" vmsize="1221717" vmpeak="1686643" vmrss="875617" vmhwm="1404475" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="CPU" vmsize="1381556" vmpeak="1403402" vmrss="440356" vmhwm="602751" />
+        <model path="tf/1.14.0/FP32/inception_v4/inception_v4.xml" test="infer_request_inference" device="GPU" vmsize="1641921" vmpeak="1714011" vmrss="1289340" vmhwm="1405430" />
+        <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="CPU" vmsize="762119" vmpeak="2738828" vmrss="47203" vmhwm="947557" />
+        <model path="tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml" test="create_exenetwork" device="GPU" vmsize="1295483" vmpeak="4189812" vmrss="949788" vmhwm="3908550" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="CPU" vmsize="763840" vmpeak="805556" vmrss="21938" vmhwm="33264" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="create_exenetwork" device="GPU" vmsize="652572" vmpeak="744180" vmrss="306754" vmhwm="318432" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="CPU" vmsize="814000" vmpeak="814000" vmrss="33391" vmhwm="33391" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml" test="infer_request_inference" device="GPU" vmsize="672144" vmpeak="744233" vmrss="319026" vmhwm="319026" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="CPU" vmsize="754705" vmpeak="881188" vmrss="29282" vmhwm="44836" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="create_exenetwork" device="GPU" vmsize="614209" vmpeak="709759" vmrss="268778" vmhwm="326845" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="CPU" vmsize="818228" vmpeak="890318" vmrss="45513" vmhwm="45513" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml" test="infer_request_inference" device="GPU" vmsize="682484" vmpeak="754573" vmrss="328966" vmhwm="328966" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="754903" vmpeak="821928" vmrss="55237" vmhwm="82768" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="643887" vmpeak="751788" vmrss="298685" vmhwm="367602" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="831111" vmpeak="831111" vmrss="86732" vmhwm="86732" />
+        <model path="tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="720979" vmpeak="793069" vmrss="367584" vmhwm="367584" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="CPU" vmsize="756870" vmpeak="819759" vmrss="54586" vmhwm="78570" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="create_exenetwork" device="GPU" vmsize="705724" vmpeak="809490" vmrss="360267" vmhwm="435512" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="CPU" vmsize="835978" vmpeak="835978" vmrss="82583" vmhwm="82583" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml" test="infer_request_inference" device="GPU" vmsize="788902" vmpeak="860992" vmrss="435727" vmhwm="435727" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="CPU" vmsize="756725" vmpeak="831080" vmrss="76414" vmhwm="111914" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="create_exenetwork" device="GPU" vmsize="787058" vmpeak="902290" vmrss="441399" vmhwm="476911" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="CPU" vmsize="847299" vmpeak="847299" vmrss="120969" vmhwm="120969" />
+        <model path="tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml" test="infer_request_inference" device="GPU" vmsize="828920" vmpeak="901010" vmrss="475939" vmhwm="475939" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="CPU" vmsize="760988" vmpeak="1018754" vmrss="14484" vmhwm="296612" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="create_exenetwork" device="GPU" vmsize="600859" vmpeak="965967" vmrss="255569" vmhwm="685150" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="CPU" vmsize="1095155" vmpeak="1167245" vmrss="304607" vmhwm="304607" />
+        <model path="tf/1.14.0/FP32/ncf/ncf.xml" test="infer_request_inference" device="GPU" vmsize="1004577" vmpeak="1076666" vmrss="651943" vmhwm="689915" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="CPU" vmsize="756096" vmpeak="1100136" vmrss="27812" vmhwm="362344" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="create_exenetwork" device="GPU" vmsize="822830" vmpeak="1073947" vmrss="477193" vmhwm="792264" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="CPU" vmsize="1060571" vmpeak="1132661" vmrss="269808" vmhwm="362771" />
+        <model path="tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml" test="infer_request_inference" device="GPU" vmsize="1054684" vmpeak="1075272" vmrss="702310" vmhwm="794314" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="CPU" vmsize="760764" vmpeak="1338383" vmrss="42706" vmhwm="617047" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="create_exenetwork" device="GPU" vmsize="1108602" vmpeak="1561885" vmrss="762616" vmhwm="1279700" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="CPU" vmsize="1279819" vmpeak="1338409" vmrss="435102" vmhwm="617865" />
+        <model path="tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml" test="infer_request_inference" device="GPU" vmsize="1455146" vmpeak="1561388" vmrss="1101755" vmhwm="1279845" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="CPU" vmsize="765221" vmpeak="1552262" vmrss="59875" vmhwm="829250" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="create_exenetwork" device="GPU" vmsize="1322098" vmpeak="1985359" vmrss="976223" vmhwm="1703319" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="CPU" vmsize="1373006" vmpeak="1552293" vmrss="581891" vmhwm="829848" />
+        <model path="tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml" test="infer_request_inference" device="GPU" vmsize="1814348" vmpeak="1986380" vmrss="1461099" vmhwm="1704714" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="CPU" vmsize="766088" vmpeak="1079958" vmrss="27324" vmhwm="362155" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="create_exenetwork" device="GPU" vmsize="838965" vmpeak="1085884" vmrss="493407" vmhwm="804324" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="CPU" vmsize="1046157" vmpeak="1118246" vmrss="260515" vmhwm="362810" />
+        <model path="tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml" test="infer_request_inference" device="GPU" vmsize="1057223" vmpeak="1080772" vmrss="704066" vmhwm="799440" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="CPU" vmsize="761754" vmpeak="1365104" vmrss="45179" vmhwm="620879" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="create_exenetwork" device="GPU" vmsize="1120737" vmpeak="1613546" vmrss="774637" vmhwm="1331308" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="CPU" vmsize="1251346" vmpeak="1365135" vmrss="446415" vmhwm="620241" />
+        <model path="tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml" test="infer_request_inference" device="GPU" vmsize="1515817" vmpeak="1613858" vmrss="1162572" vmhwm="1331968" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="CPU" vmsize="839823" vmpeak="1569361" vmrss="155029" vmhwm="833157" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="create_exenetwork" device="GPU" vmsize="1363960" vmpeak="2068752" vmrss="1018507" vmhwm="1787042" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="CPU" vmsize="1476041" vmpeak="1569392" vmrss="679918" vmhwm="833914" />
+        <model path="tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml" test="infer_request_inference" device="GPU" vmsize="1904799" vmpeak="2060317" vmrss="1551756" vmhwm="1778167" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="CPU" vmsize="756602" vmpeak="1096774" vmrss="28393" vmhwm="363391" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="create_exenetwork" device="GPU" vmsize="845226" vmpeak="1103374" vmrss="500051" vmhwm="821986" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="CPU" vmsize="1063304" vmpeak="1135393" vmrss="271220" vmhwm="364399" />
+        <model path="tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml" test="infer_request_inference" device="GPU" vmsize="1092159" vmpeak="1105997" vmrss="738276" vmhwm="823983" />
+        <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="CPU" vmsize="838816" vmpeak="1561762" vmrss="116930" vmhwm="752906" />
+        <model path="tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml" test="create_exenetwork" device="GPU" vmsize="1674490" vmpeak="2318250" vmrss="1329842" vmhwm="2034986" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="CPU" vmsize="755062" vmpeak="880739" vmrss="28415" vmhwm="43480" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="create_exenetwork" device="GPU" vmsize="609298" vmpeak="704044" vmrss="263868" vmhwm="323488" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="CPU" vmsize="825048" vmpeak="897138" vmrss="49108" vmhwm="49108" />
+        <model path="tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml" test="infer_request_inference" device="GPU" vmsize="675844" vmpeak="747934" vmrss="322753" vmhwm="322753" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="CPU" vmsize="756804" vmpeak="978252" vmrss="70514" vmhwm="120370" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="create_exenetwork" device="GPU" vmsize="831318" vmpeak="949744" vmrss="485619" vmhwm="524550" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="CPU" vmsize="925689" vmpeak="997779" vmrss="130244" vmhwm="130244" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml" test="infer_request_inference" device="GPU" vmsize="878099" vmpeak="950188" vmrss="525395" vmhwm="525395" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="CPU" vmsize="759435" vmpeak="1442861" vmrss="34680" vmhwm="509454" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="create_exenetwork" device="GPU" vmsize="1012906" vmpeak="1460487" vmrss="667977" vmhwm="1179833" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="CPU" vmsize="1368043" vmpeak="1442861" vmrss="427737" vmhwm="509533" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml" test="infer_request_inference" device="GPU" vmsize="1542648" vmpeak="1542648" vmrss="1195304" vmhwm="1195304" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="CPU" vmsize="759558" vmpeak="1426185" vmrss="33862" vmhwm="507768" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="create_exenetwork" device="GPU" vmsize="1010358" vmpeak="1414454" vmrss="665451" vmhwm="1133941" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="CPU" vmsize="1350650" vmpeak="1426185" vmrss="421828" vmhwm="509168" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml" test="infer_request_inference" device="GPU" vmsize="1493681" vmpeak="1565770" vmrss="1145416" vmhwm="1145416" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="CPU" vmsize="761433" vmpeak="985784" vmrss="41514" vmhwm="254610" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="create_exenetwork" device="GPU" vmsize="876933" vmpeak="1078919" vmrss="531814" vmhwm="798001" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="CPU" vmsize="1028508" vmpeak="1064698" vmrss="201212" vmhwm="254390" />
+        <model path="tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml" test="infer_request_inference" device="GPU" vmsize="1091807" vmpeak="1163896" vmrss="739525" vmhwm="798023" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="CPU" vmsize="754067" vmpeak="1169247" vmrss="15686" vmhwm="429523" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="create_exenetwork" device="GPU" vmsize="682413" vmpeak="1130109" vmrss="337194" vmhwm="848733" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="CPU" vmsize="1106463" vmpeak="1178553" vmrss="321428" vmhwm="429871" />
+        <model path="tf/1.14.0/FP32/unet2d/unet2d.xml" test="infer_request_inference" device="GPU" vmsize="1083904" vmpeak="1155994" vmrss="730976" vmhwm="845882" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754010" vmpeak="2548502" vmrss="15452" vmhwm="1807863" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="686602" vmpeak="3327385" vmrss="340982" vmhwm="3045398" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2026776" vmpeak="2548502" vmrss="1241011" vmhwm="1808730" />
+        <model path="tf/1.14.0/FP32/vgg16/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2438568" vmpeak="3312188" vmrss="2084328" vmhwm="3029980" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="CPU" vmsize="754168" vmpeak="2617986" vmrss="16073" vmhwm="1877000" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="create_exenetwork" device="GPU" vmsize="612194" vmpeak="3415310" vmrss="266732" vmhwm="3133363" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="CPU" vmsize="2145479" vmpeak="2617885" vmrss="1287272" vmhwm="1877568" />
+        <model path="tf/1.14.0/FP32/vgg19/vgg19.xml" test="infer_request_inference" device="GPU" vmsize="2521367" vmpeak="3415297" vmrss="2167426" vmhwm="3133059" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="CPU" vmsize="754344" vmpeak="1426625" vmrss="17173" vmhwm="684173" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="create_exenetwork" device="GPU" vmsize="684424" vmpeak="1460949" vmrss="339600" vmhwm="1180036" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="CPU" vmsize="1282802" vmpeak="1426625" vmrss="493737" vmhwm="684802" />
+        <model path="tf/1.14.0/FP32/yolo_v2/yolo_v2.xml" test="infer_request_inference" device="GPU" vmsize="1331783" vmpeak="1443006" vmrss="978560" vmhwm="1161124" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="CPU" vmsize="753724" vmpeak="954421" vmrss="14414" vmhwm="229578" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="create_exenetwork" device="GPU" vmsize="569179" vmpeak="816648" vmrss="224250" vmhwm="535449" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="CPU" vmsize="960810" vmpeak="960810" vmrss="174231" vmhwm="229807" />
+        <model path="tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml" test="infer_request_inference" device="GPU" vmsize="808627" vmpeak="880717" vmrss="455677" vmhwm="533002" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="CPU" vmsize="754344" vmpeak="1422647" vmrss="17437" vmhwm="680666" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="create_exenetwork" device="GPU" vmsize="686316" vmpeak="1436296" vmrss="340586" vmhwm="1154617" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="CPU" vmsize="1279797" vmpeak="1422616" vmrss="490982" vmhwm="680147" />
+        <model path="tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml" test="infer_request_inference" device="GPU" vmsize="1330780" vmpeak="1442570" vmrss="978392" vmhwm="1161490" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="CPU" vmsize="756958" vmpeak="1587260" vmrss="31108" vmhwm="836506" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="create_exenetwork" device="GPU" vmsize="1163712" vmpeak="1824596" vmrss="819011" vmhwm="1543559" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="CPU" vmsize="1405879" vmpeak="1591766" vmrss="610302" vmhwm="836594" />
+        <model path="tf/1.14.0/FP32/yolo_v3/yolo_v3.xml" test="infer_request_inference" device="GPU" vmsize="1734233" vmpeak="1823470" vmrss="1381925" vmhwm="1542178" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="CPU" vmsize="753975" vmpeak="895633" vmrss="15637" vmhwm="140927" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="create_exenetwork" device="GPU" vmsize="599332" vmpeak="728939" vmrss="254029" vmhwm="412566" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="CPU" vmsize="903469" vmpeak="975559" vmrss="116124" vmhwm="141182" />
+        <model path="tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml" test="infer_request_inference" device="GPU" vmsize="741738" vmpeak="813828" vmrss="389259" vmhwm="413476" />
+    </models>
+</attributes>
\ No newline at end of file
diff --git a/tests/stress_tests/.automation/memcheck_tests/weekly_configs/desktop_test_config.xml b/tests/stress_tests/.automation/memcheck_tests/weekly_configs/desktop_test_config.xml
new file mode 100644 (file)
index 0000000..1a50d72
--- /dev/null
@@ -0,0 +1,156 @@
+<?xml version="1.0"?>
+<attributes>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+        <value>caffe/FP32/caffenet/caffenet.xml</value>
+        <value>caffe/FP32/densenet_121/densenet_121.xml</value>
+        <value>caffe/FP32/densenet_161/densenet_161.xml</value>
+        <value>caffe/FP32/densenet_169/densenet_169.xml</value>
+        <value>caffe/FP32/densenet_201/densenet_201.xml</value>
+        <value>caffe/FP32/dpn_92/dpn_92.xml</value>
+        <value>caffe/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+        <value>caffe/FP32/inception_v1/inception_v1.xml</value>
+        <value>caffe/FP32/inception_v2/inception_v2.xml</value>
+        <value>caffe/FP32/inception_v3/inception_v3.xml</value>
+        <value>caffe/FP32/inception_v4/inception_v4.xml</value>
+        <value>caffe/FP32/lenet/lenet.xml</value>
+        <value>caffe/FP32/mobilenet/mobilenet.xml</value>
+        <value>caffe/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+        <value>caffe/FP32/resnet_18/resnet_18.xml</value>
+        <value>caffe/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+        <value>caffe/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+        <value>caffe/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+        <value>caffe/FP32/resnet_v1_269/resnet_v1_269.xml</value>
+        <value>caffe/FP32/se_resnext_50/se_resnext_50.xml</value>
+        <value>caffe/FP32/squeezenet_v1.0/squeezenet_v1.0.xml</value>
+        <value>caffe/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+        <value>caffe/FP32/ssd_googlenet/ssd_googlenet.xml</value>
+        <value>caffe/FP32/ssd_squeezenet/ssd_squeezenet.xml</value>
+        <value>caffe/FP32/ssd_mobilenet/ssd_mobilenet.xml</value>
+        <value>caffe/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+        <value>caffe/FP32/ssd_vgg16_512/ssd_vgg16_512.xml</value>
+        <value>caffe/FP32/vgg16/vgg16.xml</value>
+        <value>caffe/FP32/vgg19/vgg19.xml</value>
+        <value>caffe/FP32/wrn_50_2/wrn_50_2.xml</value>
+        <value>caffe/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+        <value>caffe/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+        <value>caffe/FP32/yolo_v2/yolo_v2.xml</value>
+        <value>caffe/FP32/yolo_v2_tiny/yolo_v2_tiny.xml</value>
+        <value>caffe/FP32/yolo_v3/yolo_v3.xml</value>
+        <value>caffe/FP32/dilation/dilation.xml</value>
+        <value>caffe/FP32/dssd/dssd.xml</value>
+        <value>caffe/FP32/fcn8/fcn8.xml</value>
+        <value>caffe/FP32/fcn32/fcn32.xml</value>
+        <value>caffe/FP32/fcn_alexnet/fcn_alexnet.xml</value>
+        <value>caffe/FP32/mtcnn_p/mtcnn_p.xml</value>
+        <value>caffe/FP32/mtcnn_r/mtcnn_r.xml</value>
+        <value>caffe/FP32/mtcnn_o/mtcnn_o.xml</value>
+        <value>caffe/FP32/openpose_face/openpose_face.xml</value>
+        <value>caffe/FP32/openpose_hand/openpose_hand.xml</value>
+        <value>caffe/FP32/openpose_pose_coco/openpose_pose_coco.xml</value>
+        <value>caffe/FP32/places205_alexnet/places205_alexnet.xml</value>
+        <value>caffe/FP32/places205_googlenet/places205_googlenet.xml</value>
+        <value>caffe/FP32/se_bn_inception/se_bn_inception.xml</value>
+        <value>caffe/FP32/vnect/vnect.xml</value>
+        <value>tf/1.14.0/FP32/bert_base_uncased/bert_base_uncased.xml</value>
+        <value>tf/1.14.0/FP32/bert_xnli/bert_xnli.xml</value>
+        <value>tf/1.14.0/FP32/cmu/cmu.xml</value>
+        <value>tf/1.14.0/FP32/densenet_121/densenet_121.xml</value>
+        <value>tf/1.14.0/FP32/densenet_169/densenet_169.xml</value>
+        <value>tf/1.14.0/FP32/deeplab_v3/deeplab_v3.xml</value>
+        <value>tf/1.14.0/FP32/east/east.xml</value>
+        <value>tf/1.14.0/FP32/facenet/facenet.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_inception_v2_coco/faster_rcnn_inception_v2_coco.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_inception_resnet_v2_atrous_coco/faster_rcnn_inception_resnet_v2_atrous_coco.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_resnet50_coco/faster_rcnn_resnet50_coco.xml</value>
+        <value>tf/1.14.0/FP32/faster_rcnn_resnet101_coco/faster_rcnn_resnet101_coco.xml</value>
+        <value>tf/1.14.0/FP32/gnmt/gnmt.xml</value>
+        <value>tf/1.14.0/FP32/i3d_rgb/i3d_rgb.xml</value>
+        <value>tf/1.14.0/FP32/inception_v1/inception_v1.xml</value>
+        <value>tf/1.14.0/FP32/inception_v2/inception_v2.xml</value>
+        <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+        <value>tf/1.14.0/FP32/inception_v4/inception_v4.xml</value>
+        <value>tf/1.14.0/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+        <value>tf/1.14.0/FP32/mask_rcnn_resnet101_atrous_coco/mask_rcnn_resnet101_atrous_coco.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v1_0.25_128/mobilenet_v1_0.25_128.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v1_0.5_160/mobilenet_v1_0.5_160.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v1_1.0_224/mobilenet_v1_1.0_224.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.xml</value>
+        <value>tf/1.14.0/FP32/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.xml</value>
+        <value>tf/1.14.0/FP32/ncf/ncf.xml</value>
+        <value>tf/1.14.0/FP32/nasnet-a_large/nasnet-a_large.xml</value>
+        <value>tf/1.14.0/FP32/nasnet-a_mobile/nasnet-a_mobile.xml</value>
+        <value>tf/1.14.0/FP32/pnasnet-5_large/pnasnet-5_large.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1_50/resnet_v1_50.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1.5_50/resnet_v1.5_50.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v2_50/resnet_v2_50.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+        <value>tf/1.14.0/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+        <value>tf/1.14.0/FP32/rfcn_resnet101_coco/rfcn_resnet101_coco.xml</value>
+        <value>tf/1.14.0/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco/ssd_mobilenet_v1_fpn_coco.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_fpn_coco_602x602/ssd_mobilenet_v1_fpn_coco_602x602.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v2_coco/ssd_mobilenet_v2_coco.xml</value>
+        <value>tf/1.14.0/FP32/unet2d/unet2d.xml</value>
+        <value>tf/1.14.0/FP32/vgg16/vgg16.xml</value>
+        <value>tf/1.14.0/FP32/vgg19/vgg19.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v2/yolo_v2.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v2_voc/yolo_v2_voc.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v2_tiny_voc/yolo_v2_tiny_voc.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v3/yolo_v3.xml</value>
+        <value>tf/1.14.0/FP32/yolo_v3_tiny/yolo_v3_tiny.xml</value>
+        <value>tf/1.14.0/FP32/dssd_avigilon/dssd_avigilon.xml</value>
+        <value>tf/1.14.0/FP32/icv_squeezenet_v1.0/icv_squeezenet_v1.0.xml</value>
+        <value>tf/1.14.0/FP32/icv_squeezenet_v1.1/icv_squeezenet_v1.1.xml</value>
+        <value>mxnet/FP32/caffenet/caffenet.xml</value>
+        <value>mxnet/FP32/densenet_121/densenet_121.xml</value>
+        <value>mxnet/FP32/densenet_161/densenet_161.xml</value>
+        <value>mxnet/FP32/densenet_169/densenet_169.xml</value>
+        <value>mxnet/FP32/densenet_201/densenet_201.xml</value>
+        <value>mxnet/FP32/inception_v3/inception_v3.xml</value>
+        <value>mxnet/FP32/inception_v4/inception_v4.xml</value>
+        <value>mxnet/FP32/mobilenet/mobilenet.xml</value>
+        <value>mxnet/FP32/mobilenet_v2/mobilenet_v2.xml</value>
+        <value>mxnet/FP32/resnet_v1_101/resnet_v1_101.xml</value>
+        <value>mxnet/FP32/resnet_v1_152/resnet_v1_152.xml</value>
+        <value>mxnet/FP32/resnet_v2_101/resnet_v2_101.xml</value>
+        <value>mxnet/FP32/resnet_v2_152/resnet_v2_152.xml</value>
+        <value>mxnet/FP32/resnext_101/resnext_101.xml</value>
+        <value>mxnet/FP32/squeezenet_v1.1/squeezenet_v1.1.xml</value>
+        <value>mxnet/FP32/ssd_inception_v3_512/ssd_inception_v3_512.xml</value>
+        <value>mxnet/FP32/ssd_mobilenet_512/ssd_mobilenet_512.xml</value>
+        <value>mxnet/FP32/ssd_resnet50_512/ssd_resnet50_512.xml</value>
+        <value>mxnet/FP32/ssd_vgg16_300/ssd_vgg16_300.xml</value>
+        <value>mxnet/FP32/vgg16/vgg16.xml</value>
+        <value>mxnet/FP32/vgg19/vgg19.xml</value>
+        <value>mxnet/FP32/dpn_92/dpn_92.xml</value>
+        <value>mxnet/FP32/fcn8s_vgg16/fcn8s_vgg16.xml</value>
+        <value>mxnet/FP32/full_imagenet_network/full_imagenet_network.xml</value>
+        <value>mxnet/FP32/inception_resnet_v2/inception_resnet_v2.xml</value>
+        <value>mxnet/FP32/inception_v3_no_batchnorm/inception_v3_no_batchnorm.xml</value>
+        <value>mxnet/FP32/location_net/location_net.xml</value>
+        <value>mxnet/FP32/lresnet100e/lresnet100e.xml</value>
+        <value>mxnet/FP32/mtcnn_p/mtcnn_p.xml</value>
+        <value>mxnet/FP32/mtcnn_r/mtcnn_r.xml</value>
+        <value>mxnet/FP32/mtcnn_o/mtcnn_o.xml</value>
+        <value>mxnet/FP32/nin/nin.xml</value>
+        <value>mxnet/FP32/nst_vgg19/nst_vgg19.xml</value>
+        <value>mxnet/FP32/resnext_101_64x4d/resnext_101_64x4d.xml</value>
+        <value>mxnet/FP32/yolo_v1_full/yolo_v1_full.xml</value>
+        <value>mxnet/FP32/yolo_v1_tiny/yolo_v1_tiny.xml</value>
+        <value>onnx/FP32/ssd_resnet34/ssd_resnet34.xml</value>
+        <value>onnx/FP32/ssd_resnet34_new/ssd_resnet34_new.xml</value>
+        <value>onnx/FP32/retina_net/retina_net.xml</value>
+        <value>pytorch/FP32/inceptionv3_pretrained/inceptionv3_pretrained.xml</value>
+        <value>pytorch/FP32/resnet50_pretrained/resnet50_pretrained.xml</value>
+        <value>pytorch/FP32/squeezenet_v1.1_pretrained/squeezenet_v1.1_pretrained.xml</value>
+        <value>pytorch/FP32/resnet50_torchvision/resnet50_torchvision.xml</value>
+    </models>
+</attributes>
\ No newline at end of file
diff --git a/tests/stress_tests/.automation/memleaks_tests/nightly_configs/desktop_test_config.xml b/tests/stress_tests/.automation/memleaks_tests/nightly_configs/desktop_test_config.xml
new file mode 100644 (file)
index 0000000..12e95b4
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<attributes>
+<!--[ WARNING ] Use of attribute "processes" from config isn't implemented yet. It will be ignored.-->
+    <processes>
+        <value>1</value>
+    </processes>
+    <threads>
+        <value>1</value>
+    </threads>
+    <iterations>
+        <value>1000</value>
+    </iterations>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+        <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+    </models>
+</attributes>
diff --git a/tests/stress_tests/.automation/memleaks_tests/weekly_configs/desktop_test_config.xml b/tests/stress_tests/.automation/memleaks_tests/weekly_configs/desktop_test_config.xml
new file mode 100644 (file)
index 0000000..74ddd50
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<attributes>
+<!--[ WARNING ] Use of attribute "processes" from config isn't implemented yet. It will be ignored.-->
+    <processes>
+        <value>1</value>
+    </processes>
+    <threads>
+        <value>1</value>
+    </threads>
+    <iterations>
+        <value>10000</value>
+    </iterations>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+        <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+    </models>
+</attributes>
diff --git a/tests/stress_tests/.automation/unittests/nightly_configs/desktop_test_config.xml b/tests/stress_tests/.automation/unittests/nightly_configs/desktop_test_config.xml
new file mode 100644 (file)
index 0000000..0fb2428
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<attributes>
+    <processes>
+        <value>1</value>
+    </processes>
+    <threads>
+        <value>1</value>
+        <value>2</value>
+    </threads>
+    <iterations>
+        <value>100</value>
+    </iterations>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+        <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+    </models>
+</attributes>
diff --git a/tests/stress_tests/.automation/unittests/weekly_configs/desktop_test_config.xml b/tests/stress_tests/.automation/unittests/weekly_configs/desktop_test_config.xml
new file mode 100644 (file)
index 0000000..6c94f40
--- /dev/null
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<attributes>
+    <processes>
+        <value>1</value>
+        <value>2</value>
+    </processes>
+    <threads>
+        <value>1</value>
+        <value>2</value>
+    </threads>
+    <iterations>
+        <value>1000</value>
+    </iterations>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+        <value>tf/1.14.0/FP32/inception_v3/inception_v3.xml</value>
+        <value>tf/1.14.0/FP32/ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco.xml</value>
+    </models>
+</attributes>
diff --git a/tests/stress_tests/CMakeLists.txt b/tests/stress_tests/CMakeLists.txt
new file mode 100644 (file)
index 0000000..350abba
--- /dev/null
@@ -0,0 +1,25 @@
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+if(ENABLE_DOCKER)
+    cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
+else()
+    if (APPLE)
+        # due to https://cmake.org/cmake/help/v3.12/policy/CMP0068.html
+        cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
+    else()
+        cmake_minimum_required(VERSION 3.7.2 FATAL_ERROR)
+    endif()
+endif()
+
+if (CMAKE_BUILD_TYPE STREQUAL "")
+    message(STATUS "CMAKE_BUILD_TYPE not defined, 'Release' will be used")
+    set(CMAKE_BUILD_TYPE "Release")
+endif()
+
+find_package(InferenceEngineDeveloperPackage REQUIRED)
+
+add_subdirectory(unittests)
+add_subdirectory(memleaks_tests)
+add_subdirectory(memcheck_tests)
diff --git a/tests/stress_tests/README.md b/tests/stress_tests/README.md
new file mode 100644 (file)
index 0000000..509bd53
--- /dev/null
@@ -0,0 +1,86 @@
+# Stress Tests Suite
+
+This test suite contains tests evaluating the behavior of various OpenVINO use
+cases under stress conditions:
+
+- MemCheckTests measure the memory required for the use cases and fail when
+memory usage exceeds a pre-defined level.
+
+- StressMemLeaksTests ensure that the use cases do not increase memory
+consumption when executed continuously.
+
+- StressUnitTests execute various Inference Engine use cases in parallel
+threads and processes.
+
+Each test refers to configuration files located in the `<test dir>/local_configs`
+folder. The configuration files are installed along with the tests at build time.
+
+## Getting Started
+
+Stress tests are based on the googletest framework. You can filter tests with
+the `--gtest_filter` option and list the available tests with `--gtest_list_tests`.
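+
+For example, to list the tests and run only a subset (the filter pattern below
+is only an illustration, not a required name):
+
+``` bash
+./MemCheckTests --gtest_list_tests
+./MemCheckTests --gtest_filter=*create_exenetwork*
+```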
+
+Tests measuring memory have a temporary limitation: they need to be executed
+one at a time to avoid polluting the memory statistics. You can use
+[gtest-parallel][gtest-parallel] for mass test execution.
+
+### Pre-requisites
+
+- Linux OS to build the tests.
+
+- [gtest-parallel][gtest-parallel] to execute tests.
+
+### Building Tests
+
+Stress tests should be built in two steps.
+
+1. Build `dldt`
+
+Build `dldt` as usual but with `-DENABLE_TESTS=ON`.
+
+2. Build `stress_tests`
+
+Stress tests depend on the Inference Engine Developer Package located in the
+`dldt` build directory.
+
+In the command line snippet below, it is assumed that the Inference Engine
+Developer Package CMake module can be found in the `build` directory under the
+`dldt` repository root.
+
+``` bash
+(
+export DLDT_BUILD_DIR=$(git rev-parse --show-toplevel)/build
+mkdir -p build && cd build && \
+cmake -DInferenceEngineDeveloperPackage_DIR=$DLDT_BUILD_DIR .. && make -j$(nproc) \
+)
+```
+
+### Preparing Test Data
+
+Stress tests use models from [Open Model Zoo][open_model_zoo]. Download the
+models and convert them to IRs using the `./scripts/get_testdata.py` script.
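+
+For example (assuming the script is run from the `stress_tests` directory; it
+may accept additional options not shown here):
+
+``` bash
+./scripts/get_testdata.py
+```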
+
+From the Intel network, you can use cached models from the `vdp_tests` file share.
+Refer to [VDP shared folders][VDP-shared-folders] for details on using file shares.
+
+### Running Tests
+
+``` bash
+gtest-parallel ./MemCheckTests
+```
+
+``` bash
+gtest-parallel ./StressMemLeaksTests
+```
+
+MemCheckTests logs can be used to gather reference values based on current
+memory consumption:
+
+``` bash
+mkdir -p MemCheckTests-logs && \
+gtest-parallel -d ./MemCheckTests-logs ./MemCheckTests && \
+grep -rh ./MemCheckTests-logs -e ".*<model " | sed -e "s/.*<model /<model /" | sort
+```
+
+[VDP-shared-folders]: https://wiki.ith.intel.com/display/DLSDK/VDP+shared+folders
+[gtest-parallel]: https://github.com/google/gtest-parallel
+[open_model_zoo]: https://github.com/opencv/open_model_zoo
\ No newline at end of file
diff --git a/tests/stress_tests/common/ie_pipelines/pipelines.cpp b/tests/stress_tests/common/ie_pipelines/pipelines.cpp
new file mode 100644 (file)
index 0000000..c4d5e42
--- /dev/null
@@ -0,0 +1,136 @@
+#include "pipelines.h"
+#include "../utils.h"
+
+#include <iostream>
+#include <string>
+
+#include <inference_engine.hpp>
+
+using namespace InferenceEngine;
+
+std::function<void()> load_unload_plugin(const std::string &target_device) {
+    return [&] {
+        Core ie;
+        // GetVersions silently registers the plugin in `plugins` through `GetCPPPluginByName`
+        ie.GetVersions(target_device);
+        // Remove plugin for target_device from `plugins`
+        ie.UnregisterPlugin(target_device);
+    };
+}
+
+std::function<void()> read_network(const std::string &model) {
+    return [&] {
+        IE_SUPPRESS_DEPRECATED_START
+        CNNNetReader netReader;
+        netReader.ReadNetwork(model);
+        netReader.ReadWeights(fileNameNoExt(model) + ".bin");
+        IE_SUPPRESS_DEPRECATED_END
+    };
+}
+
+std::function<void()> create_cnnnetwork(const std::string &model) {
+    return [&] {
+        Core ie;
+        CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+    };
+}
+
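+// Reads a network and doubles the batch dimension of every input that has an explicit batch axis.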
+std::function<void()> cnnnetwork_reshape_batch_x2(const std::string &model) {
+    return [&] {
+        Core ie;
+        CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+        const InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+        ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+        bool doReshape = false;
+        for (const InputsDataMap::value_type& input : inputInfo) {
+            int batchIndex = -1;
+            auto layout = input.second->getTensorDesc().getLayout();
+            if ((layout == Layout::NCHW) || (layout == Layout::NCDHW) ||
+                (layout == Layout::NHWC) || (layout == Layout::NDHWC) ||
+                (layout == Layout::NC)) {
+                batchIndex = 0;
+            } else if (layout == CN) {
+                batchIndex = 1;
+            }
+            if (batchIndex != -1) {
+                shapes[input.first][batchIndex] *= 2;
+                doReshape = true;
+            }
+        }
+        if (doReshape)
+            cnnNetwork.reshape(shapes);
+        else
+            throw std::logic_error("Reshape wasn't applied for a model.");
+    };
+}
+
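+// Sets U8 precision, disables resize and assigns NCHW/NC layout to 4D/2D inputs of a network.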
+std::function<void()> set_input_params(const std::string &model) {
+    return [&] {
+        Core ie;
+        CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+        InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+        for (auto &input : inputInfo) {
+            input.second->getPreProcess().setResizeAlgorithm(NO_RESIZE);
+            input.second->setPrecision(Precision::U8);
+            if (input.second->getInputData()->getTensorDesc().getDims().size() == 4)
+                input.second->setLayout(Layout::NCHW);
+            else if (input.second->getInputData()->getTensorDesc().getDims().size() == 2)
+                input.second->setLayout(Layout::NC);
+            else
+                throw std::logic_error("Setting of input parameters wasn't applied for a model.");
+        }
+    };
+}
+
+std::function<void()> create_exenetwork(const std::string &model, const std::string &target_device) {
+    return [&] {
+        Core ie;
+        CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+        ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    };
+}
+
+std::function<void()> recreate_exenetwork(Core &ie, const std::string &model, const std::string &target_device) {
+    return [&] {
+        CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+        ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    };
+}
+
+std::function<void()> create_infer_request(const std::string &model, const std::string &target_device) {
+    return [&] {
+        Core ie;
+        CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+        ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+        InferRequest infer_request = exeNetwork.CreateInferRequest();
+    };
+}
+
+
+std::function<void()> recreate_infer_request(InferenceEngine::ExecutableNetwork& exeNetwork) {
+    return [&] {
+        InferRequest infer_request = exeNetwork.CreateInferRequest();
+    };
+}
+
+std::function<void()> infer_request_inference(const std::string &model, const std::string &target_device) {
+    return [&] {
+        Core ie;
+        CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+        ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+        InferRequest infer_request = exeNetwork.CreateInferRequest();
+        infer_request.Infer();
+        OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+        for (auto &output : output_info)
+            Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+    };
+}
+
+std::function<void()> reinfer_request_inference(InferenceEngine::InferRequest& infer_request, InferenceEngine::CNNNetwork& cnnNetwork) {
+    return [&] {
+        infer_request.Infer();
+        OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+        for (auto &output : output_info)
+            Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+    };
+}
diff --git a/tests/stress_tests/common/ie_pipelines/pipelines.h b/tests/stress_tests/common/ie_pipelines/pipelines.h
new file mode 100644 (file)
index 0000000..aaac1c3
--- /dev/null
@@ -0,0 +1,16 @@
+#include <string>
+#include <functional>
+#include <inference_engine.hpp>
+
+std::function<void()> load_unload_plugin(const std::string &target_device);
+std::function<void()> read_network(const std::string &model);
+std::function<void()> create_cnnnetwork(const std::string &model);
+std::function<void()> cnnnetwork_reshape_batch_x2(const std::string &model);
+std::function<void()> set_input_params(const std::string &model);
+std::function<void()> create_exenetwork(const std::string &model, const std::string &target_device);
+std::function<void()> recreate_exenetwork(InferenceEngine::Core &ie, const std::string &model, const std::string &target_device);
+std::function<void()> create_infer_request(const std::string &model, const std::string &target_device);
+std::function<void()> recreate_infer_request(InferenceEngine::ExecutableNetwork& exeNetwork);
+std::function<void()> infer_request_inference(const std::string &model, const std::string &target_device);
+std::function<void()> reinfer_request_inference(InferenceEngine::InferRequest& infer_request, InferenceEngine::CNNNetwork& cnnNetwork);
diff --git a/tests/stress_tests/common/managers/task_manager.h b/tests/stress_tests/common/managers/task_manager.h
new file mode 100644 (file)
index 0000000..797432e
--- /dev/null
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <iostream>
+#include <vector>
+#include <functional>
+
+#include "../tests_utils.h"
+
+enum ManagerStatus {
+    NOT_STARTED = -2,
+    NOT_FINISHED = -1,
+    FINISHED_SUCCESSFULLY = 0,
+    FINISHED_UNEXPECTEDLY
+};
+
+template<typename Type>
+using Task = std::pair<ManagerStatus, std::function<Type()>>;
+
+template<typename Type>
+class TaskManager {
+public:
+    std::vector<Task<Type>> tasks;
+    std::vector<Type> tasks_results;
+
+    TaskManager() {}
+
+    TaskManager(const std::initializer_list<std::function<Type()>> &tasks_list) {
+        tasks.reserve(tasks_list.size());
+        for (const auto &task : tasks_list)
+            add_task(task);
+    }
+
+    void add_task(const std::function<Type()> &task) {
+        auto _task = Task<Type>(ManagerStatus::NOT_STARTED, task);
+        tasks.push_back(_task);
+    }
+
+    void run_sequentially() {
+        // TODO: make it asynchronous
+        tasks_results.reserve(tasks.size());
+        for (auto &task : tasks) {
+            task.first = ManagerStatus::NOT_FINISHED;
+            tasks_results.push_back(task.second());
+        }
+    }
+
+    void run_parallel_n_wait() {
+        run_parallel();
+        wait_all();
+    }
+
+    void wait_all() {
+        int numtasks = tasks.size();
+        for (int i = 0; i < numtasks; i++)
+            if (tasks[i].first == ManagerStatus::NOT_FINISHED)
+                wait_task(i);
+    }
+
+    std::vector<ManagerStatus> get_all_statuses() {
+        std::vector<ManagerStatus> statuses;
+
+        int numtasks = tasks.size();
+        for (int i = 0; i < numtasks; i++)
+            statuses.push_back(get_task_status(i));
+        return statuses;
+    }
+
+    std::vector<TestResult> get_all_results() {
+        return tasks_results;
+    }
+
+    TestResult get_task_result(int task_index) {
+        if (task_index < 0 || tasks_results.size() <= static_cast<size_t>(task_index))
+            throw std::out_of_range("Task index " + std::to_string(task_index) + " out of number of tasks");
+
+        return tasks_results[task_index];
+    }
+
+    virtual void run_parallel() = 0;
+
+    virtual void wait_task(int task_index) = 0; // TODO: implement for run_sequentially
+
+    virtual ManagerStatus get_task_status(int task_index) = 0;
+
+};
\ No newline at end of file
diff --git a/tests/stress_tests/common/managers/thread_manager.h b/tests/stress_tests/common/managers/thread_manager.h
new file mode 100644 (file)
index 0000000..9157899
--- /dev/null
@@ -0,0 +1,51 @@
+#include "task_manager.h"
+
+#include <future>
+
+template <typename Type>
+class ThreadManager : public TaskManager<Type> {
+public:
+    using TaskManager<Type>::tasks;
+    using TaskManager<Type>::tasks_results;
+    std::vector<std::future<TestResult>> threads;
+
+    using TaskManager<Type>::TaskManager;
+
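+    // Launch every not-yet-started task on its own thread via std::async; results are collected in wait_task().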
+    void run_parallel() final {
+        // TODO: implement run_task function according to wait_task
+        int numtasks = tasks.size();
+        threads.reserve(numtasks);
+        tasks_results.reserve(numtasks);
+
+        for (int i = 0; i < numtasks; i++)
+            if (tasks[i].first == ManagerStatus::NOT_STARTED) {
+                tasks[i].first = ManagerStatus::NOT_FINISHED;
+                threads.push_back(std::async(std::launch::async, tasks[i].second));
+            }
+    }
+
+    void wait_task(int task_index) final {
+        if (task_index < 0 || threads.size() <= static_cast<size_t>(task_index))
+            throw std::out_of_range("Task index " + std::to_string(task_index) + " out of number of tasks");
+
+        try {
+            tasks_results.push_back(threads[task_index].get());
+            tasks[task_index].first = ManagerStatus::FINISHED_SUCCESSFULLY;
+        } catch (const std::exception &err) { // TODO: catch any exception
+            tasks[task_index].first = ManagerStatus::FINISHED_UNEXPECTEDLY;
+            tasks_results.push_back(TestResult(TestStatus::TEST_FAILED, "Test finished unexpectedly: " + std::string(err.what())));
+        }
+    }
+
+    ManagerStatus get_task_status(int task_index) final {
+        if (task_index < 0 || tasks.size() <= static_cast<size_t>(task_index))
+            throw std::out_of_range("Task index " + std::to_string(task_index) + " out of number of tasks");
+
+        return tasks[task_index].first;
+    }
+};
\ No newline at end of file
diff --git a/tests/stress_tests/common/tests_utils.cpp b/tests/stress_tests/common/tests_utils.cpp
new file mode 100644 (file)
index 0000000..ed91e0f
--- /dev/null
@@ -0,0 +1,99 @@
+#include "tests_utils.h"
+
+#include <gtest/gtest.h>
+#include <pugixml.hpp>
+#include <string>
+
+#define DEBUG_MODE false
+
+const pugi::xml_document & Environment::getTestConfig() {
+    return _test_config;
+}
+
+void Environment::setTestConfig(const pugi::xml_document &test_config) {
+    _test_config.reset(test_config);
+}
+
+const pugi::xml_document & Environment::getEnvConfig() {
+    return _env_config;
+}
+
+void Environment::setEnvConfig(const pugi::xml_document &env_config) {
+    _env_config.reset(env_config);
+}
+
+std::vector<TestCase> generateTestsParams(std::initializer_list<std::string> fields) {
+    std::vector<TestCase> tests_cases;
+    const pugi::xml_document & test_config = Environment::Instance().getTestConfig();
+    std::string models_path = Environment::Instance().getEnvConfig()
+            .child("attributes").child("irs_path").child("value").text().as_string();
+
+    std::vector<int> processes;
+    std::vector<int> threads;
+    std::vector<int> iterations;
+    std::vector<std::string> devices;
+    std::vector<std::string> models;
+
+    pugi::xml_node values;
+    for (auto field = fields.begin(); field != fields.end(); field++) {
+        if (*field == "processes") {
+            values = test_config.child("attributes").child("processes");
+            for (pugi::xml_node val = values.first_child(); val; val = val.next_sibling())
+                processes.push_back(val.text().as_int());
+        } else if (*field == "threads") {
+            values = test_config.child("attributes").child("threads");
+            for (pugi::xml_node val = values.first_child(); val; val = val.next_sibling())
+                threads.push_back(val.text().as_int());
+        } else if (*field == "iterations") {
+            values = test_config.child("attributes").child("iterations");
+            for (pugi::xml_node val = values.first_child(); val; val = val.next_sibling())
+                iterations.push_back(val.text().as_int());
+        } else if (*field == "devices") {
+            values = test_config.child("attributes").child("devices");
+            for (pugi::xml_node val = values.first_child(); val; val = val.next_sibling())
+                devices.push_back(val.text().as_string());
+        } else if (*field == "models") {
+            values = test_config.child("attributes").child("models");
+            for (pugi::xml_node val = values.first_child(); val; val = val.next_sibling())
+                models.push_back(val.text().as_string());
+        }
+    }
+
+    // Initialize variables with default values if they weren't filled
+    processes = !processes.empty() ? processes : std::vector<int>{1};
+    threads = !threads.empty() ? threads : std::vector<int>{1};
+    iterations = !iterations.empty() ? iterations : std::vector<int>{1};
+    devices = !devices.empty() ? devices : std::vector<std::string>{"NULL"};
+    models = !models.empty() ? models : std::vector<std::string>{"NULL"};
+
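+    // Every combination of the requested parameters (cartesian product) becomes a separate test case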
+    for (auto &numprocesses : processes)
+        for (auto &numthreads : threads)
+            for (auto &numiters : iterations)
+                for (auto &device : devices)
+                    for (auto &model : models)
+                        tests_cases.push_back(TestCase(numprocesses, numthreads, numiters, device, OS_PATH_JOIN({models_path, model}), model));
+
+    return tests_cases;
+}
+
+std::string getTestCaseName(const testing::TestParamInfo<TestCase> &obj) {
+    return obj.param.test_case_name;
+}
+
+void test_wrapper(const std::function<void(std::string, std::string, int)> &tests_pipeline, const TestCase &params) {
+    tests_pipeline(params.model, params.device, params.numiters);
+}
+
+void _runTest(const std::function<void(std::string, std::string, int)> &tests_pipeline, const TestCase &params) {
+    run_in_threads(params.numthreads, test_wrapper, tests_pipeline, params);
+}
+
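+// runTest forks params.numprocesses child processes; each of them runs params.numthreads threads of the
+// tests pipeline (with DEBUG_MODE the pipeline is executed directly in the current process instead)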
+void runTest(const std::function<void(std::string, std::string, int)> &tests_pipeline, const TestCase &params) {
+#if DEBUG_MODE
+    tests_pipeline(params.model, params.device, params.numiters);
+#else
+    int status = run_in_processes(params.numprocesses, _runTest, tests_pipeline, params);
+    ASSERT_EQ(status, 0) << "Test failed with exitcode " << std::to_string(status);
+#endif
+}
+
diff --git a/tests/stress_tests/common/tests_utils.h b/tests/stress_tests/common/tests_utils.h
new file mode 100644 (file)
index 0000000..d2acf0b
--- /dev/null
@@ -0,0 +1,75 @@
+#pragma once
+
+#include "utils.h"
+
+#include <gtest/gtest.h>
+#include <pugixml.hpp>
+#include <string>
+#include <vector>
+#include <thread>
+#include <unistd.h>
+#include <sys/wait.h>
+
+enum TestStatus
+{
+    TEST_NOT_STARTED = 0,
+    TEST_FAILED,
+    TEST_OK
+};
+
+using TestResult = std::pair<TestStatus, std::string>;
+
+class TestCase {
+public:
+    int numprocesses;
+    int numthreads;
+    int numiters;
+    std::string device;
+    std::string model_name;
+    std::string model;
+    std::string test_case_name;
+
+    TestCase(int _numprocesses, int _numthreads, int _numiters, std::string _device, const std::string& _model, const std::string& _model_name)
+            : numprocesses(_numprocesses), numthreads(_numthreads), numiters(_numiters),
+              device(std::move(_device)), model_name(_model_name), model(_model) {
+        test_case_name =
+                "Numprocesses_" + std::to_string(numprocesses) + "_Numthreads_" + std::to_string(numthreads) +
+                "_Numiters_" + std::to_string(numiters) + "_Device_" + update_item_for_name(device) + "_Model_" +
+                update_item_for_name(model_name);
+    }
+
+private:
+    std::string update_item_for_name(const std::string &item) {
+        std::string _item(item);
+        for (std::string::size_type index = 0; index < _item.size(); ++index) {
+            if (!isalnum(_item[index]) && _item[index] != '_')
+                _item[index] = '_';
+        }
+        return _item;
+    }
+};
+
+class Environment {
+private:
+    pugi::xml_document _test_config;
+    pugi::xml_document _env_config;
+    Environment() = default;
+    Environment(const Environment&) = delete;
+    Environment& operator=(const Environment&) = delete;
+public:
+    static Environment& Instance(){
+        static Environment env;
+        return env;
+    }
+
+    const pugi::xml_document & getTestConfig();
+    void setTestConfig(const pugi::xml_document &test_config);
+    const pugi::xml_document & getEnvConfig();
+    void setEnvConfig(const pugi::xml_document &env_config);
+};
+
+std::vector<TestCase> generateTestsParams(std::initializer_list<std::string> items);
+std::string getTestCaseName(const testing::TestParamInfo<TestCase> &obj);
+
+void runTest(const std::function<void(std::string, std::string, int)> &tests_pipeline, const TestCase &params);
+void _runTest(const std::function<void(std::string, std::string, int)> &tests_pipeline, const TestCase &params);
+void test_wrapper(const std::function<void(std::string, std::string, int)> &tests_pipeline, const TestCase &params);
diff --git a/tests/stress_tests/common/utils.cpp b/tests/stress_tests/common/utils.cpp
new file mode 100644 (file)
index 0000000..607c3f7
--- /dev/null
@@ -0,0 +1,64 @@
+#include "utils.h"
+
+#include <string>
+#include <string.h>
+
+std::string OS_PATH_JOIN(std::initializer_list<std::string> list) {
+    if (!list.size())
+        return "";
+    std::string res = *list.begin();
+    for (auto it = list.begin() + 1; it != list.end(); it++) {
+        res += OS_SEP + *it;
+    }
+    return res;
+}
+
+std::string fileNameNoExt(const std::string &filepath) {
+    auto pos = filepath.rfind('.');
+    if (pos == std::string::npos) return filepath;
+    return filepath.substr(0, pos);
+}
+
+
+static size_t parseLine(char* line) {
+    // This assumes that a digit will be found and the line ends in " Kb".
+    size_t i = strlen(line);
+    const char* p = line;
+    while (*p <'0' || *p > '9') p++;
+    line[i-3] = '\0';
+    i = (size_t)atoi(p);
+    return i;
+}
+
+#ifdef _WIN32
+#include <windows.h>
+#include <psapi.h>
+size_t getVmSizeInKB() {
+    // TODO rewrite for Virtual Memory
+    PROCESS_MEMORY_COUNTERS pmc;
+    pmc.cb = sizeof(PROCESS_MEMORY_COUNTERS);
+    GetProcessMemoryInfo(GetCurrentProcess(), &pmc, pmc.cb);
+    return pmc.WorkingSetSize / 1024;   // WorkingSetSize is reported in bytes
+}
+#else
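+// Parse /proc/self/status and return the value (in KB) of the line that starts with `name`, e.g. "VmSize:"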
+size_t getVirtualMemoryInKB(const char *name) {
+    FILE* file = fopen("/proc/self/status", "r");
+    size_t result = 0;
+    if (file != nullptr) {
+        char line[128];
+
+        while (fgets(line, 128, file) != NULL) {
+            if (strncmp(line, name, strlen(name)) == 0) {
+                result = parseLine(line);
+                break;
+            }
+        }
+        fclose(file);
+    }
+    return result;
+}
+
+size_t getVmSizeInKB() {return getVirtualMemoryInKB("VmSize:");}
+size_t getVmPeakInKB() {return getVirtualMemoryInKB("VmPeak:");}
+size_t getVmRSSInKB() {return getVirtualMemoryInKB("VmRSS:");}
+size_t getVmHWMInKB() {return getVirtualMemoryInKB("VmHWM:");}
+
+#endif
diff --git a/tests/stress_tests/common/utils.h b/tests/stress_tests/common/utils.h
new file mode 100644 (file)
index 0000000..7e82d12
--- /dev/null
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <thread>
+#include <functional>
+#include <sys/unistd.h>
+#include <sys/wait.h>
+
+#ifdef _WIN32
+#define OS_SEP std::string("\\")
+#else
+#define OS_SEP std::string("/")
+#endif
+
+
+#define log_info(str) std::cout << "[ INFO ] " << str << std::endl
+#define log_warn(str) std::cout << "[ WARNING ] " << str << std::endl
+#define log_err(str) std::cout << "[ ERROR ] " << str << std::endl
+#define log_debug(str) std::cout << "[ DEBUG ] " << str << std::endl
+
+std::string OS_PATH_JOIN(std::initializer_list<std::string> list);
+
+std::string fileNameNoExt(const std::string &filepath);
+
+#define getVmValues(vmsize, vmpeak, vmrss, vmhwm) do {                                \
+                                                  vmsize = (long) getVmSizeInKB();    \
+                                                  vmpeak = (long) getVmPeakInKB();    \
+                                                  vmrss = (long) getVmRSSInKB();      \
+                                                  vmhwm = (long) getVmHWMInKB();      \
+                                                  } while (0)
+
+size_t getVmSizeInKB();
+size_t getVmPeakInKB();
+size_t getVmRSSInKB();
+size_t getVmHWMInKB();
+
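+// Fork numprocesses children, run function(args...) in each of them and return the last non-zero
+// waitpid status (0 if every child succeeded)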
+template<typename Function, typename ... Args>
+int run_in_processes(const int &numprocesses, Function const &function, Args ... args) {
+    std::vector<pid_t> child_pids(numprocesses);
+
+    for (int i = 0; i < numprocesses; i++) {
+        child_pids[i] = fork();
+        if (child_pids[i] == 0) {
+            function(args...);
+            exit(EXIT_SUCCESS);
+        }
+    }
+
+    int status = 0;
+    for (int i = 0; i < numprocesses; i++) {
+        int _status = 0;
+        waitpid(child_pids[i], &_status, WSTOPPED);
+        if (_status) {
+            log_err("Process run # " << i << " failed with exitcode " << _status);
+            status = _status;
+        }
+    }
+    return status;
+}
+
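+// Run function(args...) concurrently in numthreads std::thread instances and join them all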
+template<typename Function, typename ... Args>
+inline void run_in_threads(const int &numthreads, Function const &function, Args ... args) {
+    std::vector<std::thread> v(numthreads);
+    for (int thr_i = 0; thr_i < numthreads; thr_i++) {
+        v[thr_i] = std::thread(function, args...);
+    }
+
+    for (int thr_i = 0; thr_i < numthreads; thr_i++) {
+        v[thr_i].join();
+    }
+    v.clear();
+}
diff --git a/tests/stress_tests/memcheck_tests/CMakeLists.txt b/tests/stress_tests/memcheck_tests/CMakeLists.txt
new file mode 100644 (file)
index 0000000..cb7c630
--- /dev/null
@@ -0,0 +1,38 @@
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set (TARGET_NAME "MemCheckTests")
+
+file (GLOB SRC
+        ../common/*.cpp
+        ../common/ie_pipelines/*.cpp
+        *.cpp
+        tests_pipelines/*.cpp)
+
+file (GLOB HDR
+        ../common/*.h
+        ../common/ie_pipelines/*.h
+        *.h
+        tests_pipelines/*.h)
+
+# Create executable from sources.
+add_executable(${TARGET_NAME} ${HDR} ${SRC})
+
+find_package(gflags REQUIRED)
+
+target_link_libraries(${TARGET_NAME}
+        IE::gtest
+        IE::gtest_main
+        IE::pugixml
+        gflags
+        ${InferenceEngine_LIBRARIES}
+        )
+
+target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}"
+        "${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src")
+
+# Copy local configs to BIN_FOLDER
+configure_file(local_configs/test_config.xml ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/stress_tests_configs/memcheck_tests/test_config.xml COPYONLY)
+configure_file(local_configs/env_config.xml ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/stress_tests_configs/memcheck_tests/env_config.xml COPYONLY)
+configure_file(local_configs/references_config.xml ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/stress_tests_configs/memcheck_tests/references_config.xml COPYONLY)
diff --git a/tests/stress_tests/memcheck_tests/flags.h b/tests/stress_tests/memcheck_tests/flags.h
new file mode 100644 (file)
index 0000000..9bd09da
--- /dev/null
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "../common/utils.h"
+
+#include <gflags/gflags.h>
+
+/// @brief message for help argument
+static const char help_message[] = "Print a usage message";
+
+/// @brief Define flag for showing help message <br>
+DEFINE_bool(h, false, help_message);
+
+/// @brief Declare flag for showing help message <br>
+DECLARE_bool(help);
+
+/// @brief message for test_config argument
+static const char test_conf_message[] = "Optional. Path to a test config with description about number of threads, iterations etc.";
+
+/// @brief Define parameter for set test's configuration <br>
+/// test_conf is an optional parameter
+DEFINE_string(test_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "test_config.xml"}), test_conf_message);
+
+/// @brief message for env_config argument
+static const char env_conf_message[] = "Optional. Path to an env config with paths to models etc.";
+
+/// @brief Define parameter for set environment <br>
+/// env_conf is an optional parameter
+DEFINE_string(env_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "env_config.xml"}), env_conf_message);
+
+/// @brief message for refs_config argument
+static const char refs_conf_message[] = "Optional. Path to a references config with values of memory consumption per test.";
+
+/// @brief Define parameter for set references' configuration <br>
+/// refs_conf is an optional parameter
+DEFINE_string(refs_conf, OS_PATH_JOIN({"stress_tests_configs", "memcheck_tests", "references_config.xml"}), refs_conf_message);
\ No newline at end of file
diff --git a/tests/stress_tests/memcheck_tests/local_configs/env_config.xml b/tests/stress_tests/memcheck_tests/local_configs/env_config.xml
new file mode 100644 (file)
index 0000000..ffcac86
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<attributes>
+    <irs_path>
+        <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/open_model_zoo/efd238d02035f8a5417b7b1e25cd4c997d44351f/IRs</value>
+    </irs_path>
+</attributes>
diff --git a/tests/stress_tests/memcheck_tests/local_configs/references_config.xml b/tests/stress_tests/memcheck_tests/local_configs/references_config.xml
new file mode 100644 (file)
index 0000000..35b701b
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<attributes>
+    <models>
+<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="CPU" vmsize="757218" vmpeak="901683" vmrss="73920" vmhwm="107866" />
+<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="create_exenetwork" device="GPU" vmsize="747815" vmpeak="860978" vmrss="401808" vmhwm="435358" />
+<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="CPU" vmsize="1001189" vmpeak="1001189" vmrss="116080" vmhwm="116080" />
+<model path="public/mobilenet-ssd/FP32/mobilenet-ssd.xml" test="infer_request_inference" device="GPU" vmsize="788752" vmpeak="860842" vmrss="435283" vmhwm="435283" />
+<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="CPU" vmsize="754806" vmpeak="803184" vmrss="15206" vmhwm="26532" />
+<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="create_exenetwork" device="GPU" vmsize="554650" vmpeak="644666" vmrss="207592" vmhwm="217720" />
+<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="CPU" vmsize="959257" vmpeak="959257" vmrss="26690" vmhwm="26690" />
+<model path="public/mtcnn-r/FP32/mtcnn-r.xml" test="infer_request_inference" device="GPU" vmsize="572576" vmpeak="644666" vmrss="215230" vmhwm="215230" />
+<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="CPU" vmsize="755224" vmpeak="1146142" vmrss="22246" vmhwm="370770" />
+<model path="public/ssd300/FP32/ssd300.xml" test="create_exenetwork" device="GPU" vmsize="747709" vmpeak="1031694" vmrss="401746" vmhwm="749962" />
+<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="CPU" vmsize="1343474" vmpeak="1415563" vmrss="314204" vmhwm="371131" />
+<model path="public/ssd300/FP32/ssd300.xml" test="infer_request_inference" device="GPU" vmsize="1088700" vmpeak="1160790" vmrss="739626" vmhwm="748008" />
+<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="CPU" vmsize="754050" vmpeak="2548532" vmrss="15593" vmhwm="1808765" />
+<model path="public/vgg16/FP32/vgg16.xml" test="create_exenetwork" device="GPU" vmsize="648912" vmpeak="3289101" vmrss="299327" vmhwm="3003457" />
+<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="CPU" vmsize="2257006" vmpeak="2548532" vmrss="1243448" vmhwm="1809143" />
+<model path="public/vgg16/FP32/vgg16.xml" test="infer_request_inference" device="GPU" vmsize="2413290" vmpeak="3289101" vmrss="2059780" vmhwm="3006845" />
+    </models>
+</attributes>
diff --git a/tests/stress_tests/memcheck_tests/local_configs/test_config.xml b/tests/stress_tests/memcheck_tests/local_configs/test_config.xml
new file mode 100644 (file)
index 0000000..9944819
--- /dev/null
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<attributes>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>public/vgg16/FP32/vgg16.xml</value>
+        <value>public/mtcnn-r/FP32/mtcnn-r.xml</value>
+        <value>public/mobilenet-ssd/FP32/mobilenet-ssd.xml</value>
+        <value>public/ssd300/FP32/ssd300.xml</value>
+    </models>
+</attributes>
diff --git a/tests/stress_tests/memcheck_tests/main.cpp b/tests/stress_tests/memcheck_tests/main.cpp
new file mode 100644 (file)
index 0000000..4d235d1
--- /dev/null
@@ -0,0 +1,65 @@
+#include "flags.h"
+#include "../common/utils.h"
+#include "tests_utils.h"
+#include "../common/tests_utils.h"
+
+#include <gtest/gtest.h>
+#include <pugixml.hpp>
+
+
+bool parseAndCheckCommandLine(int argc, char **argv) {
+    // ---------------------------Parsing and validating input arguments--------------------------------------
+    log_info("Parsing input parameters");
+
+    int new_argc = 0;
+    std::vector<char*> _argv;
+    for (int i = 0; i < argc; i++) {
+        if ("--gtest" != std::string(argv[i]).substr(0, 7)) {
+            _argv.push_back(argv[i]);
+            new_argc++;
+        }
+    }
+    char **new_argv = &_argv[0];
+    gflags::ParseCommandLineNonHelpFlags(&new_argc, &new_argv, true);
+
+    if (FLAGS_help || FLAGS_h) {
+        // TODO print info
+        //::testing::InitGoogleTest(&argc, argv);
+        return false;
+    }
+
+    pugi::xml_document config;
+    pugi::xml_parse_result result = config.load_file(FLAGS_test_conf.c_str());
+    if (!result) {
+        log_err("Exception while reading test config \"" << FLAGS_test_conf << "\": " << result.description());
+        return false;
+    }
+    result = config.load_file(FLAGS_env_conf.c_str());
+    if (!result) {
+        log_err("Exception while reading env config \"" << FLAGS_env_conf << "\": " << result.description());
+        return false;
+    }
+    result = config.load_file(FLAGS_refs_conf.c_str());
+    if (!result) {
+        log_err("Exception while reading references config \"" << FLAGS_refs_conf << "\": " << result.description());
+        return false;
+    }
+    return true;
+}
+
+
+int main(int argc, char **argv) {
+    if (!parseAndCheckCommandLine(argc, argv)) {
+        return 0;   // TODO return correct status
+    }
+
+    pugi::xml_document config;
+    config.load_file(FLAGS_test_conf.c_str());
+    Environment::Instance().setTestConfig(config);
+    config.load_file(FLAGS_env_conf.c_str());
+    Environment::Instance().setEnvConfig(config);
+    config.load_file(FLAGS_refs_conf.c_str());
+    MemCheckEnvironment::Instance().setRefsConfig(config);
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/tests/stress_tests/memcheck_tests/tests.cpp b/tests/stress_tests/memcheck_tests/tests.cpp
new file mode 100644 (file)
index 0000000..5383a32
--- /dev/null
@@ -0,0 +1,52 @@
+#include "tests_utils.h"
+#include "../common/tests_utils.h"
+#include "../common/managers/thread_manager.h"
+#include "tests_pipelines/tests_pipelines.h"
+
+#include <gtest/gtest.h>
+
+#define checkRefVmValues()                                                                                               \
+    ASSERT_GT(test_refs.ref_vmsize, 0) << "Reference value of VmSize is not positive. Value: " << test_refs.ref_vmsize;  \
+    ASSERT_GT(test_refs.ref_vmpeak, 0) << "Reference value of VmPeak is not positive. Value: " << test_refs.ref_vmpeak;  \
+    ASSERT_GT(test_refs.ref_vmrss, 0) << "Reference value of VmRSS is not positive. Value: " << test_refs.ref_vmrss;     \
+    ASSERT_GT(test_refs.ref_vmhwm, 0) << "Reference value of VmHWM is not positive. Value: " << test_refs.ref_vmhwm;
+
+class MemCheckTestSuite : public ::testing::TestWithParam<TestCase> {
+};
+
+// tests_pipelines/tests_pipelines.cpp
+TEST_P(MemCheckTestSuite, create_exenetwork) {
+    std::string test_name = "create_exenetwork";
+    auto test_params = GetParam();
+
+    TestReferences test_refs;
+    test_refs.collect_vm_values_for_test(test_name, test_params);
+
+    checkRefVmValues();
+
+    TestResult res = test_create_exenetwork(test_params.model_name, test_params.model, test_params.device,
+                                            test_refs.ref_vmsize, test_refs.ref_vmpeak, test_refs.ref_vmrss,
+                                            test_refs.ref_vmhwm);
+    EXPECT_EQ(res.first, TestStatus::TEST_OK) << res.second;
+}
+
+TEST_P(MemCheckTestSuite, infer_request_inference) {
+    std::string test_name = "infer_request_inference";
+    auto test_params = GetParam();
+
+    TestReferences test_refs;
+    test_refs.collect_vm_values_for_test(test_name, test_params);
+
+    checkRefVmValues();
+
+    TestResult res = test_infer_request_inference(test_params.model_name, test_params.model, test_params.device,
+                                                  test_refs.ref_vmsize, test_refs.ref_vmpeak, test_refs.ref_vmrss,
+                                                  test_refs.ref_vmhwm);
+    EXPECT_EQ(res.first, TestStatus::TEST_OK) << res.second;
+}
+// tests_pipelines/tests_pipelines.cpp
+
+INSTANTIATE_TEST_CASE_P(MemCheckTests, MemCheckTestSuite,
+                        ::testing::ValuesIn(
+                                generateTestsParams({"devices", "models"})),
+                        getTestCaseName);
diff --git a/tests/stress_tests/memcheck_tests/tests_pipelines/tests_pipelines.cpp b/tests/stress_tests/memcheck_tests/tests_pipelines/tests_pipelines.cpp
new file mode 100644 (file)
index 0000000..58204a8
--- /dev/null
@@ -0,0 +1,124 @@
+#include "tests_pipelines.h"
+
+#include <string>
+#include <math.h>
+#include <chrono>
+
+#include <inference_engine.hpp>
+
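+// Measured values are multiplied by this factor when a reference-config record is logged
+// (see log_debug_ref_record_for_test below)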
+#define REPORTING_THRESHOLD 1.1
+
+using namespace InferenceEngine;
+
+#define getAlignedVmValues(vmsize, vmpeak, vmrss, vmhwm, vmsize_to_align, vmrss_to_align)   \
+        getVmValues(vmsize, vmpeak, vmrss, vmhwm);                                          \
+        vmsize -= vmsize_to_align;                                                          \
+        vmpeak -= vmsize_to_align;                                                          \
+        vmrss -= vmrss_to_align;                                                            \
+        vmhwm -= vmrss_to_align;
+
+#define log_debug_ref_record_for_test(test_name)                                                            \
+        log_debug("Record to update reference config: "                                                           \
+                  << "<model path=\"" + model_name + "\"" + " test=\"" + test_name + "\" device=\"" +       \
+                  target_device +                                                                           \
+                  "\" vmsize=\"" + std::to_string((int) (test_cur_vmsize * REPORTING_THRESHOLD)) +          \
+                  "\" vmpeak=\"" + std::to_string((int) (test_cur_vmpeak * REPORTING_THRESHOLD)) +          \
+                  "\" vmrss=\"" + std::to_string((int) (test_cur_vmrss * REPORTING_THRESHOLD)) +            \
+                  "\" vmhwm=\"" + std::to_string((int) (test_cur_vmhwm * REPORTING_THRESHOLD)) + "\" />");
+
+#define log_info_ref_mem_usage()                                                                \
+        log_info("Reference values of virtual memory consumption:");                            \
+        log_info("VMRSS\t\tVMHWM\t\tVMSIZE\t\tVMPEAK");                                               \
+        log_info(ref_vmrss << "\t\t" << ref_vmhwm << "\t\t" << ref_vmsize << "\t\t" << ref_vmpeak);
+
+#define log_info_cur_mem_usage()                                                                                    \
+        log_info("Current values of virtual memory consumption:");                                                  \
+        log_info("VMRSS\t\tVMHWM\t\tVMSIZE\t\tVMPEAK");                                                                   \
+        log_info(test_cur_vmrss << "\t\t" << test_cur_vmhwm << "\t\t" << test_cur_vmsize << "\t\t" << test_cur_vmpeak);
+
+TestResult
+test_create_exenetwork(const std::string &model_name, const std::string &model_path, const std::string &target_device,
+                       const long &ref_vmsize, const long &ref_vmpeak, const long &ref_vmrss, const long &ref_vmhwm) {
+    log_info("Create ExecutableNetwork from network: \"" << model_path
+                                                         << "\" for device: \"" << target_device << "\"");
+    long vmsize_before_test = 0, vmrss_before_test = 0,
+            test_cur_vmsize = 0, test_cur_vmpeak = 0,
+            test_cur_vmrss = 0, test_cur_vmhwm = 0;
+
+    vmsize_before_test = (long) getVmSizeInKB();
+    vmrss_before_test = (long) getVmRSSInKB();
+
+    create_exenetwork(model_path, target_device)();
+
+    getAlignedVmValues(test_cur_vmsize, test_cur_vmpeak, test_cur_vmrss, test_cur_vmhwm,
+                       vmsize_before_test, vmrss_before_test);
+
+    log_debug_ref_record_for_test("create_exenetwork");
+    log_info_ref_mem_usage();
+    log_info_cur_mem_usage();
+
+    if (test_cur_vmhwm > ref_vmhwm)
+        return TestResult(TestStatus::TEST_FAILED,
+                          "Test failed: HWM (peak of RSS) virtual memory consumption is greater than reference.\n"
+                          "Reference HWM of memory consumption: " + std::to_string(ref_vmhwm) + " KB.\n" +
+                          "Current HWM of memory consumption: " + std::to_string(test_cur_vmhwm) + " KB.\n");
+
+    return TestResult(TestStatus::TEST_OK, "");
+}
+
+TestResult
+test_infer_request_inference(const std::string &model_name, const std::string &model_path,
+                             const std::string &target_device,
+                             const long &ref_vmsize, const long &ref_vmpeak, const long &ref_vmrss,
+                             const long &ref_vmhwm) {
+    log_info("Inference of InferRequest from network: \"" << model_path
+                                                          << "\" for device: \"" << target_device << "\"");
+    long vmsize_before_test = 0, vmrss_before_test = 0,
+            test_cur_vmsize = 0, test_cur_vmpeak = 0,
+            test_cur_vmrss = 0, test_cur_vmhwm = 0;
+    std::chrono::system_clock::time_point t_start, t_end;
+    std::chrono::duration<double> t_diff;
+
+    vmsize_before_test = (long) getVmSizeInKB();
+    vmrss_before_test = (long) getVmRSSInKB();
+
+    Core ie;
+    CNNNetwork cnnNetwork = ie.ReadNetwork(model_path);
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+
+    log_info_ref_mem_usage();
+
+    t_start = std::chrono::system_clock::now();
+    int seconds = 1;
+    do {
+        infer_request.Infer();
+        OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+        for (auto &output : output_info)
+            Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+        t_end = std::chrono::system_clock::now();
+        t_diff = t_end - t_start;
+
+        getAlignedVmValues(test_cur_vmsize, test_cur_vmpeak, test_cur_vmrss, test_cur_vmhwm,
+                           vmsize_before_test, vmrss_before_test);
+
+        if (test_cur_vmrss > ref_vmrss) {
+            log_debug_ref_record_for_test("infer_request_inference");
+            return TestResult(TestStatus::TEST_FAILED,
+                              "Test failed: RSS virtual memory consumption became greater than reference "
+                              "after " + std::to_string(t_diff.count()) + " sec of inference.\n"
+                              "Reference RSS memory consumption: " + std::to_string(ref_vmrss) + " KB.\n" +
+                              "Current RSS memory consumption: " + std::to_string(test_cur_vmrss) + " KB.\n");
+        }
+
+        if (t_diff.count() > (double) (seconds)) {
+            log_info("Current values of virtual memory consumption after " << seconds << " seconds:");
+            log_info("VMRSS\t\tVMHWM\t\tVMSIZE\t\tVMPEAK");
+            log_info(test_cur_vmrss << "\t\t" << test_cur_vmhwm << "\t\t" << test_cur_vmsize << "\t\t" << test_cur_vmpeak);
+            seconds++;
+        }
+    } while (t_diff.count() < 5);
+    log_debug_ref_record_for_test("infer_request_inference");
+
+    return TestResult(TestStatus::TEST_OK, "");
+}
diff --git a/tests/stress_tests/memcheck_tests/tests_pipelines/tests_pipelines.h b/tests/stress_tests/memcheck_tests/tests_pipelines/tests_pipelines.h
new file mode 100644 (file)
index 0000000..0712bca
--- /dev/null
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "../../common/tests_utils.h"
+#include "../../common/utils.h"
+#include "../../common/ie_pipelines/pipelines.h"
+
+#include <string>
+
+// tests_pipelines/tests_pipelines.cpp
+TestResult test_create_exenetwork(const std::string &model_name, const std::string &model_path, const std::string &target_device,
+                                  const long &ref_vmsize, const long &ref_vmpeak, const long &ref_vmrss, const long &ref_vmhwm);
+TestResult test_infer_request_inference(const std::string &model_name, const std::string &model_path, const std::string &target_device,
+                                        const long &ref_vmsize, const long &ref_vmpeak, const long &ref_vmrss, const long &ref_vmhwm);
+// tests_pipelines/tests_pipelines.cpp
diff --git a/tests/stress_tests/memcheck_tests/tests_utils.h b/tests/stress_tests/memcheck_tests/tests_utils.h
new file mode 100644 (file)
index 0000000..32afff1
--- /dev/null
@@ -0,0 +1,72 @@
+#include "../common/tests_utils.h"
+
+#include <pugixml.hpp>
+
+class MemCheckEnvironment {
+private:
+    pugi::xml_document _refs_config;
+    MemCheckEnvironment() = default;
+    MemCheckEnvironment(const MemCheckEnvironment&) = delete;
+    MemCheckEnvironment& operator=(const MemCheckEnvironment&) = delete;
+public:
+    static MemCheckEnvironment& Instance(){
+        static MemCheckEnvironment env;
+        return env;
+    }
+
+    const pugi::xml_document & getRefsConfig() {
+        return _refs_config;
+    }
+
+    void setRefsConfig(const pugi::xml_document &refs_config) {
+        _refs_config.reset(refs_config);
+    }
+};
+
+class TestReferences {
+private:
+    std::vector<std::string> model_path_v, test_name_v, device_v;
+    std::vector<long> vmsize_v, vmpeak_v, vmrss_v, vmhwm_v;
+public:
+    long ref_vmsize = -1, ref_vmpeak = -1, ref_vmrss = -1, ref_vmhwm = -1;
+
+    TestReferences () {
+        // Parse RefsConfig from MemCheckEnvironment
+        std::string models_path = Environment::Instance().getEnvConfig()
+                .child("attributes").child("irs_path").child("value").text().as_string();
+
+        const pugi::xml_document &refs_config = MemCheckEnvironment::Instance().getRefsConfig();
+        auto values = refs_config.child("attributes").child("models");
+        for (pugi::xml_node node = values.first_child(); node; node = node.next_sibling()) {
+            for (pugi::xml_attribute_iterator ait = node.attributes_begin(); ait != node.attributes_end(); ait++) {
+                if (strncmp(ait->name(), "path", strlen(ait->name())) == 0) {
+                    model_path_v.push_back(OS_PATH_JOIN({models_path, ait->value()}));
+                } else if (strncmp(ait->name(), "test", strlen(ait->name())) == 0) {
+                    test_name_v.push_back(ait->value());
+                } else if (strncmp(ait->name(), "device", strlen(ait->name())) == 0) {
+                    device_v.push_back(ait->value());
+                } else if (strncmp(ait->name(), "vmsize", strlen(ait->name())) == 0) {
+                    vmsize_v.push_back(std::atoi(ait->value()));
+                } else if (strncmp(ait->name(), "vmpeak", strlen(ait->name())) == 0) {
+                    vmpeak_v.push_back(std::atoi(ait->value()));
+                } else if (strncmp(ait->name(), "vmrss", strlen(ait->name())) == 0) {
+                    vmrss_v.push_back(std::atoi(ait->value()));
+                } else if (strncmp(ait->name(), "vmhwm", strlen(ait->name())) == 0) {
+                    vmhwm_v.push_back(std::atoi(ait->value()));
+                }
+            }
+        }
+    }
+
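+    // Look up the reference VM values recorded for this (test, model, device) combination, if present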
+    void collect_vm_values_for_test(const std::string &test_name, const TestCase &test_params) {
+        for (size_t i = 0; i < test_name_v.size(); i++)
+            if (test_name_v[i] == test_name &&
+                model_path_v[i] == test_params.model &&
+                device_v[i] == test_params.device) {
+                ref_vmsize = vmsize_v[i];
+                ref_vmpeak = vmpeak_v[i];
+                ref_vmrss = vmrss_v[i];
+                ref_vmhwm = vmhwm_v[i];
+            }
+    }
+};
\ No newline at end of file
diff --git a/tests/stress_tests/memleaks_tests/CMakeLists.txt b/tests/stress_tests/memleaks_tests/CMakeLists.txt
new file mode 100644 (file)
index 0000000..a396454
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) 2018-2019 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set (TARGET_NAME "StressMemLeaksTests")
+
+file (GLOB SRC
+        ../common/*.cpp
+        ../common/ie_pipelines/*.cpp
+        *.cpp
+        tests_pipelines/*.cpp)
+
+file (GLOB HDR
+        ../common/*.h
+        ../common/managers/*.h
+        ../common/ie_pipelines/*.h
+        *.h
+        tests_pipelines/*.h)
+
+# Create executable from sources.
+add_executable(${TARGET_NAME} ${HDR} ${SRC})
+
+find_package(gflags REQUIRED)
+find_package(Threads REQUIRED)
+
+target_link_libraries(${TARGET_NAME}
+        IE::gtest
+        IE::gtest_main
+        IE::pugixml
+        gflags
+        Threads::Threads
+        ${InferenceEngine_LIBRARIES}
+        )
+
+target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}"
+        "${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src")
+
+# Copy local configs to BIN_FOLDER
+configure_file(local_configs/test_config.xml ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/stress_tests_configs/memleaks_tests/test_config.xml COPYONLY)
+configure_file(local_configs/env_config.xml ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/stress_tests_configs/memleaks_tests/env_config.xml COPYONLY)
diff --git a/tests/stress_tests/memleaks_tests/flags.h b/tests/stress_tests/memleaks_tests/flags.h
new file mode 100644 (file)
index 0000000..9687797
--- /dev/null
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "../common/utils.h"
+
+#include <gflags/gflags.h>
+
+/// @brief message for help argument
+static const char help_message[] = "Print a usage message";
+
+/// @brief Define flag for showing help message <br>
+DEFINE_bool(h, false, help_message);
+
+/// @brief Declare flag for showing help message <br>
+DECLARE_bool(help);
+
+/// @brief message for test_config argument
+static const char test_conf_message[] = "Optional. Path to a test config with description about number of threads, iterations etc.";
+
+/// @brief Define parameter for set test's configuration <br>
+/// test_conf is an optional parameter
+DEFINE_string(test_conf, OS_PATH_JOIN({"stress_tests_configs", "memleaks_tests", "test_config.xml"}), test_conf_message);
+
+/// @brief message for env_config argument
+static const char env_conf_message[] = "Optional. Path to an env config with paths to models etc.";
+
+/// @brief Define parameter for set environment <br>
+/// env_conf is an optional parameter
+DEFINE_string(env_conf, OS_PATH_JOIN({"stress_tests_configs", "memleaks_tests", "env_config.xml"}), env_conf_message);
\ No newline at end of file
diff --git a/tests/stress_tests/memleaks_tests/local_configs/env_config.xml b/tests/stress_tests/memleaks_tests/local_configs/env_config.xml
new file mode 100644 (file)
index 0000000..7d356d0
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<attributes>
+    <irs_path>
+        <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/master_04d6f112132f92cab563ae7655747e0359687dc9/</value>
+    </irs_path>
+</attributes>
diff --git a/tests/stress_tests/memleaks_tests/local_configs/test_config.xml b/tests/stress_tests/memleaks_tests/local_configs/test_config.xml
new file mode 100644 (file)
index 0000000..28698b5
--- /dev/null
@@ -0,0 +1,20 @@
+<?xml version="1.0"?>
+<attributes>
+<!--[ WARNING ] Use of attribute "processes" from config isn't implemented yet. It will be ignored.-->
+    <processes>
+        <value>1</value>
+    </processes>
+    <threads>
+        <value>1</value>
+    </threads>
+    <iterations>
+        <value>30</value>
+    </iterations>
+    <devices>
+        <value>CPU</value>
+<!--        <value>GPU</value>-->
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+    </models>
+</attributes>
diff --git a/tests/stress_tests/memleaks_tests/main.cpp b/tests/stress_tests/memleaks_tests/main.cpp
new file mode 100644 (file)
index 0000000..93752ab
--- /dev/null
@@ -0,0 +1,60 @@
+#include "flags.h"
+#include "../common/utils.h"
+#include "../common/tests_utils.h"
+
+#include <gtest/gtest.h>
+#include <pugixml.hpp>
+
+
+bool parseAndCheckCommandLine(int argc, char **argv) {
+    // ---------------------------Parsing and validating input arguments--------------------------------------
+    log_info("Parsing input parameters");
+
+    int new_argc = 0;
+    std::vector<char*> _argv;
+    for (int i = 0; i < argc; i++) {
+        if ("--gtest" != std::string(argv[i]).substr(0, 7)) {
+            _argv.push_back(argv[i]);
+            new_argc++;
+        }
+    }
+    char **new_argv = &_argv[0];
+    gflags::ParseCommandLineNonHelpFlags(&new_argc, &new_argv, true);
+
+    if (FLAGS_help || FLAGS_h) {
+        // TODO print info
+        //::testing::InitGoogleTest(&argc, argv);
+        return false;
+    }
+
+    pugi::xml_document config;
+    pugi::xml_parse_result result = config.load_file(FLAGS_test_conf.c_str());
+    if (!result) {
+        log_err("Exception while reading test config \"" << FLAGS_test_conf << "\": " << result.description());
+        return false;
+    }
+    result = config.load_file(FLAGS_env_conf.c_str());
+    if (!result) {
+        log_err("Exception while reading env config \"" << FLAGS_env_conf << "\": " << result.description());
+        return false;
+    }
+    return true;
+}
+
+
+int main(int argc, char **argv) {
+    log_warn("Use of attribute \"processes\" from config isn't implemented yet. It will be ignored.");
+    log_warn("Use of attribute \"threads\" from config greater than 1 is risky because of "
+             "no synchronization between steps from different threads. Tests results may be non-deterministic.");
+    if (!parseAndCheckCommandLine(argc, argv)) {
+        return 0;   // TODO return correct status
+    }
+
+    pugi::xml_document config;
+    config.load_file(FLAGS_test_conf.c_str());
+    Environment::Instance().setTestConfig(config);
+    config.load_file(FLAGS_env_conf.c_str());
+    Environment::Instance().setEnvConfig(config);
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/tests/stress_tests/memleaks_tests/tests.cpp b/tests/stress_tests/memleaks_tests/tests.cpp
new file mode 100644 (file)
index 0000000..99416be
--- /dev/null
@@ -0,0 +1,123 @@
+#include "../common/tests_utils.h"
+#include "../common/managers/thread_manager.h"
+#include "tests_pipelines/tests_pipelines.h"
+
+#include <inference_engine.hpp>
+
+#include <gtest/gtest.h>
+
+using namespace InferenceEngine;
+
+class MemLeaksTestSuiteNoModel : public ::testing::TestWithParam<TestCase> {
+};
+
+class MemLeaksTestSuiteNoDevice : public ::testing::TestWithParam<TestCase> {
+};
+
+class MemLeaksTestSuite : public ::testing::TestWithParam<TestCase> {
+};
+
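+// Run the given test function on numthreads parallel threads via ThreadManager and check that every
+// thread finished successfully and returned TEST_OK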
+inline void test_runner(int numthreads, const std::function<TestResult()> &test_function) {
+    ThreadManager<TestResult> thr_manager;
+    for (int i = 0; i < numthreads; i++)
+        thr_manager.add_task(test_function);
+    thr_manager.run_parallel_n_wait();
+
+    std::vector<ManagerStatus> statuses = thr_manager.get_all_statuses();
+    std::vector<TestResult> results = thr_manager.get_all_results();
+
+    for (int i = 0; i < numthreads; i++) {
+        EXPECT_EQ(statuses[i], ManagerStatus::FINISHED_SUCCESSFULLY)
+                            << "[Thread " << i << "] Thread not finished successfully";
+        EXPECT_EQ(results[i].first, TestStatus::TEST_OK) << "[Thread " << i << "] " << results[i].second;
+    }
+}
+
+
+// tests_pipelines/tests_pipelines.cpp
+TEST_P(MemLeaksTestSuiteNoModel, load_unload_plugin) {
+    auto test_params = GetParam();
+    auto test = [&] {
+        return test_load_unload_plugin(test_params.device, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
+TEST_P(MemLeaksTestSuiteNoDevice, read_network) {
+    auto test_params = GetParam();
+    auto test = [&] {
+        return test_read_network(test_params.model, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
+TEST_P(MemLeaksTestSuiteNoDevice, create_cnnnetwork) {
+    auto test_params = GetParam();
+    auto test = [&] {
+        return test_create_cnnnetwork(test_params.model, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
+TEST_P(MemLeaksTestSuiteNoDevice, cnnnetwork_reshape_batch_x2) {
+    auto test_params = GetParam();
+    auto test = [&] {
+        return test_cnnnetwork_reshape_batch_x2(test_params.model, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
+TEST_P(MemLeaksTestSuiteNoDevice, set_input_params) {
+    auto test_params = GetParam();
+    auto test = [&] {
+        return test_set_input_params(test_params.model, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
+TEST_P(MemLeaksTestSuite, recreate_exenetwork) {
+    auto test_params = GetParam();
+    Core ie;
+    auto test = [&] {
+        return test_recreate_exenetwork(ie, test_params.model, test_params.device, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
+TEST_P(MemLeaksTestSuite, recreate_infer_request) {
+    auto test_params = GetParam();
+    Core ie;
+    CNNNetwork cnnNetwork = ie.ReadNetwork(test_params.model);
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, test_params.device);
+    auto test = [&] {
+        return test_recreate_infer_request(exeNetwork, test_params.model, test_params.device, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+
+TEST_P(MemLeaksTestSuite, reinfer_request_inference) {
+    auto test_params = GetParam();
+    auto test = [&] {
+        Core ie;
+        CNNNetwork cnnNetwork = ie.ReadNetwork(test_params.model);
+        ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, test_params.device);
+        InferRequest infer_request = exeNetwork.CreateInferRequest();
+        return test_reinfer_request_inference(infer_request, cnnNetwork, test_params.model, test_params.device, test_params.numiters);
+    };
+    test_runner(test_params.numthreads, test);
+}
+// tests_pipelines/tests_pipelines.cpp
+
+INSTANTIATE_TEST_CASE_P(MemLeaksTests, MemLeaksTestSuiteNoModel,
+                        ::testing::ValuesIn(generateTestsParams({"processes", "threads", "iterations", "devices"})),
+                        getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(MemLeaksTests, MemLeaksTestSuiteNoDevice,
+                        ::testing::ValuesIn(generateTestsParams({"processes", "threads", "iterations", "models"})),
+                        getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(MemLeaksTests, MemLeaksTestSuite,
+                        ::testing::ValuesIn(
+                                generateTestsParams({"processes", "threads", "iterations", "devices", "models"})),
+                        getTestCaseName);
+
diff --git a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp
new file mode 100644 (file)
index 0000000..49e60d3
--- /dev/null
@@ -0,0 +1,185 @@
+#include "tests_pipelines.h"
+
+#include <math.h>
+
+#include <inference_engine.hpp>
+#include <algorithm>
+#include <array>
+#include <string>
+
+using namespace InferenceEngine;
+
+// Number of pipeline runs before it starts measuring
+#define WARMUP_STEPS 30
+// Number of memory peaks ignored. The libc memory manager can produce peaks
+// while overall consumption stays flat
+#define MAX_OUTLIERS 5
+// Maximum number of measuring pipeline restarts
+#define MAX_RETRY 3
+// A threshold for which memory growth will be considered an error
+#define THRESHOLD 0.1
+
+// Measure values
+enum MeasureValue { VMRSS = 0, VMHWM, VMSIZE, VMPEAK, MeasureValueMax };
+
+namespace util {    
+template <typename In, typename Out, typename Func>
+void transform(const In& in, Out& out, const Func& func) {
+    std::transform(std::begin(in), std::end(in), std::begin(out), func);
+}
+
+template <typename In1, typename In2, typename Out, typename Func>
+void transform(const In1& in1, const In2& in2, Out& out, const Func& func) {
+    std::transform(std::begin(in1), std::end(in1), std::begin(in2), std::begin(out), func);
+}
+}  // namespace util
+
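+// Run test_pipeline repeatedly: the values measured right after WARMUP_STEPS iterations become the reference,
+// each of the following n iterations is compared against reference +/- THRESHOLD, and the measurement is
+// restarted (up to MAX_RETRY times) when more than MAX_OUTLIERS iterations exceed the threshold.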
+TestResult common_test_pipeline(const std::function<void()>& test_pipeline, const int& n) {
+    int retry_count = 0;
+    float mem_threshold = THRESHOLD;
+    std::array<long, MeasureValueMax> cur = {0};           // measured for current iteration
+    std::array<long, MeasureValueMax> ref = {0};           // recorded reference
+    std::array<long, MeasureValueMax> diff = {0};          // difference between current and reference
+    std::array<bool, MeasureValueMax> outlier = {0};       // flag if current does not fit threshold
+    std::array<int, MeasureValueMax> outlier_count = {0};  // counter for how many times current does not fit threshold
+    std::array<float, MeasureValueMax> threshold = {0};    // ref * THRESHOLD
+    std::string progress_str;
+
+    progress_str.reserve(1024);
+
+    log_info("Warming up for " << WARMUP_STEPS << " iterations");
+    log_info("i\tVMRSS\tVMHWM\tVMSIZE\tVMPEAK");
+    int measure_count = n;
+    for (int iteration = 0; measure_count > 0; iteration++) {
+        // Warm up to take reference values
+        test_pipeline();
+        getVmValues(cur[VMSIZE], cur[VMPEAK], cur[VMRSS], cur[VMHWM]);
+        progress_str = std::to_string(iteration + 1) + "\t" + std::to_string(cur[VMRSS]) + "\t" +
+                       std::to_string(cur[VMHWM]) + "\t" + std::to_string(cur[VMSIZE]) + "\t" +
+                       std::to_string(cur[VMPEAK]);
+
+        // measure
+        if (iteration >= WARMUP_STEPS) {
+            // set reference
+            if (WARMUP_STEPS == iteration || (retry_count < MAX_RETRY && (outlier_count[VMRSS] > MAX_OUTLIERS ||
+                                                                          outlier_count[VMHWM] > MAX_OUTLIERS))) {
+                if (0 != retry_count) log_info("Retrying " << retry_count + 1 << " of " << MAX_RETRY);
+                retry_count++;
+                measure_count = n;
+                outlier_count = {0};
+                ref = cur;
+                util::transform(ref, threshold, [](long ref_val) -> float {
+                    return THRESHOLD * ref_val;
+                });
+                log_info("Setting thresholds VMRSS=" << ref[VMRSS] << "(+-" << static_cast<int>(threshold[VMRSS])
+                                                     << "), VMHWM=" << ref[VMHWM] << "(+-"
+                                                     << static_cast<int>(threshold[VMHWM]) << ")");
+            }
+            measure_count--;
+            util::transform(cur, ref, diff, [](long cur_val, long ref_val) -> long {
+                return labs(cur_val - ref_val);
+            });
+            util::transform(diff, threshold, outlier, [](long diff_val, float threshold_val) -> bool {
+                return diff_val > threshold_val;
+            });
+            util::transform(outlier, outlier_count, outlier_count,
+                            [](bool outlier_val, long outlier_count_val) -> long {
+                                return outlier_count_val + (outlier_val ? 1 : 0);
+                            });
+
+            if (outlier[VMRSS]) {
+                progress_str += "\t<-VMRSS outlier";
+            }
+            if (outlier[VMHWM]) {
+                progress_str += "\t<-VMHWM outlier";
+            }
+        }
+
+        log_info(progress_str);
+    }
+
+    if (outlier_count[VMRSS] > MAX_OUTLIERS)
+        return TestResult(TestStatus::TEST_FAILED, "Test failed: RSS virtual memory consumption has grown too much.");
+
+    if (outlier_count[VMHWM] > MAX_OUTLIERS)
+        return TestResult(TestStatus::TEST_FAILED, "Test failed: HWM virtual memory consumption has grown too much.");
+
+    return TestResult(TestStatus::TEST_OK, "");
+}
+
+
+TestResult test_load_unload_plugin(const std::string &target_device, const int &n) {
+    log_info("Load/unload plugin for device: " << target_device << " for " << n << " times");
+    return common_test_pipeline(load_unload_plugin(target_device), n);
+}
+
+TestResult test_read_network(const std::string &model, const int &n) {
+    log_info("Read network: \"" << model << "\" for " << n << " times");
+    return common_test_pipeline(read_network(model), n);
+}
+
+TestResult test_create_cnnnetwork(const std::string &model, const int &n) {
+    log_info("Create CNNNetwork from network: \"" << model << "\" for " << n << " times");
+    return common_test_pipeline(create_cnnnetwork(model), n);
+}
+
+TestResult test_cnnnetwork_reshape_batch_x2(const std::string &model, const int &n) {
+    log_info("Reshape to batch*=2 of CNNNetwork created from network: \"" << model << "\" for " << n << " times");
+    return common_test_pipeline(cnnnetwork_reshape_batch_x2(model), n);
+}
+
+TestResult test_set_input_params(const std::string &model, const int &n) {
+    log_info("Apply preprocessing for CNNNetwork from network: \"" << model << "\" for " << n << " times");
+    return common_test_pipeline(set_input_params(model), n);
+}
+
+TestResult test_create_exenetwork(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create ExecutableNetwork from network: \"" << model
+                                                         << "\" for device: \"" << target_device << "\" for " << n
+                                                         << " times");
+    return common_test_pipeline(create_exenetwork(model, target_device), n);
+}
+
+TestResult
+test_recreate_exenetwork(InferenceEngine::Core &ie, const std::string &model, const std::string &target_device,
+                         const int &n) {
+    log_info("Recreate ExecutableNetwork from network within existing InferenceEngine::Core: \"" << model
+                                                                                                 << "\" for device: \""
+                                                                                                 << target_device
+                                                                                                 << "\" for " << n
+                                                                                                 << " times");
+    return common_test_pipeline(recreate_exenetwork(ie, model, target_device), n);
+}
+
+TestResult test_create_infer_request(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create InferRequest from network: \"" << model
+                                                    << "\" for device: \"" << target_device << "\" for " << n
+                                                    << " times");
+    return common_test_pipeline(create_infer_request(model, target_device), n);
+}
+
+TestResult
+test_recreate_infer_request(ExecutableNetwork &network, const std::string &model, const std::string &target_device,
+                            const int &n) {
+    log_info("Create InferRequest from network: \"" << model
+                                                    << "\" for device: \"" << target_device << "\" for " << n
+                                                    << " times");
+    return common_test_pipeline(recreate_infer_request(network), n);
+}
+
+TestResult
+test_infer_request_inference(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Inference of InferRequest from network: \"" << model
+                                                          << "\" for device: \"" << target_device << "\" for " << n
+                                                          << " times");
+    return common_test_pipeline(infer_request_inference(model, target_device), n);
+}
+
+TestResult
+test_reinfer_request_inference(InferenceEngine::InferRequest &infer_request, InferenceEngine::CNNNetwork &cnnNetwork,
+                               const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Inference of InferRequest from network: \"" << model
+                                                          << "\" for device: \"" << target_device << "\" for " << n
+                                                          << " times");
+    return common_test_pipeline(reinfer_request_inference(infer_request, cnnNetwork), n);
+}
diff --git a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.h b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.h
new file mode 100644 (file)
index 0000000..2d144af
--- /dev/null
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "../../common/tests_utils.h"
+#include "../../common/utils.h"
+#include "../../common/ie_pipelines/pipelines.h"
+
+#include <string>
+
+#include <inference_engine.hpp>
+
+// tests_pipelines/tests_pipelines.cpp
+TestResult test_load_unload_plugin(const std::string &target_device, const int &n);
+TestResult test_read_network(const std::string &model, const int &n);
+TestResult test_create_cnnnetwork(const std::string &model, const int &n);
+TestResult test_cnnnetwork_reshape_batch_x2(const std::string &model, const int &n);
+TestResult test_set_input_params(const std::string &model, const int &n);
+TestResult test_recreate_exenetwork(InferenceEngine::Core &ie, const std::string &model, const std::string &target_device, const int &n);
+TestResult test_create_infer_request(const std::string &model, const std::string &target_device, const int &n);
+TestResult test_recreate_infer_request(InferenceEngine::ExecutableNetwork& network, const std::string &model, const std::string &target_device, const int &n);
+TestResult test_infer_request_inference(const std::string &model, const std::string &target_device, const int &n);
+TestResult test_reinfer_request_inference(InferenceEngine::InferRequest& infer_request, InferenceEngine::CNNNetwork& cnnNetwork, const std::string &model, const std::string &target_device, const int &n);
+// tests_pipelines/tests_pipelines.cpp
diff --git a/tests/stress_tests/scripts/get_testdata.py b/tests/stress_tests/scripts/get_testdata.py
new file mode 100644 (file)
index 0000000..0c73b26
--- /dev/null
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+""" Script to acquire model IRs for stress tests.
+Usage: ./scripts/get_testdata.py
+"""
+import argparse
+import multiprocessing
+import os
+import shutil
+import subprocess
+from inspect import getsourcefile
+
+# Parameters
+MODEL_NAMES = 'vgg16,mtcnn-r,mobilenet-ssd,ssd300'
+OMZ_VERSION = 'efd238d02035f8a5417b7b1e25cd4c997d44351f'
+
+
+def abs_path(relative_path):
+    """Return absolute path given path relative to the current file.
+    """
+    return os.path.realpath(
+        os.path.join(os.path.dirname(getsourcefile(lambda: 0)), relative_path))
+
+
+def main():
+    """Main entry point.
+    """
+    parser = argparse.ArgumentParser(
+        description='Acquire test data',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('--output_dir', default=f'./_models', help='directory to put test data into')
+    parser.add_argument('--cache_dir', default=f'./_cache', help='directory with test data cache')
+    args = parser.parse_args()
+
+    # Clone Open Model Zoo into temporary path
+    omz_path = './_open_model_zoo'
+    if os.path.exists(omz_path):
+        shutil.rmtree(omz_path)
+    subprocess.check_call(
+        f'git clone https://github.com/opencv/open_model_zoo {omz_path}' \
+        f' && cd {omz_path}'\
+        f' && git checkout {OMZ_VERSION}', shell=True)
+    # Acquire model IRs
+    mo_tool = abs_path('../../../model-optimizer/mo.py')
+    subprocess.check_call(
+        f'{omz_path}/tools/downloader/downloader.py --name "{MODEL_NAMES}"' \
+        f' --output_dir {args.output_dir}/{OMZ_VERSION}/models' \
+        f' --cache_dir {args.cache_dir}', shell=True)
+    subprocess.check_call(
+        f'{omz_path}/tools/downloader/converter.py --name "{MODEL_NAMES}"' \
+        f' --output_dir {args.output_dir}/{OMZ_VERSION}/IRs' \
+        f' --download_dir {args.output_dir}/{OMZ_VERSION}/models' \
+        f' --mo {mo_tool} --jobs {multiprocessing.cpu_count()}', shell=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/stress_tests/unittests/CMakeLists.txt b/tests/stress_tests/unittests/CMakeLists.txt
new file mode 100644 (file)
index 0000000..f41ba48
--- /dev/null
@@ -0,0 +1,40 @@
+# Copyright (C) 2018-2020 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set (TARGET_NAME "StressUnitTests")
+
+file (GLOB SRC
+        ../common/*.cpp
+        ../common/ie_pipelines/*.cpp
+        *.cpp
+        tests_pipelines/*.cpp)
+
+file (GLOB HDR
+        ../common/*.h
+        ../common/managers/*.h
+        ../common/ie_pipelines/*.h
+        *.h
+        tests_pipelines/*.h)
+
+# Create executable from sources.
+add_executable(${TARGET_NAME} ${HDR} ${SRC})
+
+find_package(gflags REQUIRED)
+find_package(Threads REQUIRED)
+
+target_link_libraries(${TARGET_NAME}
+        IE::gtest
+        IE::gtest_main
+        IE::pugixml
+        gflags
+        Threads::Threads
+        ${InferenceEngine_LIBRARIES}
+        )
+
+target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}"
+        "${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src")
+
+# Copy local configs to BIN_FOLDER
+configure_file(local_configs/test_config.xml ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/stress_tests_configs/unittests/test_config.xml COPYONLY)
+configure_file(local_configs/env_config.xml ${OUTPUT_ROOT}/${BIN_FOLDER}/${CMAKE_BUILD_TYPE}/stress_tests_configs/unittests/env_config.xml COPYONLY)
diff --git a/tests/stress_tests/unittests/flags.h b/tests/stress_tests/unittests/flags.h
new file mode 100644 (file)
index 0000000..7f4ff8a
--- /dev/null
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "../common/utils.h"
+
+#include <gflags/gflags.h>
+
+/// @brief message for help argument
+static const char help_message[] = "Print a usage message";
+
+/// @brief Define flag for showing help message <br>
+DEFINE_bool(h, false, help_message);
+
+/// @brief Declare flag for showing help message <br>
+DECLARE_bool(help);
+
+/// @brief message for test_config argument
+static const char test_conf_message[] = "Optional. Path to a test config with description about number of threads, iterations etc.";
+
+/// @brief Define parameter for set test's configuration <br>
+/// test_conf is an optional parameter
+DEFINE_string(test_conf, OS_PATH_JOIN({"stress_tests_configs", "unittests", "test_config.xml"}), test_conf_message);
+
+/// @brief message for env_config argument
+static const char env_conf_message[] = "Optional. Path to an env config with paths to models etc.";
+
+/// @brief Define parameter for set environment <br>
+/// env_conf is an optional parameter
+DEFINE_string(env_conf, OS_PATH_JOIN({"stress_tests_configs", "unittests", "env_config.xml"}), env_conf_message);
\ No newline at end of file
diff --git a/tests/stress_tests/unittests/local_configs/env_config.xml b/tests/stress_tests/unittests/local_configs/env_config.xml
new file mode 100644 (file)
index 0000000..7d356d0
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<attributes>
+    <irs_path>
+        <value>/nfs/inn/proj/vdp/vdp_tests/stress_tests/master_04d6f112132f92cab563ae7655747e0359687dc9/</value>
+    </irs_path>
+</attributes>
diff --git a/tests/stress_tests/unittests/local_configs/test_config.xml b/tests/stress_tests/unittests/local_configs/test_config.xml
new file mode 100644 (file)
index 0000000..162f6f1
--- /dev/null
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<attributes>
+    <processes>
+        <value>1</value>
+    </processes>
+    <threads>
+        <value>1</value>
+    </threads>
+    <iterations>
+        <value>100</value>
+    </iterations>
+    <devices>
+        <value>CPU</value>
+        <value>GPU</value>
+    </devices>
+    <models>
+        <value>caffe/FP32/alexnet/alexnet.xml</value>
+    </models>
+</attributes>
diff --git a/tests/stress_tests/unittests/main.cpp b/tests/stress_tests/unittests/main.cpp
new file mode 100644 (file)
index 0000000..3d23a64
--- /dev/null
@@ -0,0 +1,57 @@
+#include "flags.h"
+#include "../common/utils.h"
+#include "../common/tests_utils.h"
+
+#include <gtest/gtest.h>
+#include <pugixml.hpp>
+
+
+bool parseAndCheckCommandLine(int argc, char **argv) {
+    // ---------------------------Parsing and validating input arguments--------------------------------------
+    log_info("Parsing input parameters");
+
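+    // Drop gtest-specific arguments (--gtest*) so that gflags parses only its own flags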
+    int new_argc = 0;
+    std::vector<char*> _argv;
+    for (int i = 0; i < argc; i++) {
+        if ("--gtest" != std::string(argv[i]).substr(0, 7)) {
+            _argv.push_back(argv[i]);
+            new_argc++;
+        }
+    }
+    char **new_argv = &_argv[0];
+    gflags::ParseCommandLineNonHelpFlags(&new_argc, &new_argv, true);
+
+    if (FLAGS_help || FLAGS_h) {
+        // TODO print info
+        //::testing::InitGoogleTest(&argc, argv);
+        return false;
+    }
+
+    pugi::xml_document config;
+    pugi::xml_parse_result result = config.load_file(FLAGS_test_conf.c_str());
+    if (!result) {
+        log_err("Exception while reading test config \"" << FLAGS_test_conf << "\": " << result.description());
+        return false;
+    }
+    result = config.load_file(FLAGS_env_conf.c_str());
+    if (!result) {
+        log_err("Exception while reading env config \"" << FLAGS_env_conf << "\": " << result.description());
+        return false;
+    }
+    return true;
+}
+
+
+int main(int argc, char **argv) {
+    if (!parseAndCheckCommandLine(argc, argv)) {
+        return 0;   // TODO return correct status
+    }
+
+    pugi::xml_document config;
+    config.load_file(FLAGS_test_conf.c_str());
+    Environment::Instance().setTestConfig(config);
+    config.load_file(FLAGS_env_conf.c_str());
+    Environment::Instance().setEnvConfig(config);
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/tests/stress_tests/unittests/tests.cpp b/tests/stress_tests/unittests/tests.cpp
new file mode 100644 (file)
index 0000000..f710d36
--- /dev/null
@@ -0,0 +1,94 @@
+#include "../common/tests_utils.h"
+#include "tests_pipelines/tests_pipelines.h"
+
+#include <gtest/gtest.h>
+
+class UnitTestSuiteNoModel : public ::testing::TestWithParam<TestCase> {
+};
+
+class UnitTestSuiteNoDevice : public ::testing::TestWithParam<TestCase> {
+};
+
+class UnitTestSuite : public ::testing::TestWithParam<TestCase> {
+};
+
+// tests_pipelines/tests_pipelines.cpp
+TEST_P(UnitTestSuiteNoModel, load_unload_plugin) {
+    runTest(test_load_unload_plugin, GetParam());
+}
+
+TEST_P(UnitTestSuiteNoDevice, read_network) {
+    runTest(test_read_network, GetParam());
+}
+
+TEST_P(UnitTestSuiteNoDevice, create_cnnnetwork) {
+    runTest(test_create_cnnnetwork, GetParam());
+}
+
+TEST_P(UnitTestSuiteNoDevice, cnnnetwork_reshape_batch_x2) {
+    runTest(test_cnnnetwork_reshape_batch_x2, GetParam());
+}
+
+TEST_P(UnitTestSuiteNoDevice, set_input_params) {
+    runTest(test_set_input_params, GetParam());
+}
+
+TEST_P(UnitTestSuite, create_exenetwork) {
+    runTest(test_create_exenetwork, GetParam());
+}
+
+TEST_P(UnitTestSuite, create_infer_request) {
+    runTest(test_create_infer_request, GetParam());
+}
+
+TEST_P(UnitTestSuite, infer_request_inference) {
+    runTest(test_infer_request_inference, GetParam());
+}
+// tests_pipelines/tests_pipelines.cpp
+
+
+// tests_pipelines/tests_pipelines_full_pipeline.cpp
+TEST_P(UnitTestSuite, load_unload_plugin_full_pipeline) {
+    runTest(test_load_unload_plugin_full_pipeline, GetParam());
+}
+
+TEST_P(UnitTestSuite, read_network_full_pipeline) {
+    runTest(test_read_network_full_pipeline, GetParam());
+}
+
+TEST_P(UnitTestSuite, create_cnnnetwork_full_pipeline) {
+    runTest(test_create_cnnnetwork_full_pipeline, GetParam());
+}
+
+TEST_P(UnitTestSuite, set_input_params_full_pipeline) {
+    runTest(test_set_input_params_full_pipeline, GetParam());
+}
+
+TEST_P(UnitTestSuite, cnnnetwork_reshape_batch_x2_full_pipeline) {
+    runTest(test_cnnnetwork_reshape_batch_x2_full_pipeline, GetParam());
+}
+
+TEST_P(UnitTestSuite, create_exenetwork_full_pipeline) {
+    runTest(test_create_exenetwork_full_pipeline, GetParam());
+}
+
+TEST_P(UnitTestSuite, create_infer_request_full_pipeline) {
+    runTest(test_create_infer_request_full_pipeline, GetParam());
+}
+
+TEST_P(UnitTestSuite, infer_request_inference_full_pipeline) {
+    runTest(test_infer_request_inference_full_pipeline, GetParam());
+}
+// tests_pipelines/tests_pipelines_full_pipeline.cpp
+
+INSTANTIATE_TEST_CASE_P(StressUnitTests, UnitTestSuiteNoModel,
+                        ::testing::ValuesIn(generateTestsParams({"processes", "threads", "iterations", "devices"})),
+                        getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(StressUnitTests, UnitTestSuiteNoDevice,
+                        ::testing::ValuesIn(generateTestsParams({"processes", "threads", "iterations", "models"})),
+                        getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(StressUnitTests, UnitTestSuite,
+                        ::testing::ValuesIn(generateTestsParams({"processes", "threads", "iterations", "devices", "models"})),
+                        getTestCaseName);
diff --git a/tests/stress_tests/unittests/tests_pipelines/tests_pipelines.cpp b/tests/stress_tests/unittests/tests_pipelines/tests_pipelines.cpp
new file mode 100644 (file)
index 0000000..afa4845
--- /dev/null
@@ -0,0 +1,91 @@
+#include "tests_pipelines.h"
+
+#include <string>
+
+#include <inference_engine.hpp>
+
+
+using namespace InferenceEngine;
+
+void test_load_unload_plugin(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Load/unload plugin for device: " << target_device << " for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        load_unload_plugin(target_device)();
+    }
+}
+
+void test_read_network(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Read network: \"" << model << "\" for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        read_network(model)();
+    }
+}
+
+void test_create_cnnnetwork(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create CNNNetwork from network: \"" << model << "\" for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        create_cnnnetwork(model)();
+    }
+}
+
+void test_cnnnetwork_reshape_batch_x2(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Reshape to batch*=2 of CNNNetwork created from network: \"" << model << "\" for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        cnnnetwork_reshape_batch_x2(model)();
+    }
+}
+
+void test_set_input_params(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Apply preprocessing for CNNNetwork from network: \"" << model << "\" for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        set_input_params(model)();
+    }
+}
+
+void test_create_exenetwork(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create ExecutableNetwork from network: \"" << model
+             << "\" for device: \"" << target_device << "\" for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        create_exenetwork(model, target_device)();
+    }
+}
+
+void test_create_infer_request(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create InferRequest from network: \"" << model
+             << "\" for device: \"" << target_device << "\" for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        create_infer_request(model, target_device)();
+    }
+}
+
+void test_infer_request_inference(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Inference of InferRequest from network: \"" << model
+             << "\" for device: \"" << target_device << "\" for " << n << " times");
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        infer_request_inference(model, target_device)();
+    }
+}
diff --git a/tests/stress_tests/unittests/tests_pipelines/tests_pipelines.h b/tests/stress_tests/unittests/tests_pipelines/tests_pipelines.h
new file mode 100644 (file)
index 0000000..7e5ef80
--- /dev/null
@@ -0,0 +1,28 @@
+#pragma once
+
+#include "../../common/utils.h"
+#include "../../common/ie_pipelines/pipelines.h"
+
+#include <string>
+
+// tests_pipelines/tests_pipelines.cpp
+void test_load_unload_plugin(const std::string &model, const std::string &target_device, const int &n);
+void test_read_network(const std::string &model, const std::string &target_device, const int &n);
+void test_create_cnnnetwork(const std::string &model, const std::string &target_device, const int &n);
+void test_cnnnetwork_reshape_batch_x2(const std::string &model, const std::string &target_device, const int &n);
+void test_set_input_params(const std::string &model, const std::string &target_device, const int &n);
+void test_create_exenetwork(const std::string &model, const std::string &target_device, const int &n);
+void test_create_infer_request(const std::string &model, const std::string &target_device, const int &n);
+void test_infer_request_inference(const std::string &model, const std::string &target_device, const int &n);
+// tests_pipelines/tests_pipelines.cpp
+
+// tests_pipelines/tests_pipelines_full_pipeline.cpp
+void test_load_unload_plugin_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+void test_read_network_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+void test_create_cnnnetwork_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+void test_set_input_params_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+void test_cnnnetwork_reshape_batch_x2_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+void test_create_exenetwork_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+void test_create_infer_request_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+void test_infer_request_inference_full_pipeline(const std::string &model, const std::string &target_device, const int &n);
+// tests_pipelines/tests_pipelines_full_pipeline.cpp
diff --git a/tests/stress_tests/unittests/tests_pipelines/tests_pipelines_full_pipeline.cpp b/tests/stress_tests/unittests/tests_pipelines/tests_pipelines_full_pipeline.cpp
new file mode 100644 (file)
index 0000000..26a82b1
--- /dev/null
@@ -0,0 +1,266 @@
+#include "tests_pipelines.h"
+
+#include <string>
+
+#include <inference_engine.hpp>
+
+using namespace InferenceEngine;
+
+#define batchIndex 0
+
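+// Configure a network input: disable resize, set U8 precision, and pick NCHW/NC layout from the input rank (expects an "input" InputsDataMap entry in scope)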
+#define setInputParameters()                                                        \
+    input.second->getPreProcess().setResizeAlgorithm(NO_RESIZE);                    \
+    input.second->setPrecision(Precision::U8);                                      \
+    if (input.second->getInputData()->getTensorDesc().getDims().size() == 4)        \
+        input.second->setLayout(Layout::NCHW);                                      \
+    else if (input.second->getInputData()->getTensorDesc().getDims().size() == 2)   \
+        input.second->setLayout(Layout::NC);
+
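+// Double the batch dimension in "shapes" for NCHW/NC inputs and remember that a reshape is required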
+#define computeShapesToReshape()                                \
+    auto layout = input.second->getTensorDesc().getLayout();    \
+    if ((layout == Layout::NCHW) || (layout == Layout::NC)) {   \
+        shapes[input.first][batchIndex] *= 2;                   \
+        doReshape = true;                                       \
+    }
+
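+// Apply the collected shapes to cnnNetwork, or fail if no input was eligible for a batch reshape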
+#define reshapeCNNNetwork()                                             \
+    if (doReshape)                                                      \
+        cnnNetwork.reshape(shapes);                                     \
+    else                                                                \
+        throw std::logic_error("Reshape wasn't applied for a model.");
+
+void test_load_unload_plugin_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Load/unload plugin for device: " << target_device << " for " << n << " times");
+    Core ie;
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        // GetVersions silently register plugin in `plugins` through `GetCPPPluginByName`
+        ie.GetVersions(target_device);
+        // Remove plugin for target_device from `plugins`
+        ie.UnregisterPlugin(target_device);
+    }
+    CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    for (auto &input : inputInfo) {
+        setInputParameters();
+        computeShapesToReshape();
+    }
+    reshapeCNNNetwork();
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+    infer_request.Infer();
+    OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+    for (auto &output : output_info)
+        Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+}
+
+void test_read_network_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Read network: \"" << model << "\" for " << n << " times");
+    Core ie;
+    IE_SUPPRESS_DEPRECATED_START
+    std::shared_ptr<CNNNetReader> netReaderPtr;
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        CNNNetReader netReader;
+        netReader.ReadNetwork(model);
+        netReader.ReadWeights(fileNameNoExt(model) + ".bin");
+        netReaderPtr = std::make_shared<CNNNetReader>(netReader);
+    }
+    CNNNetwork cnnNetwork = netReaderPtr->getNetwork();
+    IE_SUPPRESS_DEPRECATED_END
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    for (auto &input : inputInfo) {
+        setInputParameters();
+        computeShapesToReshape();
+    }
+    reshapeCNNNetwork();
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+    infer_request.Infer();
+    OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+    for (auto &output : output_info)
+        Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+}
+
+void test_create_cnnnetwork_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create CNNNetwork from network: \"" << model << "\" for " << n << " times");
+    Core ie;
+    CNNNetwork cnnNetwork;
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        cnnNetwork = ie.ReadNetwork(model);
+    }
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    for (auto &input : inputInfo) {
+        setInputParameters();
+        computeShapesToReshape();
+    }
+    reshapeCNNNetwork();
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+    infer_request.Infer();
+    OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+    for (auto &output : output_info)
+        Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+}
+
+void test_set_input_params_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Apply preprocessing for CNNNetwork from network: \"" << model << "\" for " << n << " times");
+    Core ie;
+    CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        for (auto &input : inputInfo) {
+            setInputParameters();
+        }
+    }
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    for (auto &input : inputInfo) {
+        computeShapesToReshape();
+    }
+    reshapeCNNNetwork();
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+    infer_request.Infer();
+    OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+    for (auto &output : output_info)
+        Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+}
+
+void test_cnnnetwork_reshape_batch_x2_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Reshape to batch*=2 of CNNNetwork created from network: \"" << model << "\" for " << n << " times");
+    Core ie;
+    CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    for (auto &input : inputInfo) {
+        setInputParameters();
+    }
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    int prev_batch = -1, new_batch;
+    for (auto &input : inputInfo) {
+        auto layout = input.second->getTensorDesc().getLayout();
+        if ((layout == Layout::NCHW) || (layout == Layout::NC))
+            prev_batch = shapes[input.first][batchIndex];
+    }
+    if (prev_batch == -1)
+        throw std::logic_error("Reshape wasn't applied for a model.");
+
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+
+        new_batch = ((i % 2) == 0) ? prev_batch * 2 : prev_batch;
+        for (auto &input : inputInfo) {
+            auto layout = input.second->getTensorDesc().getLayout();
+            if ((layout == Layout::NCHW) || (layout == Layout::NC)) {
+                shapes[input.first][batchIndex] = new_batch;
+                doReshape = true;
+            }
+        }
+        reshapeCNNNetwork();
+    }
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+    infer_request.Infer();
+    OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+    for (auto &output : output_info)
+        Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+}
+
+void test_create_exenetwork_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create ExecutableNetwork from network: \"" << model
+             << "\" for device: \"" << target_device << "\" for " << n << " times");
+    Core ie;
+    CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    for (auto &input : inputInfo) {
+        setInputParameters();
+        computeShapesToReshape();
+    }
+    reshapeCNNNetwork();
+    ExecutableNetwork exeNetwork;
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    }
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+    infer_request.Infer();
+    OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+    for (auto &output : output_info)
+        Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+}
+
+void test_create_infer_request_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Create InferRequest from network: \"" << model
+             << "\" for device: \"" << target_device << "\" for " << n << " times");
+    Core ie;
+    CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    for (auto &input : inputInfo) {
+        setInputParameters();
+        computeShapesToReshape();
+    }
+    reshapeCNNNetwork();
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request;
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        infer_request = exeNetwork.CreateInferRequest();
+    }
+    infer_request.Infer();
+    OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+    for (auto &output : output_info)
+        Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+}
+
+void test_infer_request_inference_full_pipeline(const std::string &model, const std::string &target_device, const int &n) {
+    log_info("Inference of InferRequest from network: \"" << model
+             << "\" for device: \"" << target_device << "\" for " << n << " times");
+    Core ie;
+    CNNNetwork cnnNetwork = ie.ReadNetwork(model);
+    InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
+    ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
+    bool doReshape = false;
+    for (auto &input : inputInfo) {
+        setInputParameters();
+        computeShapesToReshape();
+    }
+    reshapeCNNNetwork();
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, target_device);
+    InferRequest infer_request = exeNetwork.CreateInferRequest();
+    for (int i = 0; i < n; i++) {
+        if (i == n / 2) {
+            log_info("Half of the test have already passed");
+        }
+        infer_request.Infer();
+        OutputsDataMap output_info(cnnNetwork.getOutputsInfo());
+        for (auto &output : output_info)
+            Blob::Ptr outputBlob = infer_request.GetBlob(output.first);
+    }
+}
index 181fb12..b3fc7b3 100644 (file)
@@ -108,18 +108,30 @@
                        Default value is determined automatically for a device. 
                        Please note that although the automatic selection usually provides a reasonable performance, 
                        it still may be non-optimal for some cases, especially for very small networks.
+  -enforcebf16 [ENFORCE_BFLOAT16], --enforce_bfloat16 [ENFORCE_BFLOAT16]
+                        Optional. Enforce execution of floating point
+                        operations in bfloat16 precision where it is acceptable.
   -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS
                         Number of threads to use for inference on the CPU
                         (including HETERO  and MULTI cases).
-  -pin {YES,NO}, --infer_threads_pinning {YES,NO}
-                        Optional. Enable ("YES" is default value) or disable
-                        ("NO")CPU threads pinning for CPU-involved inference.
+  -pin {YES,NO,NUMA}, --infer_threads_pinning {YES,NO,NUMA}
+                        Optional. Enable threads->cores ('YES' is default
+                        value), threads->(NUMA)nodes ('NUMA') or completely
+                        disable ('NO')CPU threads pinning for CPU-involved
+                        inference.
   --exec_graph_path EXEC_GRAPH_PATH
                         Optional. Path to a file where to store executable
                         graph information serialized.
   -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS]
                         Optional. Report performance counters.
-
+  -dump_config DUMP_CONFIG
+                        Optional. Path to JSON file to dump IE parameters,
+                        which were set by application.
+  -load_config LOAD_CONFIG
+                        Optional. Path to JSON file to load custom IE
+                        parameters. Please note, command line parameters have
+                        higher priority than parameters from configuration
+                        file.
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
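+
+The configuration file used with `-load_config` (and produced by `-dump_config`) is a JSON map of device names to Inference Engine parameters. The exact keys and values depend on the target device, so the Python sketch below only illustrates the assumed structure; it writes a file in the same format that `-dump_config` dumps:
+
+```python
+import json
+
+# Hypothetical per-device settings; the keys shown are examples, not an exhaustive list.
+config = {
+    "CPU": {
+        "PERF_COUNT": "NO",
+        "CPU_BIND_THREAD": "YES",
+        "CPU_THROUGHPUT_STREAMS": "CPU_THROUGHPUT_AUTO",
+    },
+}
+
+with open("ie_config.json", "w") as config_file:
+    json.dump(config, config_file, indent=4)
+```
+
+Such a file can then be passed back with `-load_config ie_config.json`; command line parameters still have higher priority than values from the file.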
index ae4d746..1682bee 100644 (file)
@@ -18,34 +18,33 @@ from datetime import datetime
 from statistics import median
 from openvino.inference_engine import IENetwork, IECore, get_version, StatusCode
 
-from .utils.constants import CPU_DEVICE_NAME, MULTI_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION
+from .utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, GPU_DEVICE_NAME, BIN_EXTENSION
 from .utils.logging import logger
-from .utils.utils import get_duration_seconds, parse_nstreams_value_per_device, parse_devices
+from .utils.utils import get_duration_seconds
 from .utils.inputs_filling import get_blob_shape
-
+from .utils.statistics_report import StatisticsReport
 
 class Benchmark:
-    def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type):
+    def __init__(self, device: str, number_infer_requests: int = None, number_iterations: int = None,
+                 duration_seconds: int = None, api_type: str = 'async'):
         self.device = device
         self.ie = IECore()
         self.nireq = number_infer_requests
         self.niter = number_iterations
         self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
         self.api_type = api_type
-        self.device_number_streams = {}
 
     def __del__(self):
         del self.ie
 
     def add_extension(self, path_to_extension: str=None, path_to_cldnn_config: str=None):
-        if GPU_DEVICE_NAME in self.device:
-            if path_to_cldnn_config:
-                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
-                logger.info('GPU extensions is loaded {}'.format(path_to_cldnn_config))
-        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
-            if path_to_extension:
-                self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
-                logger.info('CPU extensions is loaded {}'.format(path_to_extension))
+        if path_to_cldnn_config:
+            self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
+            logger.info('GPU extensions is loaded {}'.format(path_to_cldnn_config))
+
+        if path_to_extension:
+            self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
+            logger.info('CPU extensions is loaded {}'.format(path_to_extension))
 
     def get_version_info(self) -> str:
         logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
@@ -67,57 +66,13 @@ class Benchmark:
             logger.info('Resizing network to batch = {}'.format(batch_size))
             ie_network.reshape(new_shapes)
 
-    def set_config(self, number_streams: int, api_type: str = 'async',
-                   number_threads: int = None, infer_threads_pinning: int = None):
-        devices = parse_devices(self.device)
-        self.device_number_streams = parse_nstreams_value_per_device(devices, number_streams)
-        for device_name in  self.device_number_streams.keys():
-            key = device_name + "_THROUGHPUT_STREAMS"
-            supported_config_keys = self.ie.get_metric(device_name, 'SUPPORTED_CONFIG_KEYS')
-            if key not in supported_config_keys:
-                raise Exception("Device " + device_name + " doesn't support config key '" + key + "'! " +
-                                "Please specify -nstreams for correct devices in format  <dev1>:<nstreams1>,<dev2>:<nstreams2>");
-
-        for device in devices:
-            if device == CPU_DEVICE_NAME:  # CPU supports few special performance-oriented keys
-                # limit threading for CPU portion of inference
-                if number_threads:
-                    self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device)
-
-                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
-                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
-                else:
-                    # pin threads for CPU portion of inference
-                    self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device)
-
-                # for CPU execution, more throughput-oriented execution via streams
-                # for pure CPU execution, more throughput-oriented execution via streams
-                if api_type == 'async':
-                    cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
-                    if device in self.device_number_streams.keys():
-                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
-                    self.ie.set_config(cpu_throughput, device)
-                    self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')
-
-            elif device == GPU_DEVICE_NAME:
-                if api_type == 'async':
-                    gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
-                    if device in self.device_number_streams.keys():
-                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
-                    self.ie.set_config(gpu_throughput, device)
-                    self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')
-
-                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
-                    # multi-device execution with the CPU+GPU performs best with GPU trottling hint,
-                    # which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
-                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)
-
-            elif device == MYRIAD_DEVICE_NAME:
-                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO'}, MYRIAD_DEVICE_NAME)
+    def set_config(self, config = {}):
+        for device in config.keys():
+            self.ie.set_config(config[device], device)
 
     def read_network(self, path_to_model: str):
         xml_filename = os.path.abspath(path_to_model)
-        head, tail = os.path.splitext(xml_filename)
+        head, _ = os.path.splitext(xml_filename)
         bin_filename = os.path.abspath(head + BIN_EXTENSION)
 
         ie_network = self.ie.read_network(xml_filename, bin_filename)
@@ -129,15 +84,14 @@ class Benchmark:
 
         return ie_network
 
-    def load_network(self, ie_network: IENetwork, perf_counts: bool):
-        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}
-
+    def load_network(self, ie_network: IENetwork, config = {}):
         exe_network = self.ie.load_network(ie_network,
                                            self.device,
                                            config=config,
                                            num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
         # Number of requests
         self.nireq = len(exe_network.requests)
+
         return exe_network
 
     def infer(self, exe_network, batch_size, progress_bar=None):
index 1158bed..f2d4b01 100644 (file)
@@ -4,16 +4,15 @@ from datetime import datetime
 
 from openvino.tools.benchmark.benchmark import Benchmark
 from openvino.tools.benchmark.parameters import parse_args
-from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME
+from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION
 from openvino.tools.benchmark.utils.inputs_filling import set_inputs
 from openvino.tools.benchmark.utils.logging import logger
 from openvino.tools.benchmark.utils.progress_bar import ProgressBar
 from openvino.tools.benchmark.utils.utils import next_step, config_network_inputs, get_number_iterations, \
     process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
-    get_command_line_arguments
+    get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, load_config, dump_config
 from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport
 
-
 def main():
     # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
     next_step()
@@ -27,20 +26,42 @@ def run(args):
                             "Although the automatic selection usually provides a reasonable performance, "
                             "but it still may be non-optimal for some cases, for more information look at README. ")
 
+        command_line_arguments = get_command_line_arguments(sys.argv)
         if args.report_type:
           statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
-          statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, get_command_line_arguments(sys.argv))
+          statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)
+
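+        # Check whether a flag was explicitly passed on the command line (rather than coming from a loaded config or a default)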
+        def is_flag_set_in_command_line(flag):
+            return any(x.strip('-') == flag for x, y in command_line_arguments)
+
+        device_name = args.target_device
 
+        devices = parse_devices(device_name)
+        device_number_streams = parse_nstreams_value_per_device(devices, args.number_streams)
+
+        config = {}
+        if args.load_config:
+            load_config(args.load_config, config)
 
         # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
         next_step(step_id=2)
 
-        device_name = args.target_device.upper()
-
         benchmark = Benchmark(args.target_device, args.number_infer_requests,
                               args.number_iterations, args.time, args.api_type)
 
-        benchmark.add_extension(args.path_to_extension, args.path_to_cldnn_config)
+        ## CPU (MKLDNN) extensions
+        if CPU_DEVICE_NAME in device_name and args.path_to_extension:
+            benchmark.add_extension(path_to_extension=args.path_to_extension)
+
+        ## GPU (clDNN) Extensions
+        if GPU_DEVICE_NAME in device_name and args.path_to_cldnn_config:
+            if GPU_DEVICE_NAME not in config.keys():
+                config[GPU_DEVICE_NAME] = {}
+            config[GPU_DEVICE_NAME]['CONFIG_FILE'] = args.path_to_cldnn_config
+
+        if GPU_DEVICE_NAME in config.keys() and 'CONFIG_FILE' in config[GPU_DEVICE_NAME].keys():
+            cldnn_config = config[GPU_DEVICE_NAME]['CONFIG_FILE']
+            benchmark.add_extension(path_to_cldnn_config=cldnn_config)
 
         version = benchmark.get_version_info()
 
@@ -74,17 +95,89 @@ def run(args):
 
         # --------------------- 6. Setting device configuration --------------------------------------------------------
         next_step()
-        benchmark.set_config(args.number_streams, args.api_type, args.number_threads,
-                             args.infer_threads_pinning)
+
+        perf_counts = False
+        for device in devices:
+            if device not in config.keys():
+                config[device] = {}
+            ## Set performance counter
+            if is_flag_set_in_command_line('pc'):
+                ## set to user defined value
+                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
+            elif 'PERF_COUNT' in config[device].keys() and config[device]['PERF_COUNT'] == 'YES':
+                logger.warn("Performance counters for {} device is turned on. ".format(device) +
+                            "To print results use -pc option.")
+            elif args.report_type in [ averageCntReport, detailedCntReport ]:
+                logger.warn("Turn on performance counters for {} device ".format(device) +
+                            "since report type is {}.".format(args.report_type))
+                config[device]['PERF_COUNT'] = 'YES'
+            elif args.exec_graph_path is not None:
+                logger.warn("Turn on performance counters for {} device ".format(device) +
+                            "due to execution graph dumping.")
+                config[device]['PERF_COUNT'] = 'YES'
+            else:
+                ## set to default value
+                config[device]['PERF_COUNT'] = 'YES' if args.perf_counts else 'NO'
+            perf_counts = True if config[device]['PERF_COUNT'] == 'YES' else perf_counts
+
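+            # Resolve <DEVICE>_THROUGHPUT_STREAMS: prefer the -nstreams value, then the loaded config; otherwise fall back to the device AUTO default in async mode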
+            def set_throughput_streams():
+                key = device + "_THROUGHPUT_STREAMS"
+                if device in device_number_streams.keys():
+                    ## set to user defined value
+                    supported_config_keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS')
+                    if key not in supported_config_keys:
+                        raise Exception("Device {} doesn't support config key '{}'! ".format(device, key) +
+                                        "Please specify -nstreams for correct devices in format  <dev1>:<nstreams1>,<dev2>:<nstreams2>")
+                    config[device][key] = device_number_streams[device]
+                elif key not in config[device].keys() and args.api_type == "async":
+                    logger.warn("-nstreams default value is determined automatically for {} device. ".format(device) +
+                                "Although the automatic selection usually provides a reasonable performance,"
+                                "but it still may be non-optimal for some cases, for more information look at README.")
+                    config[device][key] = device + "_THROUGHPUT_AUTO"
+                if key in config[device].keys():
+                    device_number_streams[device] = config[device][key]
+
+            if device == CPU_DEVICE_NAME: # CPU supports few special performance-oriented keys
+                # limit threading for CPU portion of inference
+                if args.number_threads and is_flag_set_in_command_line("nthreads"):
+                    config[device]['CPU_THREADS_NUM'] = str(args.number_threads)
+
+                if is_flag_set_in_command_line("enforcebf16") or is_flag_set_in_command_line("enforce_bfloat16"):
+                    config[device]['ENFORCE_BF16'] = 'YES' if args.enforce_bfloat16 else 'NO'
+
+                if is_flag_set_in_command_line('pin'):
+                    ## set to user defined value
+                    config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
+                elif 'CPU_BIND_THREAD' not in config[device].keys():
+                    if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
+                        logger.warn("Turn off threads pinning for {}".format(device) +
+                                    "device since multi-scenario with GPU device is used.")
+                        config[device]['CPU_BIND_THREAD'] = 'NO'
+                    else:
+                        ## set to default value
+                        config[device]['CPU_BIND_THREAD'] = args.infer_threads_pinning
+
+                ## for CPU execution, more throughput-oriented execution via streams
+                set_throughput_streams()
+            elif device == GPU_DEVICE_NAME:
+                ## for GPU execution, more throughput-oriented execution via streams
+                set_throughput_streams()
+
+                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
+                    logger.warn("Turn on GPU trottling. Multi-device execution with the CPU + GPU performs best with GPU trottling hint, " +
+                                "which releases another CPU thread (that is otherwise used by the GPU driver for active polling)")
+                    config[device]['CLDNN_PLUGIN_THROTTLE'] = '1'
+            elif device == MYRIAD_DEVICE_NAME:
+                config[device]['LOG_LEVEL'] = 'LOG_INFO'
+        perf_counts = perf_counts
+
+        benchmark.set_config(config)
 
         # --------------------- 7. Loading the model to the device -----------------------------------------------------
         next_step()
 
         start_time = datetime.utcnow()
-        perf_counts = True if args.perf_counts or \
-                              args.report_type in [ averageCntReport, detailedCntReport ] or \
-                              args.exec_graph_path else False
-        exe_network = benchmark.load_network(ie_network, perf_counts)
+        exe_network = benchmark.load_network(ie_network)
         duration_ms = "{:.2f}".format((datetime.utcnow() - start_time).total_seconds() * 1000)
         logger.info("Load network took {} ms".format(duration_ms))
         if statistics:
@@ -92,6 +185,10 @@ def run(args):
                                       [
                                           ('load network time (ms)', duration_ms)
                                       ])
+        ## Update number of streams
+        for device in device_number_streams.keys():
+            key = device + '_THROUGHPUT_STREAMS'
+            device_number_streams[device] = benchmark.ie.get_config(device, key)
 
         # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
         next_step()
@@ -117,14 +214,14 @@ def run(args):
                                           ('topology', ie_network.name),
                                           ('target device', device_name),
                                           ('API', args.api_type),
-                                          ('precision', str(ie_network.precision)),
+                                          ('precision', "UNSPECIFIED"),
                                           ('batch size', str(batch_size)),
                                           ('number of iterations', str(benchmark.niter) if benchmark.niter else "0"),
                                           ('number of parallel infer requests', str(benchmark.nireq)),
                                           ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))),
                                        ])
 
-            for nstreams in benchmark.device_number_streams.items():
+            for nstreams in device_number_streams.items():
                 statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG,
                                          [
                                             ("number of {} streams".format(nstreams[0]), str(nstreams[1])),
@@ -146,6 +243,10 @@ def run(args):
         # ------------------------------------ 11. Dumping statistics report -------------------------------------------
         next_step()
 
+        if args.dump_config:
+            dump_config(args.dump_config, config)
+            logger.info("Inference Engine configuration settings were dumped to {}".format(args.dump_config))
+
         if args.exec_graph_path:
             dump_exec_graph(exe_network, args.exec_graph_path)
 
index 44a7d37..f7c474f 100644 (file)
@@ -4,7 +4,6 @@ from fnmatch import fnmatch
 from openvino.tools.benchmark.utils.constants import XML_EXTENSION_PATTERN
 from openvino.tools.benchmark.utils.utils import show_available_devices
 
-
 def str2bool(v):
     if v.lower() in ('yes', 'true', 't', 'y', '1'):
         return True
@@ -77,18 +76,19 @@ def parse_args():
                            'Default value is determined automatically for a device. Please note that although the automatic selection '
                            'usually provides a reasonable performance, it still may be non - optimal for some cases, especially for very small networks. '
                            'See samples README for more details.')
-
+    args.add_argument('-enforcebf16', '--enforce_bfloat16', type=str2bool, required=False, default=False, nargs='?', const=True,
+                      help='Optional. Enforce execution of floating point operations in bfloat16 precision where it is acceptable.')
     args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None,
                       help='Number of threads to use for inference on the CPU '
                            '(including HETERO and MULTI cases).')
     args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES', choices=['YES', 'NO', 'NUMA'],
                       help='Optional. Enable  threads->cores (\'YES\' is default value), threads->(NUMA)nodes (\'NUMA\') or completely  disable (\'NO\')' 
                            'CPU threads pinning for CPU-involved inference.')
-    args.add_argument('--exec_graph_path', type=str, required=False,
+    args.add_argument('-exec_graph_path', '--exec_graph_path', type=str, required=False,
                       help='Optional. Path to a file where to store executable graph information serialized.')
     args.add_argument('-pc', '--perf_counts', type=str2bool, required=False, default=False, nargs='?', const=True,
                       help='Optional. Report performance counters.', )
-    args.add_argument('--report_type', type=str, required=False,
+    args.add_argument('-report_type', '--report_type', type=str, required=False,
                       choices=['no_counters', 'average_counters', 'detailed_counters'],
                       help="Optional. Enable collecting statistics report. \"no_counters\" report contains "
                            "configuration options specified, resulting FPS and latency. \"average_counters\" "
@@ -96,8 +96,13 @@ def parse_args():
                            "counters values for each layer from the network. \"detailed_counters\" report "
                            "extends \"average_counters\" report and additionally includes per-layer PM "
                            "counters and latency for each executed infer request.")
-    args.add_argument('--report_folder', type=str, required=False, default='',
+    args.add_argument('-report_folder', '--report_folder', type=str, required=False, default='',
                       help="Optional. Path to a folder where statistics report is stored.")
+    args.add_argument('-dump_config', type=str, required=False, default='',
+                      help="Optional. Path to JSON file to dump IE parameters, which were set by application.")
+    args.add_argument('-load_config', type=str, required=False, default='',
+                      help="Optional. Path to JSON file to load custom IE parameters."
+                           " Please note, command line parameters have higher priority then parameters from configuration file.")
     parsed_args = parser.parse_args()
 
     validate_args(parsed_args)
index 834da5a..32da5e4 100644 (file)
@@ -20,6 +20,7 @@ from .constants import DEVICE_DURATION_IN_SECS, UNKNOWN_DEVICE_TYPE, DEVICE_NIRE
 from .inputs_filling import is_image
 from .logging import logger
 
+import json
 
 def static_vars(**kwargs):
     def decorate(func):
@@ -122,6 +123,8 @@ def get_nireq(target_device):
 
 
 def parse_devices(device_string):
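+    # A bare 'MULTI' or 'HETERO' device string carries no explicit device list, so there are no per-device entries to parse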
+    if device_string in ['MULTI', 'HETERO']:
+        return list()
     devices = device_string
     if ':' in devices:
         devices = devices.partition(':')[2]
@@ -139,14 +142,14 @@ def parse_nstreams_value_per_device(devices, values_string):
         device_value_vec = device_value_string.split(':')
         if len(device_value_vec) == 2:
             device_name = device_value_vec[0]
-            nstreams = int(device_value_vec[1])
+            nstreams = device_value_vec[1]
             if device_name in devices:
                 result[device_name] = nstreams
             else:
                 raise Exception("Can't set nstreams value " + str(nstreams) +
                                 " for device '" + device_name + "'! Incorrect device name!");
         elif len(device_value_vec) == 1:
-            nstreams = int(device_value_vec[0])
+            nstreams = device_value_vec[0]
             for device in devices:
                 result[device] = nstreams
         elif not device_value_vec:
@@ -238,4 +241,12 @@ def get_command_line_arguments(argv):
 
 def show_available_devices():
     ie = IECore()
-    print("\nAvailable target devices:  ", ("  ".join(ie.available_devices)))
\ No newline at end of file
+    print("\nAvailable target devices:  ", ("  ".join(ie.available_devices)))
+
+def dump_config(filename, config):
+    with open(filename, 'w') as f:
+        json.dump(config, f, indent=4)
+
+def load_config(filename, config):
+    with open(filename) as f:
+        config.update(json.load(f))
\ No newline at end of file