From a57718e1acc3cc75f9b33ff5f24b555c0a2eee99 Mon Sep 17 00:00:00 2001 From: Pavel Vlasov Date: Thu, 17 Aug 2017 14:57:58 +0300 Subject: [PATCH] ICV2017u3 package update; - Optimizations set change. Now IPP integrations will provide code for SSE42, AVX2 and AVX512 (SKX) CPUs only. For HW below SSE42 IPP code is disabled. - Performance regressions fixes for IPP code paths; - cv::boxFilter integration improvement; - cv::filter2D integration improvement; --- 3rdparty/ippicv/CMakeLists.txt | 7 +- 3rdparty/ippicv/ippicv.cmake | 26 +-- CMakeLists.txt | 12 +- cmake/OpenCVFindIPP.cmake | 32 ++-- cmake/OpenCVFindIPPIW.cmake | 3 +- cmake/templates/cvconfig.h.in | 2 +- modules/core/include/opencv2/core/base.hpp | 10 +- modules/core/include/opencv2/core/private.hpp | 134 +++++++------- modules/core/src/convert.cpp | 77 ++++---- modules/core/src/copy.cpp | 34 ++-- modules/core/src/mathfuncs_core.dispatch.cpp | 8 +- modules/core/src/matmul.cpp | 2 +- modules/core/src/matrix.cpp | 5 +- modules/core/src/precomp.hpp | 6 +- modules/core/src/stat.cpp | 29 ++- modules/core/src/system.cpp | 188 +++++++++++++++++--- modules/imgproc/src/canny.cpp | 4 +- modules/imgproc/src/color.cpp | 85 +++++---- modules/imgproc/src/deriv.cpp | 18 +- modules/imgproc/src/filter.cpp | 246 ++++++++------------------ modules/imgproc/src/filterengine.hpp | 12 ++ modules/imgproc/src/histogram.cpp | 6 +- modules/imgproc/src/imgwarp.cpp | 41 +++-- modules/imgproc/src/moments.cpp | 6 + modules/imgproc/src/morph.cpp | 141 ++++++++++----- modules/imgproc/src/smooth.cpp | 134 ++++++-------- modules/imgproc/src/thresh.cpp | 17 +- modules/objdetect/src/haar.cpp | 2 +- modules/ts/include/opencv2/ts/ts_perf.hpp | 2 +- modules/ts/src/ts_func.cpp | 10 ++ 30 files changed, 721 insertions(+), 578 deletions(-) diff --git a/3rdparty/ippicv/CMakeLists.txt b/3rdparty/ippicv/CMakeLists.txt index b9fb79f..59f1788 100644 --- a/3rdparty/ippicv/CMakeLists.txt +++ b/3rdparty/ippicv/CMakeLists.txt @@ -6,7 +6,7 @@ 
project(${IPP_IW_LIBRARY}) ocv_include_directories(${IPP_INCLUDE_DIRS} ${IPP_IW_PATH}/include) add_definitions(-DIW_BUILD) -if(HAVE_IPP_ICV_ONLY) +if(HAVE_IPP_ICV) add_definitions(-DICV_BASE) endif() @@ -21,7 +21,10 @@ add_library(${IPP_IW_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs}) if(UNIX) if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function -Wno-missing-braces -Wno-missing-field-initializers") + endif() + if (CMAKE_C_COMPILER_ID MATCHES "Clang") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-self-assign") endif() endif() diff --git a/3rdparty/ippicv/ippicv.cmake b/3rdparty/ippicv/ippicv.cmake index d601da4..751df33 100644 --- a/3rdparty/ippicv/ippicv.cmake +++ b/3rdparty/ippicv/ippicv.cmake @@ -2,37 +2,37 @@ function(download_ippicv root_var) set(${root_var} "" PARENT_SCOPE) # Commit SHA in the opencv_3rdparty repo - set(IPPICV_COMMIT "a62e20676a60ee0ad6581e217fe7e4bada3b95db") + set(IPPICV_COMMIT "dfe3162c237af211e98b8960018b564bc209261d") # Define actual ICV versions if(APPLE) set(OPENCV_ICV_PLATFORM "macosx") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac") if(X86_64) - set(OPENCV_ICV_NAME "ippicv_2017u2_mac_intel64_20170418.tgz") - set(OPENCV_ICV_HASH "0c25953c99dbb499ff502485a9356d8d") + set(OPENCV_ICV_NAME "ippicv_2017u3_mac_intel64_general_20170822.tgz") + set(OPENCV_ICV_HASH "c1ebb5dfa5b7f54b0c44e1917805a463") else() - set(OPENCV_ICV_NAME "ippicv_2017u2_mac_ia32_20170418.tgz") - set(OPENCV_ICV_HASH "5f225948f3f64067c681293c098d50d8") + set(OPENCV_ICV_NAME "ippicv_2017u3_mac_ia32_general_20170822.tgz") + set(OPENCV_ICV_HASH "49b05a669042753ae75895a445ebd612") endif() elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86")) set(OPENCV_ICV_PLATFORM "linux") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx") if(X86_64) - set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_intel64_20170418.tgz") - set(OPENCV_ICV_HASH "87cbdeb627415d8e4bc811156289fa3a") + 
set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_intel64_general_20170822.tgz") + set(OPENCV_ICV_HASH "4e0352ce96473837b1d671ce87f17359") else() - set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170418.tgz") - set(OPENCV_ICV_HASH "f2cece00d802d4dea86df52ed095257e") + set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_ia32_general_20170822.tgz") + set(OPENCV_ICV_HASH "dcdb0ba4b123f240596db1840cd59a76") endif() elseif(WIN32 AND NOT ARM) set(OPENCV_ICV_PLATFORM "windows") set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win") if(X86_64) - set(OPENCV_ICV_NAME "ippicv_2017u2_win_intel64_20170418.zip") - set(OPENCV_ICV_HASH "75060a0c662c0800f48995b7e9b085f6") + set(OPENCV_ICV_NAME "ippicv_2017u3_win_intel64_general_20170822.zip") + set(OPENCV_ICV_HASH "0421e642bc7ad741a2236d3ec4190bdd") else() - set(OPENCV_ICV_NAME "ippicv_2017u2_win_ia32_20170418.zip") - set(OPENCV_ICV_HASH "60fcf3ccd9a2ebc9e432ffb5cb91638b") + set(OPENCV_ICV_NAME "ippicv_2017u3_win_ia32_general_20170822.zip") + set(OPENCV_ICV_HASH "8a7680ae352c192de2e2e34936164bd0") endif() else() return() diff --git a/CMakeLists.txt b/CMakeLists.txt index af5e2c1..25e4a36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,7 +255,6 @@ OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT) ) OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) ) OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) ) -OCV_OPTION(WITH_IPP_A "Include Intel IPP_A support" OFF IF (MSVC OR X86 OR X86_64) ) OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT)) OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) ) @@ -1311,15 +1310,16 @@ status(" Other third-party libraries:") if(WITH_IPP AND HAVE_IPP) status(" Use 
Intel IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]") status(" at:" "${IPP_ROOT_DIR}") - if(NOT HAVE_IPP_ICV_ONLY) + if(NOT HAVE_IPP_ICV) status(" linked:" BUILD_WITH_DYNAMIC_IPP THEN "dynamic" ELSE "static") endif() if(HAVE_IPP_IW) if(BUILD_IPP_IW) - status(" Use Intel IPP IW:" "build (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})") + status(" Use Intel IPP IW:" "sources (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})") else() - status(" Use Intel IPP IW:" "prebuilt binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})") + status(" Use Intel IPP IW:" "binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})") endif() + status(" at:" "${IPP_IW_PATH}") else() status(" Use Intel IPP IW:" NO) endif() @@ -1328,10 +1328,6 @@ else() status(" Use Intel IPP IW:" WITH_IPP AND NOT HAVE_IPP AND HAVE_IPP_IW THEN "IPP not found or implicitly disabled" ELSE NO) endif() -if(DEFINED WITH_IPP_A) -status(" Use Intel IPP Async:" HAVE_IPP_A THEN "YES" ELSE NO) -endif(DEFINED WITH_IPP_A) - if(DEFINED WITH_VA) status(" Use VA:" HAVE_VA THEN "YES" ELSE NO) endif(DEFINED WITH_VA) diff --git a/cmake/OpenCVFindIPP.cmake b/cmake/OpenCVFindIPP.cmake index 04b17f7..068cef7 100644 --- a/cmake/OpenCVFindIPP.cmake +++ b/cmake/OpenCVFindIPP.cmake @@ -11,13 +11,13 @@ # # On return this will define: # -# HAVE_IPP - True if Intel IPP found -# HAVE_IPP_ICV_ONLY - True if Intel IPP ICV version is available -# IPP_ROOT_DIR - root of IPP installation -# IPP_INCLUDE_DIRS - IPP include folder -# IPP_LIBRARIES - IPP libraries that are used by OpenCV -# IPP_VERSION_STR - string with the newest detected IPP version -# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD) +# HAVE_IPP - True if Intel IPP found +# HAVE_IPP_ICV - True if Intel IPP ICV version is available +# IPP_ROOT_DIR - root of IPP installation +# IPP_INCLUDE_DIRS - IPP include folder +# IPP_LIBRARIES - IPP libraries that are 
used by OpenCV +# IPP_VERSION_STR - string with the newest detected IPP version +# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD) # IPP_VERSION_MINOR # IPP_VERSION_BUILD # @@ -25,7 +25,7 @@ # unset(HAVE_IPP CACHE) -unset(HAVE_IPP_ICV_ONLY) +unset(HAVE_IPP_ICV) unset(IPP_ROOT_DIR) unset(IPP_INCLUDE_DIRS) unset(IPP_LIBRARIES) @@ -79,7 +79,7 @@ endmacro() macro(_ipp_not_supported) message(STATUS ${ARGN}) unset(HAVE_IPP) - unset(HAVE_IPP_ICV_ONLY) + unset(HAVE_IPP_ICV) unset(IPP_VERSION_STR) return() endmacro() @@ -92,7 +92,7 @@ macro(ipp_detect_version) set(__msg) if(EXISTS ${IPP_ROOT_DIR}/include/ippicv_redefs.h) set(__msg " (ICV version)") - set(HAVE_IPP_ICV_ONLY 1) + set(HAVE_IPP_ICV 1) elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h) # nothing else() @@ -118,7 +118,7 @@ macro(ipp_detect_version) set(IPP_LIBRARY_DIR ${DIR}) endmacro() - if(APPLE AND NOT HAVE_IPP_ICV_ONLY) + if(APPLE AND NOT HAVE_IPP_ICV) _ipp_set_library_dir(${IPP_ROOT_DIR}/lib) elseif(IPP_X64) _ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64) @@ -128,7 +128,7 @@ macro(ipp_detect_version) macro(_ipp_add_library name) # dynamic linking is only supported for standalone version of Intel IPP - if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) + if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV) if (WIN32) set(IPP_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX}) set(IPP_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX}) @@ -141,7 +141,7 @@ macro(ipp_detect_version) set(IPP_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) endif () if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) - if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) + if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV) # When using dynamic libraries from standalone Intel IPP it is your responsibility to install those on the target system list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX}) else () @@ -167,14 +167,14 @@ 
macro(ipp_detect_version) set(IPP_PREFIX "ipp") if(${IPP_VERSION_STR} VERSION_LESS "8.0") - if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) + if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV) set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 7.x else () set(IPP_SUFFIX "_l") # static not threaded libs suffix Intel IPP 7.x endif () else () if(WIN32) - if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY) + if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV) set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 8.x for Windows else () set(IPP_SUFFIX "mt") # static not threaded libs suffix Intel IPP 8.x for Windows @@ -184,7 +184,7 @@ macro(ipp_detect_version) endif() endif() - if(HAVE_IPP_ICV_ONLY) + if(HAVE_IPP_ICV) _ipp_add_library(icv) else() _ipp_add_library(cv) diff --git a/cmake/OpenCVFindIPPIW.cmake b/cmake/OpenCVFindIPPIW.cmake index c7927f5..10f77b1 100644 --- a/cmake/OpenCVFindIPPIW.cmake +++ b/cmake/OpenCVFindIPPIW.cmake @@ -27,6 +27,7 @@ macro(ippiw_debugmsg MESSAGE) message(STATUS "${MESSAGE}") endif() endmacro() +file(TO_CMAKE_PATH "${IPPROOT}" IPPROOT) # This function detects Intel IPP IW version by analyzing .h file macro(ippiw_setup PATH BUILD) @@ -153,7 +154,7 @@ ippiw_setup("${IPPROOT}/../${IW_PACKAGE_SUBDIR}/" 0) # take Intel IPP IW from ICV package -if(NOT HAVE_IPP_ICV_ONLY AND BUILD_IPP_IW) +if(NOT HAVE_IPP_ICV AND BUILD_IPP_IW) message(STATUS "Cannot find Intel IPP IW. 
Checking \"Intel IPP for OpenCV\" package") set(TEMP_ROOT 0) include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake") diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 76bb431..e92ac11 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -103,7 +103,7 @@ /* Intel Integrated Performance Primitives */ #cmakedefine HAVE_IPP -#cmakedefine HAVE_IPP_ICV_ONLY +#cmakedefine HAVE_IPP_ICV #cmakedefine HAVE_IPP_IW /* Intel IPP Async */ diff --git a/modules/core/include/opencv2/core/base.hpp b/modules/core/include/opencv2/core/base.hpp index a445599..8590d0b 100644 --- a/modules/core/include/opencv2/core/base.hpp +++ b/modules/core/include/opencv2/core/base.hpp @@ -693,8 +693,14 @@ CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, c int line = 0); CV_EXPORTS int getIppStatus(); CV_EXPORTS String getIppErrorLocation(); -CV_EXPORTS_W bool useIPP(); -CV_EXPORTS_W void setUseIPP(bool flag); +CV_EXPORTS_W bool useIPP(); +CV_EXPORTS_W void setUseIPP(bool flag); +CV_EXPORTS_W String getIppVersion(); + +// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results +// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests. 
+CV_EXPORTS_W bool useIPP_NE(); +CV_EXPORTS_W void setUseIPP_NE(bool flag); } // ipp diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 61059fa..5a8e0e1 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -194,8 +194,6 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un #define IPP_DISABLE_WARPAFFINE 1 // Different results #define IPP_DISABLE_WARPPERSPECTIVE 1 // Different results #define IPP_DISABLE_REMAP 1 // Different results -#define IPP_DISABLE_MORPH_ADV 1 // mask flipping in IPP -#define IPP_DISABLE_SORT_IDX 0 // different order in index tables #define IPP_DISABLE_YUV_RGB 1 // accuracy difference #define IPP_DISABLE_RGB_YUV 1 // breaks OCL accuracy tests #define IPP_DISABLE_RGB_HSV 1 // breaks OCL accuracy tests @@ -205,21 +203,12 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un #define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference #define IPP_DISABLE_HAAR 1 // improper integration/results #define IPP_DISABLE_HOUGH 1 // improper integration/results -#define IPP_DISABLE_RESIZE_8U 1 // Incompatible accuracy -#define IPP_DISABLE_RESIZE_NEAREST 1 // Accuracy mismatch (max diff 1) -#define IPP_DISABLE_RESIZE_AREA 1 // Accuracy mismatch (max diff 1) - -#define IPP_DISABLE_MINMAX_NAN_SSE42 1 // cv::minMaxIdx problem with NaN input // Temporary disabled named IPP region. 
Performance #define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations #define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653) #define IPP_DISABLE_PERF_TRUE_DIST_MT 1 // cv::distanceTransform OpenCV MT performance is better #define IPP_DISABLE_PERF_CANNY_MT 1 // cv::Canny OpenCV MT performance is better -#define IPP_DISABLE_PERF_HISTU32F_SSE42 1 // cv::calcHist optimizations problem -#define IPP_DISABLE_PERF_MORPH_SSE42 1 // cv::erode, cv::dilate optimizations problem -#define IPP_DISABLE_PERF_MAG_SSE42 1 // cv::magnitude optimizations problem -#define IPP_DISABLE_PERF_BOX16S_SSE42 1 // cv::boxFilter optimizations problem #ifdef HAVE_IPP #include "ippversion.h" @@ -229,7 +218,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un #define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE) -#ifdef HAVE_IPP_ICV_ONLY +#ifdef HAVE_IPP_ICV #define ICV_BASE #if IPP_VERSION_X100 >= 201700 #include "ippicv.h" @@ -241,6 +230,7 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un #endif #ifdef HAVE_IPP_IW #include "iw++/iw.hpp" +#include "iw/iw_ll.h" #endif #if IPP_VERSION_X100 >= 201700 @@ -251,6 +241,17 @@ CV_EXPORTS void scalarToRawData(const cv::Scalar& s, void* buf, int type, int un #define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__) +#define ippCPUID_AVX512_SKX (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ) +#define ippCPUID_AVX512_KNL (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER) + +namespace cv +{ +namespace ipp +{ +CV_EXPORTS unsigned long long getIppTopFeatures(); // Returns top major enabled IPP feature flag +} +} + static inline IppiSize ippiSize(size_t width, size_t height) { IppiSize size = { (int)width, (int)height }; @@ -322,7 +323,43 @@ static inline IppDataType ippiGetDataType(int depth) (IppDataType)-1; } +static inline int 
ippiSuggestThreadsNum(size_t width, size_t height, size_t elemSize, double multiplier) +{ + int threads = cv::getNumThreads(); + if(threads > 1 && height >= 64) + { + size_t opMemory = (int)(width*height*elemSize*multiplier); + int l2cache = 0; +#if IPP_VERSION_X100 >= 201700 + ippGetL2CacheSize(&l2cache); +#endif + if(!l2cache) + l2cache = 1 << 18; + + return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads))); + } + return 1; +} + +static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier) +{ + return ippiSuggestThreadsNum(image.cols, image.rows, image.elemSize(), multiplier); +} + #ifdef HAVE_IPP_IW +static inline bool ippiCheckAnchor(int x, int y, int kernelWidth, int kernelHeight) +{ + if(x != ((kernelWidth-1)/2) || y != ((kernelHeight-1)/2)) + return 0; + else + return 1; +} + +static inline ::ipp::IwiSize ippiGetSize(const cv::Size & size) +{ + return ::ipp::IwiSize((IwSize)size.width, (IwSize)size.height); +} + static inline IwiDerivativeType ippiGetDerivType(int dx, int dy, bool nvert) { return (dx == 1 && dy == 0) ? 
((nvert)?iwiDerivNVerFirst:iwiDerivVerFirst) : @@ -341,10 +378,10 @@ static inline void ippiGetImage(const cv::Mat &src, ::ipp::IwiImage &dst) cv::Point offset; src.locateROI(origSize, offset); - inMemBorder.borderLeft = (Ipp32u)offset.x; - inMemBorder.borderTop = (Ipp32u)offset.y; - inMemBorder.borderRight = (Ipp32u)(origSize.width - src.cols - offset.x); - inMemBorder.borderBottom = (Ipp32u)(origSize.height - src.rows - offset.y); + inMemBorder.left = (IwSize)offset.x; + inMemBorder.top = (IwSize)offset.y; + inMemBorder.right = (IwSize)(origSize.width - src.cols - offset.x); + inMemBorder.bottom = (IwSize)(origSize.height - src.rows - offset.y); } dst.Init(ippiSize(src.size()), ippiGetDataType(src.depth()), src.channels(), inMemBorder, (void*)src.ptr(), src.step); @@ -357,7 +394,7 @@ static inline ::ipp::IwiImage ippiGetImage(const cv::Mat &src) return image; } -static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, IppiBorderSize &borderSize) +static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, ipp::IwiBorderSize &borderSize) { int inMemFlags = 0; IppiBorderType border = ippiGetBorderType(ocvBorderType & ~cv::BORDER_ISOLATED); @@ -366,91 +403,60 @@ static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorder if(!(ocvBorderType & cv::BORDER_ISOLATED)) { - if(image.m_inMemSize.borderLeft) + if(image.m_inMemSize.left) { - if(image.m_inMemSize.borderLeft >= borderSize.borderLeft) + if(image.m_inMemSize.left >= borderSize.left) inMemFlags |= ippBorderInMemLeft; else return (IppiBorderType)0; } else - borderSize.borderLeft = 0; - if(image.m_inMemSize.borderTop) + borderSize.left = 0; + if(image.m_inMemSize.top) { - if(image.m_inMemSize.borderTop >= borderSize.borderTop) + if(image.m_inMemSize.top >= borderSize.top) inMemFlags |= ippBorderInMemTop; else return (IppiBorderType)0; } else - borderSize.borderTop = 0; - if(image.m_inMemSize.borderRight) + borderSize.top = 0; + 
if(image.m_inMemSize.right) { - if(image.m_inMemSize.borderRight >= borderSize.borderRight) + if(image.m_inMemSize.right >= borderSize.right) inMemFlags |= ippBorderInMemRight; else return (IppiBorderType)0; } else - borderSize.borderRight = 0; - if(image.m_inMemSize.borderBottom) + borderSize.right = 0; + if(image.m_inMemSize.bottom) { - if(image.m_inMemSize.borderBottom >= borderSize.borderBottom) + if(image.m_inMemSize.bottom >= borderSize.bottom) inMemFlags |= ippBorderInMemBottom; else return (IppiBorderType)0; } else - borderSize.borderBottom = 0; + borderSize.bottom = 0; } else - borderSize.borderLeft = borderSize.borderRight = borderSize.borderTop = borderSize.borderBottom = 0; + borderSize.left = borderSize.right = borderSize.top = borderSize.bottom = 0; return (IppiBorderType)(border|inMemFlags); } -static inline ::ipp::IwValue ippiGetValue(const cv::Scalar &scalar) +static inline ::ipp::IwValueFloat ippiGetValue(const cv::Scalar &scalar) { - return ::ipp::IwValue(scalar[0], scalar[1], scalar[2], scalar[3]); + return ::ipp::IwValueFloat(scalar[0], scalar[1], scalar[2], scalar[3]); } static inline int ippiSuggestThreadsNum(const ::ipp::IwiImage &image, double multiplier) { - int threads = cv::getNumThreads(); - if(image.m_size.height > threads) - { - size_t opMemory = (int)(image.m_step*image.m_size.height*multiplier); - int l2cache = 0; -#if IPP_VERSION_X100 >= 201700 - ippGetL2CacheSize(&l2cache); -#endif - if(!l2cache) - l2cache = 1 << 18; - - return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads))); - } - return 1; + return ippiSuggestThreadsNum(image.m_size.width, image.m_size.height, image.m_typeSize*image.m_channels, multiplier); } #endif -static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier) -{ - int threads = cv::getNumThreads(); - if(image.rows > threads) - { - size_t opMemory = (int)(image.total()*multiplier); - int l2cache = 0; -#if IPP_VERSION_X100 >= 201700 - ippGetL2CacheSize(&l2cache); -#endif - if(!l2cache) 
- l2cache = 1 << 18; - - return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads))); - } - return 1; -} - // IPP temporary buffer helper template class IppAutoBuffer diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index d63b53b..827612e 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -86,12 +86,6 @@ static MergeFunc getMergeFunc(int depth) } #ifdef HAVE_IPP -#ifdef HAVE_IPP_IW -extern "C" { -IW_DECL(IppStatus) llwiCopySplit(const void *pSrc, int srcStep, void* const pDstOrig[], int dstStep, - IppiSize size, int typeSize, int channels); -} -#endif namespace cv { static bool ipp_split(const Mat& src, Mat* mv, int channels) @@ -114,7 +108,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels) return false; } - return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels) >= 0; + return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels, 0) >= 0; } else { @@ -132,7 +126,7 @@ static bool ipp_split(const Mat& src, Mat* mv, int channels) for( size_t i = 0; i < it.nplanes; i++, ++it ) { - if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels) < 0) + if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels, 0) < 0) return false; } return true; @@ -273,12 +267,6 @@ void cv::split(InputArray _m, OutputArrayOfArrays _mv) } #ifdef HAVE_IPP -#ifdef HAVE_IPP_IW -extern "C" { -IW_DECL(IppStatus) llwiCopyMerge(const void* const pSrc[], int srcStep, void *pDst, int dstStep, - IppiSize size, int typeSize, int channels); -} -#endif namespace cv { static bool ipp_merge(const Mat* mv, Mat& dst, int channels) @@ -301,7 +289,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels) return false; } - return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, 
dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels) >= 0; + return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels, 0) >= 0; } else { @@ -319,7 +307,7 @@ static bool ipp_merge(const Mat* mv, Mat& dst, int channels) for( size_t i = 0; i < it.nplanes; i++, ++it ) { - if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels) < 0) + if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels, 0) < 0) return false; } return true; @@ -820,16 +808,10 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst, } #ifdef HAVE_IPP -#ifdef HAVE_IPP_IW -extern "C" { -IW_DECL(IppStatus) llwiCopyMixed(const void *pSrc, int srcStep, int srcChannels, void *pDst, int dstStep, int dstChannels, - IppiSize size, int typeSize, int channelsShift); -} -#endif namespace cv { -static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel) +static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel) { #ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP() @@ -840,14 +822,49 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel) if(src.dims != dst.dims) return false; - if(srcChannels == dstChannels || (srcChannels != 1 && dstChannels != 1)) + if(src.dims <= 2) + { + IppiSize size = ippiSize(src.size()); + + return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, channel, dst.ptr(), (int)dst.step, dstChannels, 0, size, (int)src.elemSize1()) >= 0; + } + else + { + const Mat *arrays[] = {&dst, NULL}; + uchar *ptrs[2] = {NULL}; + NAryMatIterator it(arrays, ptrs); + + IppiSize size = {(int)it.size, 1}; + + for( size_t i = 0; i < it.nplanes; i++, ++it ) + { + if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, channel, ptrs[1], 0, dstChannels, 0, size, (int)src.elemSize1()) < 0) + return false; 
+ } + return true; + } +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(channel); + return false; +#endif +} + +static bool ipp_insertChannel(const Mat &src, Mat &dst, int channel) +{ +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP() + + int srcChannels = src.channels(); + int dstChannels = dst.channels(); + + if(src.dims != dst.dims) return false; if(src.dims <= 2) { IppiSize size = ippiSize(src.size()); - return CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, src.ptr(), (int)src.step, srcChannels, dst.ptr(), (int)dst.step, dstChannels, size, (int)src.elemSize1(), channel) >= 0; + return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, 0, dst.ptr(), (int)dst.step, dstChannels, channel, size, (int)src.elemSize1()) >= 0; } else { @@ -859,7 +876,7 @@ static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel) for( size_t i = 0; i < it.nplanes; i++, ++it ) { - if(CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, ptrs[0], 0, srcChannels, ptrs[1], 0, dstChannels, size, (int)src.elemSize1(), channel) < 0) + if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, 0, ptrs[1], 0, dstChannels, channel, size, (int)src.elemSize1()) < 0) return false; } return true; @@ -893,7 +910,7 @@ void cv::extractChannel(InputArray _src, OutputArray _dst, int coi) _dst.create(src.dims, &src.size[0], depth); Mat dst = _dst.getMat(); - CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi)) + CV_IPP_RUN_FAST(ipp_extractChannel(src, dst, coi)) mixChannels(&src, 1, &dst, 1, ch, 1); } @@ -917,7 +934,7 @@ void cv::insertChannel(InputArray _src, InputOutputArray _dst, int coi) Mat src = _src.getMat(), dst = _dst.getMat(); - CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi)) + CV_IPP_RUN_FAST(ipp_insertChannel(src, dst, coi)) mixChannels(&src, 1, &dst, 1, ch, 1); } @@ -5152,7 +5169,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta) iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step); iwDst.Init(ippiSize(sz), 
dstDepth, 1, NULL, (void*)dst.ptr(), dst.step); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode)); } else { @@ -5168,7 +5185,7 @@ static bool ipp_convertTo(Mat &src, Mat &dst, double alpha, double beta) iwSrc.m_ptr = ptrs[0]; iwDst.m_ptr = ptrs[1]; - CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode)); } } } diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index ca34e7f..5762e5b 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -49,18 +49,6 @@ #include "precomp.hpp" #include "opencl_kernels_core.hpp" -#ifdef HAVE_IPP_IW -extern "C" { -IW_DECL(IppStatus) llwiCopyMask(const void *pSrc, int srcStep, void *pDst, int dstStep, - IppiSize size, int typeSize, int channels, const Ipp8u *pMask, int maskStep); -IW_DECL(IppStatus) llwiSet(const double *pValue, void *pDst, int dstStep, - IppiSize size, IppDataType dataType, int channels); -IW_DECL(IppStatus) llwiSetMask(const double *pValue, void *pDst, int dstStep, - IppiSize size, IppDataType dataType, int channels, const Ipp8u *pMask, int maskStep); -IW_DECL(IppStatus) llwiCopyMakeBorder(const void *pSrc, IppSizeL srcStep, void *pDst, IppSizeL dstStep, - IppiSizeL size, IppDataType dataType, int channels, IppiBorderSize *pBorderSize, IppiBorderType border, const Ipp64f *pBorderVal); -} -#endif namespace cv { @@ -480,9 +468,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask) if(dst.dims <= 2) { - IppiSize size = ippiSize(dst.size()); - IppDataType dataType = ippiGetDataType(dst.depth()); - ::ipp::IwValue s; + IppiSize size = ippiSize(dst.size()); + IppDataType dataType = ippiGetDataType(dst.depth()); + ::ipp::IwValueFloat s; convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1); return 
CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0; @@ -493,9 +481,9 @@ static bool ipp_Mat_setTo_Mat(Mat &dst, Mat &_val, Mat &mask) uchar *ptrs[2] = {NULL}; NAryMatIterator it(arrays, ptrs); - IppiSize size = {(int)it.size, 1}; - IppDataType dataType = ippiGetDataType(dst.depth()); - ::ipp::IwValue s; + IppiSize size = {(int)it.size, 1}; + IppDataType dataType = ippiGetDataType(dst.depth()); + ::ipp::IwValueFloat s; convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1); for( size_t i = 0; i < it.nplanes; i++, ++it) @@ -717,7 +705,7 @@ static bool ipp_flip(Mat &src, Mat &dst, int flip_mode) ::ipp::IwiImage iwSrc = ippiGetImage(src); ::ipp::IwiImage iwDst = ippiGetImage(dst); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, &iwSrc, &iwDst, ippMode); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode); } catch(::ipp::IwException) { @@ -1155,13 +1143,13 @@ static bool ipp_copyMakeBorder( Mat &_src, Mat &_dst, int top, int bottom, if(_src.dims > 2) return false; - Rect dstRect(borderSize.borderLeft, borderSize.borderTop, - _dst.cols - borderSize.borderRight - borderSize.borderLeft, - _dst.rows - borderSize.borderBottom - borderSize.borderTop); + Rect dstRect(borderSize.left, borderSize.top, + _dst.cols - borderSize.right - borderSize.left, + _dst.rows - borderSize.bottom - borderSize.top); Mat subDst = Mat(_dst, dstRect); Mat *pSrc = &_src; - return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), &borderSize, borderType, &value[0]) >= 0; + return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), borderSize, borderType, &value[0]) >= 0; #else CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right); CV_UNUSED(_borderType); CV_UNUSED(value); diff --git 
a/modules/core/src/mathfuncs_core.dispatch.cpp b/modules/core/src/mathfuncs_core.dispatch.cpp index b64ec4a..64d74bb 100644 --- a/modules/core/src/mathfuncs_core.dispatch.cpp +++ b/modules/core/src/mathfuncs_core.dispatch.cpp @@ -44,7 +44,8 @@ void magnitude32f(const float* x, const float* y, float* mag, int len) CV_INSTRUMENT_REGION() CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len); - CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0); + // SSE42 performance issues + CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0); CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len), CV_CPU_DISPATCH_MODES_ALL); @@ -55,7 +56,8 @@ void magnitude64f(const double* x, const double* y, double* mag, int len) CV_INSTRUMENT_REGION() CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len); - CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0); + // SSE42 performance issues + CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0); CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len), CV_CPU_DISPATCH_MODES_ALL); @@ -91,7 +93,6 @@ void sqrt32f(const float* src, float* dst, int len) CV_INSTRUMENT_REGION() CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len); - CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0); CV_CPU_DISPATCH(sqrt32f, (src, dst, len), CV_CPU_DISPATCH_MODES_ALL); @@ -103,7 +104,6 @@ void sqrt64f(const double* src, double* dst, int len) CV_INSTRUMENT_REGION() CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len); - CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0); CV_CPU_DISPATCH(sqrt64f, (src, dst, len), CV_CPU_DISPATCH_MODES_ALL); diff --git 
a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 219089d..c84f8a7 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -3101,7 +3101,7 @@ static double dotProd_8u(const uchar* src1, const uchar* src2, int len) { double r = 0; #if ARITHM_USE_IPP - CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r); + CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r); #endif int i = 0; diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index ec09e21..4b7c257 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -4356,7 +4356,6 @@ template static void sortIdx_( const Mat& src, Mat& dst, int flags ) } #ifdef HAVE_IPP -#if !IPP_DISABLE_SORT_IDX typedef IppStatus (CV_STDCALL *IppSortIndexFunc)(const void* pSrc, Ipp32s srcStrideBytes, Ipp32s *pDstIndx, int len, Ipp8u *pBuffer); static IppSortIndexFunc getSortIndexFunc(int depth, bool sortDescending) @@ -4435,7 +4434,6 @@ static bool ipp_sortIdx( const Mat& src, Mat& dst, int flags ) return true; } #endif -#endif typedef void (*SortFunc)(const Mat& src, Mat& dst, int flags); } @@ -4472,9 +4470,8 @@ void cv::sortIdx( InputArray _src, OutputArray _dst, int flags ) _dst.release(); _dst.create( src.size(), CV_32S ); dst = _dst.getMat(); -#if !IPP_DISABLE_SORT_IDX + CV_IPP_RUN_FAST(ipp_sortIdx(src, dst, flags)); -#endif static SortFunc tab[] = { diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index 255a8ad..d38e20d 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -257,7 +257,8 @@ struct CoreTLSData //#ifdef HAVE_OPENCL device(0), useOpenCL(-1), //#endif - useIPP(-1) + useIPP(-1), + useIPP_NE(-1) #ifdef HAVE_TEGRA_OPTIMIZATION ,useTegra(-1) #endif @@ 
-272,7 +273,8 @@ struct CoreTLSData ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized //#endif - int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized + int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized + int useIPP_NE; // 1 - use, 0 - do not use, -1 - auto/not initialized #ifdef HAVE_TEGRA_OPTIMIZATION int useTegra; // 1 - use, 0 - do not use, -1 - auto/not initialized #endif diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 8018169..a978c90 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1294,6 +1294,12 @@ static bool ipp_countNonZero( Mat &src, int &res ) { CV_INSTRUMENT_REGION_IPP() +#if IPP_VERSION_X100 < 201801 + // Poor performance of SSE42 + if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) + return false; +#endif + Ipp32s count = 0; int depth = src.depth(); @@ -2531,15 +2537,16 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI #if IPP_VERSION_X100 >= 700 CV_INSTRUMENT_REGION_IPP() -#if IPP_DISABLE_MINMAX_NAN_SSE42 +#if IPP_VERSION_X100 < 201800 + // cv::minMaxIdx problem with NaN input // Disable 32F processing only - if(src.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX)) + if(src.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) return false; #endif +#if IPP_VERSION_X100 < 201801 // cv::minMaxIdx problem with index positions on AVX -#if IPP_VERSION_X100 < 201810 - if(!mask.empty() && _maxIdx && ipp::getIppFeatures()&ippCPUID_AVX) + if(!mask.empty() && _maxIdx && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42) return false; #endif @@ -2550,8 +2557,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI IppiPoint minIdx = {-1, -1}; IppiPoint maxIdx = {-1, -1}; - float *pMinVal = (_minVal)?&minVal:NULL; - float *pMaxVal = (_maxVal)?&maxVal:NULL; + float *pMinVal = (_minVal || 
_minIdx)?&minVal:NULL; + float *pMaxVal = (_maxVal || _maxIdx)?&maxVal:NULL; IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL; IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL; @@ -2564,6 +2571,8 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI ippMinMaxFun = ipp_minIdx_wrap; else if(_maxVal && !_maxIdx && _minVal && !_minIdx) ippMinMaxFun = ipp_minMax_wrap; + else if(!_maxVal && !_maxIdx && !_minVal && !_minIdx) + return false; else ippMinMaxFun = ipp_minMaxIndex_wrap; } @@ -2582,8 +2591,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI *_maxVal = maxVal; if(_minIdx) { +#if IPP_VERSION_X100 < 201801 // Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks if(status == ippStsNoOperation && !mask.empty() && !pMinIdx->x && !pMinIdx->y) +#else + if(status == ippStsNoOperation) +#endif { _minIdx[0] = -1; _minIdx[1] = -1; @@ -2596,8 +2609,12 @@ static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minI } if(_maxIdx) { +#if IPP_VERSION_X100 < 201801 // Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks if(status == ippStsNoOperation && !mask.empty() && !pMaxIdx->x && !pMaxIdx->y) +#else + if(status == ippStsNoOperation) +#endif { _maxIdx[0] = -1; _maxIdx[1] = -1; diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 3eeb5fe..b02141f 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1909,55 +1909,146 @@ struct IPPInitSingleton public: IPPInitSingleton() { - useIPP = true; - ippStatus = 0; - funcname = NULL; - filename = NULL; - linen = 0; - ippFeatures = 0; + useIPP = true; + useIPP_NE = false; + ippStatus = 0; + funcname = NULL; + filename = NULL; + linen = 0; + cpuFeatures = 0; + ippFeatures = 0; + ippTopFeatures = 0; + pIppLibInfo = NULL; + + ippStatus = ippGetCpuFeatures(&cpuFeatures, NULL); + if(ippStatus < 0) + { + std::cerr << 
"ERROR: IPP cannot detect CPU features, IPP was disabled " << std::endl; + useIPP = false; + return; + } + ippFeatures = cpuFeatures; + bool unsupported = false; const char* pIppEnv = getenv("OPENCV_IPP"); cv::String env = pIppEnv; if(env.size()) { + env = env.toLowerCase(); + if(env.substr(0, 2) == "ne") + { + useIPP_NE = true; + env = env.substr(3, env.size()); + } + if(env == "disabled") { std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl; useIPP = false; } -#if IPP_VERSION_X100 >= 900 - else if(env == "sse") - ippFeatures = ippCPUID_SSE; - else if(env == "sse2") - ippFeatures = ippCPUID_SSE2; - else if(env == "sse3") - ippFeatures = ippCPUID_SSE3; - else if(env == "ssse3") - ippFeatures = ippCPUID_SSSE3; - else if(env == "sse41") - ippFeatures = ippCPUID_SSE41; else if(env == "sse42") - ippFeatures = ippCPUID_SSE42; - else if(env == "avx") - ippFeatures = ippCPUID_AVX; + { + if(!(cpuFeatures&ippCPUID_SSE42)) + unsupported = true; + ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42; + ippFeatures |= (cpuFeatures&ippCPUID_AES); + ippFeatures |= (cpuFeatures&ippCPUID_CLMUL); + ippFeatures |= (cpuFeatures&ippCPUID_SHA); + } else if(env == "avx2") - ippFeatures = ippCPUID_AVX2; + { + if(!(cpuFeatures&ippCPUID_AVX2)) + unsupported = true; + ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2; + ippFeatures |= (cpuFeatures&ippCPUID_AES); + ippFeatures |= (cpuFeatures&ippCPUID_CLMUL); + ippFeatures |= (cpuFeatures&ippCPUID_F16C); + ippFeatures |= (cpuFeatures&ippCPUID_ADCOX); + ippFeatures |= (cpuFeatures&ippCPUID_RDSEED); + ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW); + ippFeatures |= (cpuFeatures&ippCPUID_MPX); + } +#if defined (_M_AMD64) || defined (__x86_64__) + else if(env == "avx512") + { + if(!(cpuFeatures&ippCPUID_AVX512F)) + unsupported = true; + + ippFeatures = 
ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2|ippCPUID_AVX512F; + ippFeatures |= (cpuFeatures&ippCPUID_AES); + ippFeatures |= (cpuFeatures&ippCPUID_CLMUL); + ippFeatures |= (cpuFeatures&ippCPUID_F16C); + ippFeatures |= (cpuFeatures&ippCPUID_ADCOX); + ippFeatures |= (cpuFeatures&ippCPUID_RDSEED); + ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW); + ippFeatures |= (cpuFeatures&ippCPUID_MPX); + ippFeatures |= (cpuFeatures&ippCPUID_AVX512CD); + ippFeatures |= (cpuFeatures&ippCPUID_AVX512VL); + ippFeatures |= (cpuFeatures&ippCPUID_AVX512BW); + ippFeatures |= (cpuFeatures&ippCPUID_AVX512DQ); + ippFeatures |= (cpuFeatures&ippCPUID_AVX512ER); + ippFeatures |= (cpuFeatures&ippCPUID_AVX512PF); + ippFeatures |= (cpuFeatures&ippCPUID_AVX512VBMI); + } #endif else - std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << std::endl; + std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << ". Correct values are: disabled, sse42, avx2, avx512 (Intel64 only)" << std::endl; + } + + if(unsupported) + { + std::cerr << "WARNING: selected IPP features are not supported by CPU. IPP was initialized with default features" << std::endl; + ippFeatures = cpuFeatures; + } + + // Disable AVX1 since we don't track regressions for it. SSE42 will be used instead + if(cpuFeatures&ippCPUID_AVX && !(cpuFeatures&ippCPUID_AVX2)) + ippFeatures &= ~ippCPUID_AVX; + + // IPP integrations in OpenCV support only SSE4.2, AVX2 and AVX-512 optimizations. 
+ if(!( + cpuFeatures&ippCPUID_AVX512F || + cpuFeatures&ippCPUID_AVX2 || + cpuFeatures&ippCPUID_SSE42 + )) + { + useIPP = false; + return; } IPP_INITIALIZER(ippFeatures) ippFeatures = ippGetEnabledCpuFeatures(); + + // Detect top level optimizations to make comparison easier for optimizations dependent conditions + if(ippFeatures&ippCPUID_AVX512F) + { + if((ippFeatures&ippCPUID_AVX512_SKX) == ippCPUID_AVX512_SKX) + ippTopFeatures = ippCPUID_AVX512_SKX; + else if((ippFeatures&ippCPUID_AVX512_KNL) == ippCPUID_AVX512_KNL) + ippTopFeatures = ippCPUID_AVX512_KNL; + else + ippTopFeatures = ippCPUID_AVX512F; // Unknown AVX512 configuration + } + else if(ippFeatures&ippCPUID_AVX2) + ippTopFeatures = ippCPUID_AVX2; + else if(ippFeatures&ippCPUID_SSE42) + ippTopFeatures = ippCPUID_SSE42; + + pIppLibInfo = ippiGetLibVersion(); } - bool useIPP; +public: + bool useIPP; + bool useIPP_NE; - int ippStatus; // 0 - all is ok, -1 - IPP functions failed + int ippStatus; // 0 - all is ok, -1 - IPP functions failed const char *funcname; const char *filename; int linen; Ipp64u ippFeatures; + Ipp64u cpuFeatures; + Ipp64u ippTopFeatures; + const IppLibraryVersion *pIppLibInfo; }; static IPPInitSingleton& getIPPSingleton() @@ -1983,6 +2074,17 @@ int getIppFeatures() #endif } +unsigned long long getIppTopFeatures(); + +unsigned long long getIppTopFeatures() +{ +#ifdef HAVE_IPP + return getIPPSingleton().ippTopFeatures; +#else + return 0; +#endif +} + void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line) { #ifdef HAVE_IPP @@ -2013,6 +2115,19 @@ String getIppErrorLocation() #endif } +String getIppVersion() +{ +#ifdef HAVE_IPP + const IppLibraryVersion *pInfo = getIPPSingleton().pIppLibInfo; + if(pInfo) + return format("%s %s %s", pInfo->Name, pInfo->Version, pInfo->BuildDate); + else + return String("error"); +#else + return String("disabled"); +#endif +} + bool useIPP() { #ifdef HAVE_IPP @@ -2038,6 +2153,31 @@ void setUseIPP(bool flag) #endif } 
+bool useIPP_NE() +{ +#ifdef HAVE_IPP + CoreTLSData* data = getCoreTlsData().get(); + if(data->useIPP_NE < 0) + { + data->useIPP_NE = getIPPSingleton().useIPP_NE; + } + return (data->useIPP_NE > 0); +#else + return false; +#endif +} + +void setUseIPP_NE(bool flag) +{ + CoreTLSData* data = getCoreTlsData().get(); +#ifdef HAVE_IPP + data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false; +#else + (void)flag; + data->useIPP_NE = false; +#endif +} + } // namespace ipp } // namespace cv diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index c723c2b..b475c00 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -95,7 +95,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, ippiGetImage(dy_, iwSrcDy); ippiGetImage(dst, iwDst); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, &iwSrcDx, &iwSrcDy, &iwDst, norm, low, high); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm)); } catch (::ipp::IwException ex) { @@ -121,7 +121,7 @@ static bool ipp_Canny(const Mat& src , const Mat& dx_, const Mat& dy_, Mat& dst, ippiGetImage(src, iwSrc); ippiGetImage(dst, iwDst); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, &iwSrc, &iwDst, ippFilterSobel, kernel, norm, low, high, ippBorderRepl); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl); } catch (::ipp::IwException) { diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 08153fe..545abc6 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -469,11 +469,32 @@ static ippiGeneralFunc ippiRGB2GrayC4Tab[] = 0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0 }; -static ippiGeneralFunc ippiCopyP3C3RTab[] = + +static IppStatus ippiGrayToRGB_C1C3R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize) { - 
(ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0, - 0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0 -}; + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); +} +static IppStatus ippiGrayToRGB_C1C3R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); +} +static IppStatus ippiGrayToRGB_C1C3R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize); +} + +static IppStatus ippiGrayToRGB_C1C4R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize, Ipp8u aval) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); +} +static IppStatus ippiGrayToRGB_C1C4R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize, Ipp16u aval) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); +} +static IppStatus ippiGrayToRGB_C1C4R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize, Ipp32f aval) +{ + return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval); +} #if !IPP_DISABLE_RGB_XYZ static ippiGeneralFunc ippiRGB2XYZTab[] = @@ -580,48 +601,31 @@ private: Ipp32f coeffs[3]; }; +template struct IPPGray2BGRFunctor { - IPPGray2BGRFunctor(ippiGeneralFunc _func) : - ippiGrayToBGR(_func) - { - } + IPPGray2BGRFunctor(){} bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { - if (ippiGrayToBGR == 0) - return false; - - const void* srcarray[3] = { src, src, src }; - return CV_INSTRUMENT_FUN_IPP(ippiGrayToBGR, srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0; + return ippiGrayToRGB_C1C3R((T*)src, srcStep, (T*)dst, dstStep, 
ippiSize(cols, rows)) >= 0; } -private: - ippiGeneralFunc ippiGrayToBGR; }; +template struct IPPGray2BGRAFunctor { - IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) : - ippiColorConvertGeneral(_func1), ippiColorConvertReorder(_func2), depth(_depth) + IPPGray2BGRAFunctor() { + alpha = ColorChannel::max(); } bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const { - if (ippiColorConvertGeneral == 0 || ippiColorConvertReorder == 0) - return false; - - const void* srcarray[3] = { src, src, src }; - Mat temp(rows, cols, CV_MAKETYPE(depth, 3)); - if(CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0) - return false; - int order[4] = {0, 1, 2, 3}; - return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0; + return ippiGrayToRGB_C1C4R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows), alpha) >= 0; } -private: - ippiGeneralFunc ippiColorConvertGeneral; - ippiReorderFunc ippiColorConvertReorder; - int depth; + + T alpha; }; struct IPPReorderGeneralFunctor @@ -9744,18 +9748,27 @@ void cvtGraytoBGR(const uchar * src_data, size_t src_step, #if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700 CV_IPP_CHECK() { + bool ippres = false; if(dcn == 3) { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) ) - return; + if( depth == CV_8U ) + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); + else if( depth == CV_16U ) + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); + else + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor()); } else if(dcn == 4) { - if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, - 
IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) ) - return; + if( depth == CV_8U ) + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); + else if( depth == CV_16U ) + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); + else + ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor()); } + if(ippres) + return; } #endif diff --git a/modules/imgproc/src/deriv.cpp b/modules/imgproc/src/deriv.cpp index 25163c4..7f90767 100644 --- a/modules/imgproc/src/deriv.cpp +++ b/modules/imgproc/src/deriv.cpp @@ -312,7 +312,7 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi ::ipp::IwiImage iwDstProc = iwDst; ::ipp::IwiBorderSize borderSize(maskSize); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); - if(!ippBorder.m_borderType) + if(!ippBorder) return false; if(srcType == ipp8u && dstType == ipp8u) @@ -324,17 +324,17 @@ static bool ipp_Deriv(InputArray _src, OutputArray _dst, int dx, int dy, int ksi { iwSrc -= borderSize; iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0, ippAlgHintFast); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0, ::ipp::IwiScaleParams(ippAlgHintFast)); iwSrcProc += borderSize; } if(useScharr) - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder); else - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder); if(useScale) - CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta, 
ippAlgHintFast); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta, ::ipp::IwiScaleParams(ippAlgHintFast)); } catch (::ipp::IwException) { @@ -732,7 +732,7 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s ::ipp::IwiImage iwDstProc = iwDst; ::ipp::IwiBorderSize borderSize(maskSize); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); - if(!ippBorder.m_borderType) + if(!ippBorder) return false; if(srcType == ipp8u && dstType == ipp8u) @@ -744,14 +744,14 @@ static bool ipp_Laplacian(InputArray _src, OutputArray _dst, int ksize, double s { iwSrc -= borderSize; iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0); iwSrcProc += borderSize; } - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, &iwSrcProc, &iwDstProc, maskSize, ippBorder); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, iwSrcProc, iwDstProc, maskSize, ::ipp::IwDefault(), ippBorder); if(useScale) - CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta); } catch (::ipp::IwException ex) diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index da14b57..e732ce5 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -4560,170 +4560,88 @@ static bool replacementFilter2D(int stype, int dtype, int kernel_type, } #ifdef HAVE_IPP -typedef IppStatus(CV_STDCALL* IppiFilterBorder)( - const void* pSrc, int srcStep, void* pDst, int dstStep, - IppiSize dstRoiSize, IppiBorderType border, const void* borderValue, - const IppiFilterBorderSpec* pSpec, Ipp8u* pBuffer); - -static IppiFilterBorder getIppFunc(int stype) +static bool ippFilter2D(int stype, int dtype, int kernel_type, + uchar * src_data, size_t src_step, + uchar * dst_data, size_t dst_step, + int width, int 
height, + int full_width, int full_height, + int offset_x, int offset_y, + uchar * kernel_data, size_t kernel_step, + int kernel_width, int kernel_height, + int anchor_x, int anchor_y, + double delta, int borderType, + bool isSubmatrix) { - switch (stype) - { - case CV_8UC1: - return reinterpret_cast(ippiFilterBorder_8u_C1R); - case CV_8UC3: - return reinterpret_cast(ippiFilterBorder_8u_C3R); - case CV_8UC4: - return reinterpret_cast(ippiFilterBorder_8u_C4R); - case CV_16UC1: - return reinterpret_cast(ippiFilterBorder_16u_C1R); - case CV_16UC3: - return reinterpret_cast(ippiFilterBorder_16u_C3R); - case CV_16UC4: - return reinterpret_cast(ippiFilterBorder_16u_C4R); - case CV_16SC1: - return reinterpret_cast(ippiFilterBorder_16s_C1R); - case CV_16SC3: - return reinterpret_cast(ippiFilterBorder_16s_C3R); - case CV_16SC4: - return reinterpret_cast(ippiFilterBorder_16s_C4R); - case CV_32FC1: - return reinterpret_cast(ippiFilterBorder_32f_C1R); - case CV_32FC3: - return reinterpret_cast(ippiFilterBorder_32f_C3R); - case CV_32FC4: - return reinterpret_cast(ippiFilterBorder_32f_C4R); - default: - return 0; - } -} +#ifdef HAVE_IPP_IW + CV_INSTRUMENT_REGION_IPP(); -template -struct IppFilterTrait { }; + ::ipp::IwiSize iwSize(width, height); + ::ipp::IwiSize kernelSize(kernel_width, kernel_height); + IppDataType type = ippiGetDataType(CV_MAT_DEPTH(stype)); + int channels = CV_MAT_CN(stype); -template <> -struct IppFilterTrait -{ - enum { kernel_type_id = CV_16SC1 }; - typedef Ipp16s kernel_type; - typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize); - inline static copy_fun_type get_copy_fun() { return ippiCopy_16s_C1R; } - inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec) - { - return ippiFilterBorderInit_16s(pKernel, kernelSize, divisor, dataType, numChannels, 
roundMode, pSpec); - } -}; + CV_UNUSED(isSubmatrix); -template <> -struct IppFilterTrait -{ - enum { kernel_type_id = CV_32FC1 }; - typedef Ipp32f kernel_type; - typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize); - inline static copy_fun_type get_copy_fun() { return ippiCopy_32f_C1R; } - inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec) - { - CV_UNUSED(divisor); - return ippiFilterBorderInit_32f(pKernel, kernelSize, dataType, numChannels, roundMode, pSpec); - } -}; +#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel + if(kernel_width == 1 && kernel_height == 1) + return false; +#endif -template -static bool ippFilter2D(int stype, int dtype, - uchar * src_data, size_t src_step, - uchar * dst_data, size_t dst_step, - int width, int height, - uchar * kernel_data, size_t kernel_step, - int kernel_width, int kernel_height, - int anchor_x, int anchor_y, - double delta, int borderType, bool isSubmatrix) -{ - CV_INSTRUMENT_REGION_IPP(); +#if IPP_VERSION_X100 < 201801 + // Too big difference compared to OpenCV FFT-based convolution + if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7)) + return false; - typedef IppFilterTrait trait; - typedef typename trait::kernel_type kernel_type; + // Poor optimization for big kernels + if(kernel_width > 7 || kernel_height > 7) + return false; +#endif - IppAutoBuffer spec; - IppAutoBuffer buffer; - IppAutoBuffer kernelBuffer; - IppiBorderType ippBorderType; - int src_type; + if(src_data == dst_data) + return false; - Point anchor(anchor_x, anchor_y); -#if IPP_VERSION_X100 >= 900 - Point ippAnchor((kernel_width - 1) / 2, (kernel_height - 1) / 2); -#else - Point ippAnchor(kernel_width >> 1, kernel_height >> 1); -#endif - bool 
isIsolated = (borderType & BORDER_ISOLATED) != 0; - int borderTypeNI = borderType & ~BORDER_ISOLATED; - ippBorderType = ippiGetBorderType(borderTypeNI); - int ddepth = CV_MAT_DEPTH(dtype); - int sdepth = CV_MAT_DEPTH(stype); + if(stype != dtype) + return false; -#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel - if(kernel_width == 1 && kernel_height == 1) + if(kernel_type != CV_16SC1 && kernel_type != CV_32FC1) return false; -#endif - bool runIpp = true - && (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) - && (sdepth == ddepth) - && (getIppFunc(stype)) - && ((int)ippBorderType > 0) - && (!isSubmatrix || isIsolated) - && (std::fabs(delta - 0) < DBL_EPSILON) - && (ippAnchor == anchor) - && src_data != dst_data; - - if (!runIpp) + // TODO: Implement offset for 8u, 16u + if(std::fabs(delta) >= DBL_EPSILON) return false; - src_type = stype; - int cn = CV_MAT_CN(stype); - IppiSize kernelSize = { kernel_width, kernel_height }; - IppDataType dataType = ippiGetDataType(ddepth); - IppDataType kernelType = ippiGetDataType(kdepth); - Ipp32s specSize = 0; - Ipp32s bufsize = 0; - IppiSize dstRoiSize = { width, height }; - IppStatus status; - - status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize); - if (status < 0) + if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height)) return false; - kernel_type* pKerBuffer = (kernel_type*)kernel_data; - size_t good_kernel_step = sizeof(kernel_type) * static_cast(kernelSize.width); -#if IPP_VERSION_X100 >= 900 - if (kernel_step != good_kernel_step) { - kernelBuffer.allocate((int)good_kernel_step * kernelSize.height); - status = trait::get_copy_fun()((kernel_type*)kernel_data, (int)kernel_step, kernelBuffer, (int)good_kernel_step, kernelSize); - if (status < 0) + try + { + ::ipp::IwiBorderSize iwBorderSize; + ::ipp::IwiBorderType iwBorderType; + ::ipp::IwiImage iwKernel(ippiSize(kernel_width, kernel_height), 
ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, (void*)kernel_data, kernel_step); + ::ipp::IwiImage iwSrc(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)src_data, src_step); + ::ipp::IwiImage iwDst(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)dst_data, dst_step); + + iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize); + iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize); + if(!iwBorderType) return false; - pKerBuffer = kernelBuffer; + + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilter, iwSrc, iwDst, iwKernel, ::ipp::IwiFilterParams(1, 0, ippAlgHintNone, ippRndFinancial), iwBorderType); } -#else - kernelBuffer.Alloc(good_kernel_step * kernelSize.height); - Mat kerFlip(Size(kernelSize.width, kernelSize.height), trait::kernel_type_id, kernelBuffer, (int)good_kernel_step); - Mat kernel(Size(kernel_width, kernel_height), trait::kernel_type_id, kernel_data, kernel_step); - flip(kernel, kerFlip, -1); - pKerBuffer = kernelBuffer; -#endif - spec.allocate(specSize); - buffer.allocate(bufsize); - status = trait::runInit(pKerBuffer, kernelSize, 0, dataType, cn, ippRndFinancial, spec); - if (status < 0) { + catch(::ipp::IwException ex) + { return false; } - IppiFilterBorder ippiFilterBorder = getIppFunc(src_type); - kernel_type borderValue[4] = { 0, 0, 0, 0 }; - status = CV_INSTRUMENT_FUN_IPP(ippiFilterBorder, src_data, (int)src_step, dst_data, (int)dst_step, dstRoiSize, ippBorderType, borderValue, spec, buffer); - if (status >= 0) { - CV_IMPL_ADD(CV_IMPL_IPP); - return true; - } + + return true; +#else + CV_UNUSED(stype); CV_UNUSED(dtype); CV_UNUSED(kernel_type); CV_UNUSED(src_data); CV_UNUSED(src_step); + CV_UNUSED(dst_data); CV_UNUSED(dst_step); CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(full_width); + CV_UNUSED(full_height); CV_UNUSED(offset_x); CV_UNUSED(offset_y); 
CV_UNUSED(kernel_data); CV_UNUSED(kernel_step); + CV_UNUSED(kernel_width); CV_UNUSED(kernel_height); CV_UNUSED(anchor_x); CV_UNUSED(anchor_y); CV_UNUSED(delta); + CV_UNUSED(borderType); CV_UNUSED(isSubmatrix); return false; +#endif } #endif @@ -4902,34 +4820,18 @@ void filter2D(int stype, int dtype, int kernel_type, delta, borderType, isSubmatrix); if (res) return; -#ifdef HAVE_IPP - CV_IPP_CHECK() - { - res = false; - if (kernel_type == CV_32FC1) { - res = ippFilter2D(stype, dtype, - src_data, src_step, - dst_data, dst_step, - width, height, - kernel_data, kernel_step, - kernel_width, kernel_height, - anchor_x, anchor_y, - delta, borderType, isSubmatrix); - } - else if (kernel_type == CV_16SC1) { - res = ippFilter2D(stype, dtype, - src_data, src_step, - dst_data, dst_step, - width, height, - kernel_data, kernel_step, - kernel_width, kernel_height, - anchor_x, anchor_y, - delta, borderType, isSubmatrix); - } - if (res) - return; - } -#endif + + CV_IPP_RUN_FAST(ippFilter2D(stype, dtype, kernel_type, + src_data, src_step, + dst_data, dst_step, + width, height, + full_width, full_height, + offset_x, offset_y, + kernel_data, kernel_step, + kernel_width, kernel_height, + anchor_x, anchor_y, + delta, borderType, isSubmatrix)) + res = dftFilter2D(stype, dtype, kernel_type, src_data, src_step, dst_data, dst_step, diff --git a/modules/imgproc/src/filterengine.hpp b/modules/imgproc/src/filterengine.hpp index 9110668..63e74a3 100644 --- a/modules/imgproc/src/filterengine.hpp +++ b/modules/imgproc/src/filterengine.hpp @@ -370,6 +370,18 @@ void crossCorr( const Mat& src, const Mat& templ, Mat& dst, Point anchor=Point(0,0), double delta=0, int borderType=BORDER_REFLECT_101 ); + } +#ifdef HAVE_IPP_IW +static inline bool ippiCheckAnchor(cv::Point anchor, cv::Size ksize) +{ + anchor = cv::normalizeAnchor(anchor, ksize); + if(anchor.x != ((ksize.width-1)/2) || anchor.y != ((ksize.height-1)/2)) + return 0; + else + return 1; +} +#endif + #endif diff --git 
a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index 33f15c5..b5c5775 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -1194,10 +1194,8 @@ static IppiHistogram_C1 getIppiHistogramFunction_C1(int type) { IppiHistogram_C1 ippFunction = (type == CV_8UC1) ? (IppiHistogram_C1)ippiHistogram_8u_C1R : -#if IPP_VERSION_X100 >= 201700 || !(defined HAVE_IPP_ICV_ONLY) (type == CV_16UC1) ? (IppiHistogram_C1)ippiHistogram_16u_C1R : (type == CV_32FC1) ? (IppiHistogram_C1)ippiHistogram_32f_C1R : -#endif NULL; return ippFunction; @@ -1401,9 +1399,9 @@ static bool ipp_calchist(const Mat &image, Mat &hist, int histSize, const float* { CV_INSTRUMENT_REGION_IPP() +#if IPP_VERSION_X100 < 201801 // No SSE42 optimization for uniform 32f -#if IPP_DISABLE_PERF_HISTU32F_SSE42 - if(uniform && image.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX)) + if(uniform && image.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) return false; #endif diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index a960c01..69ccbac 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2971,8 +2971,8 @@ public: try { - ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start); - CV_INSTRUMENT_FUN_IPP(iwiResize, &m_src, &m_dst, &roi); + ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start); + CV_INSTRUMENT_FUN_IPP(iwiResize, m_src, m_dst, ippBorderRepl, tile); } catch(::ipp::IwException) { @@ -3007,7 +3007,7 @@ public: {0, scaleY, shift+0.5*scaleY} }; - iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, ippWarpForward, inter, ::ipp::IwiWarpAffineParams(0, 0.75, 0), ippBorderRepl); + iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, iwTransForward, inter, ::ipp::IwiWarpAffineParams(0, 0, 0.75), 
ippBorderRepl); m_ok = true; } @@ -3021,8 +3021,8 @@ public: try { - ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start); - CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, &m_src, &m_dst, &roi); + ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start); + CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, m_src, m_dst, tile); } catch(::ipp::IwException) { @@ -3053,23 +3053,28 @@ static bool ipp_resize(const uchar * src_data, size_t src_step, int src_width, i if(ippInter < 0) return false; -#if IPP_DISABLE_RESIZE_NEAREST - if(ippInter == ippNearest) - return false; -#endif - -#if IPP_DISABLE_RESIZE_AREA - if(ippInter == ippSuper) - return false; -#endif + // Resize which doesn't match OpenCV exactly + if(!cv::ipp::useIPP_NE()) + { + if(ippInter == ippNearest || ippInter == ippSuper || (ippDataType == ipp8u && ippInter == ippLinear)) + return false; + } if(ippInter != ippLinear && ippDataType == ipp64f) return false; - // Accuracy mismatch is 1 but affects detectors greatly -#if IPP_DISABLE_RESIZE_8U - if(ippDataType == ipp8u && ippInter == ippLinear) - return false; +#if IPP_VERSION_X100 < 201801 + // Degradations on int^2 linear downscale + if(ippDataType != ipp64f && ippInter == ippLinear && inv_scale_x < 1 && inv_scale_y < 1) // if downscale + { + int scale_x = (int)(1/inv_scale_x); + int scale_y = (int)(1/inv_scale_y); + if(1/inv_scale_x - scale_x < DBL_EPSILON && 1/inv_scale_y - scale_y < DBL_EPSILON) // if integer + { + if(!(scale_x&(scale_x-1)) && !(scale_y&(scale_y-1))) // if power of 2 + return false; + } + } #endif bool affine = false; diff --git a/modules/imgproc/src/moments.cpp b/modules/imgproc/src/moments.cpp index d9ae2e2..00e74f7 100644 --- a/modules/imgproc/src/moments.cpp +++ b/modules/imgproc/src/moments.cpp @@ -571,6 +571,12 @@ static bool ipp_moments(Mat &src, Moments &m ) #if IPP_VERSION_X100 >= 900 CV_INSTRUMENT_REGION_IPP() +#if IPP_VERSION_X100 < 201801 + // Degradations for CV_8UC1 
+ if(src.type() == CV_8UC1) + return false; +#endif + IppiSize roi = { src.cols, src.rows }; IppiPoint point = { 0, 0 }; int type = src.type(); diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index bf35ee3..ff31f3a 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1140,20 +1140,41 @@ static bool ippMorph(int op, int src_type, int dst_type, #ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP() - // Problem with SSE42 optimizations -#if IPP_DISABLE_PERF_MORPH_SSE42 - if(!(ipp::getIppFeatures()&ippCPUID_AVX)) +#if IPP_VERSION_X100 < 201800 + // Problem with SSE42 optimizations performance + if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42) + return false; + + // Different mask flipping + if(op == MORPH_GRADIENT) + return false; +#endif + +#if IPP_VERSION_X100 < 201801 + // Problem with AVX512 optimizations performance + if(cv::ipp::getIppTopFeatures()&ippCPUID_AVX512F) + return false; + + // Multiple iterations on small mask is not effective in current integration + // In-place imitation for 3x3 kernel is not efficient + // Advanced morphology for small mask introduces degradations + if((iterations > 1 || src_data == dst_data || (op != MORPH_ERODE && op != MORPH_DILATE)) && kernel_width*kernel_height < 25) + return false; + + // Skip even mask sizes for advanced morphology since they can produce out of spec writes + if((op != MORPH_ERODE && op != MORPH_DILATE) && (!(kernel_width&1) || !(kernel_height&1))) return false; #endif - ::ipp::IwAutoBuffer kernelTempBuffer; + IppAutoBuffer kernelTempBuffer; ::ipp::IwiBorderSize iwBorderSize; + ::ipp::IwiBorderSize iwBorderSize2; ::ipp::IwiBorderType iwBorderType; + ::ipp::IwiBorderType iwBorderType2; ::ipp::IwiImage iwMask; ::ipp::IwiImage iwInter; ::ipp::IwiSize initSize(width, height); ::ipp::IwiSize kernelSize(kernel_width, kernel_height); - ::ipp::IwiPoint anchor(anchor_x, anchor_y); IppDataType type = ippiGetDataType(CV_MAT_DEPTH(src_type)); int channels = 
CV_MAT_CN(src_type); IwiMorphologyType morphType = ippiGetMorphologyType(op); @@ -1169,68 +1190,99 @@ static bool ippMorph(int op, int src_type, int dst_type, if(src_type != dst_type) return false; + if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height)) + return false; + try { ::ipp::IwiImage iwSrc(initSize, type, channels, ::ipp::IwiBorderSize(roi_x, roi_y, roi_width-roi_x-width, roi_height-roi_y-height), (void*)src_data, src_step); ::ipp::IwiImage iwDst(initSize, type, channels, ::ipp::IwiBorderSize(roi_x2, roi_y2, roi_width2-roi_x2-width, roi_height2-roi_y2-height), (void*)dst_data, dst_step); - ::ipp::iwiFilterMorphology_GetBorderSize(morphType, kernelSize, iwBorderSize); - if(morphType != iwiMorphErode && morphType != iwiMorphDilate) - { - iwBorderSize.borderLeft /= 2; - iwBorderSize.borderTop /= 2; - iwBorderSize.borderRight /= 2; - iwBorderSize.borderBottom /= 2; - } - + iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize); iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize); - if(!iwBorderType.m_borderType || ((iwBorderType.m_borderFlags&ippBorderInMem) && (iwBorderType.m_borderFlags&ippBorderInMem) != ippBorderInMem)) + if(!iwBorderType) return false; - - if(iwBorderType.m_borderType == ippBorderConst) + if(iterations > 1) { - if(Vec(borderValue) == morphologyDefaultBorderValue()) - iwBorderType.m_borderType = ippBorderDefault; - else - iwBorderType.SetValue(borderValue[0], borderValue[1], borderValue[2], borderValue[3]); + // Check dst border for second and later iterations + iwBorderSize2 = ::ipp::iwiSizeToBorderSize(kernelSize); + iwBorderType2 = ippiGetBorder(iwDst, borderType, iwBorderSize2); + if(!iwBorderType2) + return false; } - if(morphType != iwiMorphErode && morphType != iwiMorphDilate) + + if(morphType != iwiMorphErode && morphType != iwiMorphDilate && morphType != iwiMorphGradient) { - if((iwBorderType.m_borderFlags&ippBorderInMem) == ippBorderInMem) - iwBorderType.m_borderFlags = ippBorderFirstStageInMem; + // For 
now complex morphology supports only InMem around all sides. This will be improved later. + if((iwBorderType&ippBorderInMem) && (iwBorderType&ippBorderInMem) != ippBorderInMem) + return false; + + if((iwBorderType&ippBorderInMem) == ippBorderInMem) + { + iwBorderType &= ~ippBorderInMem; + iwBorderType &= ippBorderFirstStageInMem; + } } - // Test input parameters on dummy structures + if(iwBorderType.StripFlags() == ippBorderConst) { - ::ipp::IwiImage testSrc(initSize, type, channels); - ::ipp::IwiImage testDst(initSize, type, channels); - ::ipp::IwiImage testMask(ippiSize(kernel_width, kernel_height), ipp8u, CV_MAT_CN(kernel_type)); - - ::ipp::iwiFilterMorphology(&testSrc, &testDst, morphType, &testMask, &anchor, iwBorderType); + if(Vec(borderValue) == morphologyDefaultBorderValue()) + iwBorderType.SetType(ippBorderDefault); + else + iwBorderType.m_value = ::ipp::IwValueFloat(borderValue[0], borderValue[1], borderValue[2], borderValue[3]); } iwMask.Init(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, kernel_data, kernel_step); - if((int)kernel_step != kernel_width || CV_MAT_DEPTH(kernel_type) != CV_8U) + + ::ipp::IwiImage iwMaskLoc = iwMask; + if(morphType == iwiMorphDilate) { - kernelTempBuffer.Alloc(kernel_width*kernel_height); - ::ipp::IwiImage iwMaskTmp(ippiSize(kernel_width, kernel_height), ipp8u, 1, 0, kernelTempBuffer, kernel_width); - ::ipp::iwiScale(&iwMask, &iwMaskTmp, 1, 0); - iwMask = iwMaskTmp; + iwMaskLoc.Alloc(iwMask.m_size, iwMask.m_dataType, iwMask.m_channels); + ::ipp::iwiMirror(iwMask, iwMaskLoc, ippAxsBoth); + iwMask = iwMaskLoc; } if(iterations > 1) { - iwInter.Alloc(initSize, type, channels); + // OpenCV uses in mem border from dst for two and more iterations, so we need to keep this border in intermediate image + iwInter.Alloc(initSize, type, channels, iwBorderSize2); ::ipp::IwiImage *pSwap[2] = {&iwInter, &iwDst}; - ::ipp::IwiBorderType iterBorder = iwBorderType; - 
iterBorder.m_borderFlags = 0; - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType); + + // Copy border only + { + if(iwBorderSize2.top) + { + ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, -iwBorderSize2.top, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.top); + ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi); + ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi); + } + if(iwBorderSize2.bottom) + { + ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, iwDst.m_size.height, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.bottom); + ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi); + ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi); + } + if(iwBorderSize2.left) + { + ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, 0, iwBorderSize2.left, iwDst.m_size.height); + ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi); + ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi); + } + if(iwBorderSize2.right) + { + ::ipp::IwiRoi borderRoi(iwDst.m_size.width, 0, iwBorderSize2.left, iwDst.m_size.height); + ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi); + ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi); + } + } + iwBorderType2.SetType(iwBorderType); for(int i = 0; i < iterations-1; i++) - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, pSwap[i&0x1], pSwap[(i+1)&0x1], morphType, &iwMask, NULL, iterBorder); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, *pSwap[i&0x1], *pSwap[(i+1)&0x1], morphType, iwMask, ::ipp::IwDefault(), iwBorderType2); if(iterations&0x1) - CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst); } else { @@ -1238,11 +1290,11 @@ static bool ippMorph(int op, int src_type, int dst_type, { iwInter.Alloc(initSize, 
type, channels); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst); } else - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwDst, morphType, &iwMask, NULL, iwBorderType); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwDst, morphType, iwMask, ::ipp::IwDefault(), iwBorderType); } } catch(::ipp::IwException ex) @@ -1912,6 +1964,7 @@ static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op, } #endif +#define IPP_DISABLE_MORPH_ADV 1 #ifdef HAVE_IPP #if !IPP_DISABLE_MORPH_ADV namespace cv { diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index dbddd33..7d6dc72 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -1729,80 +1729,47 @@ namespace cv { static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType) { +#ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP() - // Problem with SSE42 optimization for 16s -#if IPP_DISABLE_PERF_BOX16S_SSE42 - if(src.depth() == CV_16S && !(ipp::getIppFeatures()&ippCPUID_AVX)) +#if IPP_VERSION_X100 < 201801 + // Problem with SSE42 optimization for 16s and some 8u modes + if(ipp::getIppTopFeatures() == ippCPUID_SSE42 && (((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 3 || src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 3 && (ksize.width > 5 || ksize.height > 5)))) return false; -#endif - int stype = src.type(), cn = CV_MAT_CN(stype); - IppiBorderType ippBorderType = ippiGetBorderType(borderType & ~BORDER_ISOLATED); - IppDataType ippType = ippiGetDataType(stype); - Point ocvAnchor, ippAnchor; - ocvAnchor.x = anchor.x < 0 ? 
ksize.width / 2 : anchor.x; - ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y; - ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0); - ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0); - - if(normalize && (!src.isSubmatrix() || borderType&BORDER_ISOLATED) && stype == dst.type() && - (ippBorderType == ippBorderRepl || /* returns ippStsStepErr: Step value is not valid */ - ippBorderType == ippBorderConst || - ippBorderType == ippBorderMirror) && ocvAnchor == ippAnchor) // returns ippStsMaskSizeErr: mask has an illegal value - { - IppStatus status; - Ipp32s bufSize = 0; - IppiSize roiSize = { dst.cols, dst.rows }; - IppiSize maskSize = { ksize.width, ksize.height }; - IppAutoBuffer buffer; - - if(ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippType, cn, &bufSize) < 0) - return false; + // Other optimizations have some degradations too + if((((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 1 && (ksize.width > 5 || ksize.height > 5)))) + return false; +#endif - buffer.allocate(bufSize); + if(!normalize) + return false; - #define IPP_FILTER_BOX_BORDER(ippType, flavor)\ - {\ - ippType borderValue[4] = { 0, 0, 0, 0 };\ - status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr(), (int)src.step, dst.ptr(),\ - (int)dst.step, roiSize, maskSize,\ - ippBorderType, borderValue, buffer);\ - } + if(!ippiCheckAnchor(anchor, ksize)) + return false; - if (stype == CV_8UC1) - IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C1R) - else if (stype == CV_8UC3) - IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C3R) - else if (stype == CV_8UC4) - IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C4R) - else if (stype == CV_16UC1) - IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C1R) - else if (stype == CV_16UC3) - IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C3R) - else if (stype == CV_16UC4) - IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C4R) - else if (stype == CV_16SC1) - IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C1R) - else if (stype == CV_16SC3) - 
IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C3R) - else if (stype == CV_16SC4) - IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C4R) - else if (stype == CV_32FC1) - IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C1R) - else if (stype == CV_32FC3) - IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C3R) - else if (stype == CV_32FC4) - IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C4R) - else + try + { + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + ::ipp::IwiSize iwKSize = ippiGetSize(ksize); + ::ipp::IwiBorderSize borderSize(iwKSize); + ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); + if(!ippBorder) return false; - if(status >= 0) - return true; + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder); + } + catch (::ipp::IwException) + { + return false; } -#undef IPP_FILTER_BOX_BORDER + return true; +#else + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType); return false; +#endif } } #endif @@ -2241,8 +2208,11 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize, #endif #ifdef HAVE_IPP -#define IPP_DISABLE_FILTERING_INMEM_PARTIAL 1 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling +#if IPP_VERSION_X100 == 201702 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling +#define IPP_GAUSSIANBLUR_PARALLEL 0 +#else #define IPP_GAUSSIANBLUR_PARALLEL 1 +#endif #ifdef HAVE_IPP_IW @@ -2266,8 +2236,8 @@ public: try { - ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &m_src, &m_dst, m_kernelSize, m_sigma, m_border, &roi); + ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile); } catch(::ipp::IwException e) { @@ -2295,7 +2265,7 @@ 
static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, #ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 <= 201702 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) +#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); return false; // bug on ia32 #else @@ -2313,17 +2283,15 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, { Mat src = _src.getMat(); Mat dst = _dst.getMat(); - ::ipp::IwiImage iwSrc = ippiGetImage(src); - ::ipp::IwiImage iwDst = ippiGetImage(dst); - ::ipp::IwiBorderSize borderSize(::ipp::IwiSize(ippiSize(ksize))); + ::ipp::IwiImage iwSrc = ippiGetImage(src); + ::ipp::IwiImage iwDst = ippiGetImage(dst); + ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize)); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); - if(!ippBorder.m_borderType) + if(!ippBorder) return false; - const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL && - ((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem; const int threads = ippiSuggestThreadsNum(iwDst, 2); - if(!disableThreading && IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { + if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) { bool ok; ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok); @@ -2335,7 +2303,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, if(!ok) return false; } else { - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &iwSrc, &iwDst, ksize.width, (float) sigma1, ippBorder); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder); } } catch (::ipp::IwException ex) @@ -3411,6 +3379,12 @@ static 
bool ipp_medianFilter(Mat &src0, Mat &dst, int ksize) { CV_INSTRUMENT_REGION_IPP() +#if IPP_VERSION_X100 < 201801 + // Degradations for big kernel + if(ksize > 7) + return false; +#endif + { int bufSize; IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize); @@ -4279,8 +4253,8 @@ public: try { - ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, dst.m_size.width, range.end - range.start); - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &src, &dst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, borderType, &roi); + ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, dst.m_size.width, range.end - range.start); + CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, src, dst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), borderType, tile); } catch(::ipp::IwException) { @@ -4318,13 +4292,11 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do ::ipp::IwiImage iwDst = ippiGetImage(dst); ::ipp::IwiBorderSize borderSize(radius); ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize)); - if(!ippBorder.m_borderType) + if(!ippBorder) return false; - const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL && - ((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem; const int threads = ippiSuggestThreadsNum(iwDst, 2); - if(!disableThreading && IPP_BILATERAL_PARALLEL && threads > 1) { + if(IPP_BILATERAL_PARALLEL && threads > 1) { bool ok = true; Range range(0, (int)iwDst.m_size.height); ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok); @@ -4336,7 +4308,7 @@ static bool ipp_bilateralFilter(Mat &src, Mat &dst, int d, double sigmaColor, do if(!ok) return false; } else { - CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &iwSrc, &iwDst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, ippBorder); + 
CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), ippBorder); } } catch (::ipp::IwException) diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index 674d12b..130583c 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -962,19 +962,18 @@ static bool ipp_getThreshVal_Otsu_8u( const unsigned char* _src, int step, Size { CV_INSTRUMENT_REGION_IPP() -#if IPP_VERSION_X100 >= 810 - int ippStatus = -1; +// Performance degradations +#if IPP_VERSION_X100 >= 201800 IppiSize srcSize = { size.width, size.height }; - CV_SUPPRESS_DEPRECATED_START - ippStatus = CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh); - CV_SUPPRESS_DEPRECATED_END - if(ippStatus >= 0) - return true; + if(CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh) < 0) + return false; + + return true; #else CV_UNUSED(_src); CV_UNUSED(step); CV_UNUSED(size); CV_UNUSED(thresh); -#endif return false; +#endif } #endif @@ -992,7 +991,7 @@ getThreshVal_Otsu_8u( const Mat& _src ) #ifdef HAVE_IPP unsigned char thresh; - CV_IPP_RUN(IPP_VERSION_X100 >= 810, ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh); + CV_IPP_RUN_FAST(ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh); #endif const int N = 256; diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp index eea71c4..5884ff8 100644 --- a/modules/objdetect/src/haar.cpp +++ b/modules/objdetect/src/haar.cpp @@ -136,7 +136,7 @@ icvReleaseHidHaarClassifierCascade( CvHidHaarClassifierCascade** _cascade ) for( i = 0; i < cascade->count; i++ ) { if( cascade->ipp_stages[i] ) -#if IPP_VERSION_X100 < 900 +#if IPP_VERSION_X100 < 900 && !IPP_DISABLE_HAAR ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] ); #else cvFree(&cascade->ipp_stages[i]); diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp 
b/modules/ts/include/opencv2/ts/ts_perf.hpp index 288d8d7..37e1586 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -339,7 +339,7 @@ typedef struct ImplData // convert flags register to more handy variables void flagsToVars(int flags) { -#if defined(HAVE_IPP_ICV_ONLY) +#if defined(HAVE_IPP_ICV) ipp = 0; icv = ((flags&CV_IMPL_IPP) > 0); #else diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index 702bdf3..5d229cf 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -3078,6 +3078,16 @@ void printVersionInfo(bool useStdOut) ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization); if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl; #endif + +#ifdef HAVE_IPP + const char * ipp_optimization = cv::ipp::useIPP()? "enabled" : "disabled"; + ::testing::Test::RecordProperty("cv_ipp_optimization", ipp_optimization); + if (useStdOut) std::cout << "Intel(R) IPP optimization: " << ipp_optimization << std::endl; + + cv::String ippVer = cv::ipp::getIppVersion(); + ::testing::Test::RecordProperty("cv_ipp_version", ippVer); + if(useStdOut) std::cout << "Intel(R) IPP version: " << ippVer.c_str() << std::endl; +#endif } -- 2.7.4