- Optimization set change: IPP integrations now provide code for SSE4.2, AVX2 and AVX-512 (SKX) CPUs only. On hardware below SSE4.2, IPP code is disabled.
- Performance regression fixes for IPP code paths;
- cv::boxFilter integration improvement;
- cv::filter2D integration improvement;
ocv_include_directories(${IPP_INCLUDE_DIRS} ${IPP_IW_PATH}/include)
add_definitions(-DIW_BUILD)
-if(HAVE_IPP_ICV_ONLY)
+if(HAVE_IPP_ICV)
add_definitions(-DICV_BASE)
endif()
if(UNIX)
if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -Wno-unused-function -Wno-missing-braces -Wno-missing-field-initializers")
+ endif()
+ if (CMAKE_C_COMPILER_ID MATCHES "Clang")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-self-assign")
endif()
endif()
set(${root_var} "" PARENT_SCOPE)
# Commit SHA in the opencv_3rdparty repo
- set(IPPICV_COMMIT "a62e20676a60ee0ad6581e217fe7e4bada3b95db")
+ set(IPPICV_COMMIT "dfe3162c237af211e98b8960018b564bc209261d")
# Define actual ICV versions
if(APPLE)
set(OPENCV_ICV_PLATFORM "macosx")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_mac")
if(X86_64)
- set(OPENCV_ICV_NAME "ippicv_2017u2_mac_intel64_20170418.tgz")
- set(OPENCV_ICV_HASH "0c25953c99dbb499ff502485a9356d8d")
+ set(OPENCV_ICV_NAME "ippicv_2017u3_mac_intel64_general_20170822.tgz")
+ set(OPENCV_ICV_HASH "c1ebb5dfa5b7f54b0c44e1917805a463")
else()
- set(OPENCV_ICV_NAME "ippicv_2017u2_mac_ia32_20170418.tgz")
- set(OPENCV_ICV_HASH "5f225948f3f64067c681293c098d50d8")
+ set(OPENCV_ICV_NAME "ippicv_2017u3_mac_ia32_general_20170822.tgz")
+ set(OPENCV_ICV_HASH "49b05a669042753ae75895a445ebd612")
endif()
elseif((UNIX AND NOT ANDROID) OR (UNIX AND ANDROID_ABI MATCHES "x86"))
set(OPENCV_ICV_PLATFORM "linux")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx")
if(X86_64)
- set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_intel64_20170418.tgz")
- set(OPENCV_ICV_HASH "87cbdeb627415d8e4bc811156289fa3a")
+ set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_intel64_general_20170822.tgz")
+ set(OPENCV_ICV_HASH "4e0352ce96473837b1d671ce87f17359")
else()
- set(OPENCV_ICV_NAME "ippicv_2017u2_lnx_ia32_20170418.tgz")
- set(OPENCV_ICV_HASH "f2cece00d802d4dea86df52ed095257e")
+ set(OPENCV_ICV_NAME "ippicv_2017u3_lnx_ia32_general_20170822.tgz")
+ set(OPENCV_ICV_HASH "dcdb0ba4b123f240596db1840cd59a76")
endif()
elseif(WIN32 AND NOT ARM)
set(OPENCV_ICV_PLATFORM "windows")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win")
if(X86_64)
- set(OPENCV_ICV_NAME "ippicv_2017u2_win_intel64_20170418.zip")
- set(OPENCV_ICV_HASH "75060a0c662c0800f48995b7e9b085f6")
+ set(OPENCV_ICV_NAME "ippicv_2017u3_win_intel64_general_20170822.zip")
+ set(OPENCV_ICV_HASH "0421e642bc7ad741a2236d3ec4190bdd")
else()
- set(OPENCV_ICV_NAME "ippicv_2017u2_win_ia32_20170418.zip")
- set(OPENCV_ICV_HASH "60fcf3ccd9a2ebc9e432ffb5cb91638b")
+ set(OPENCV_ICV_NAME "ippicv_2017u3_win_ia32_general_20170822.zip")
+ set(OPENCV_ICV_HASH "8a7680ae352c192de2e2e34936164bd0")
endif()
else()
return()
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF (WIN32 AND NOT WINRT) )
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF (WIN32 AND NOT WINRT) )
-OCV_OPTION(WITH_IPP_A "Include Intel IPP_A support" OFF IF (MSVC OR X86 OR X86_64) )
OCV_OPTION(WITH_MATLAB "Include Matlab support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT))
OCV_OPTION(WITH_VA "Include VA support" OFF IF (UNIX AND NOT ANDROID) )
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
if(WITH_IPP AND HAVE_IPP)
status(" Use Intel IPP:" "${IPP_VERSION_STR} [${IPP_VERSION_MAJOR}.${IPP_VERSION_MINOR}.${IPP_VERSION_BUILD}]")
status(" at:" "${IPP_ROOT_DIR}")
- if(NOT HAVE_IPP_ICV_ONLY)
+ if(NOT HAVE_IPP_ICV)
status(" linked:" BUILD_WITH_DYNAMIC_IPP THEN "dynamic" ELSE "static")
endif()
if(HAVE_IPP_IW)
if(BUILD_IPP_IW)
- status(" Use Intel IPP IW:" "build (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
+ status(" Use Intel IPP IW:" "sources (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
else()
- status(" Use Intel IPP IW:" "prebuilt binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
+ status(" Use Intel IPP IW:" "binaries (${IW_VERSION_MAJOR}.${IW_VERSION_MINOR}.${IW_VERSION_UPDATE})")
endif()
+ status(" at:" "${IPP_IW_PATH}")
else()
status(" Use Intel IPP IW:" NO)
endif()
status(" Use Intel IPP IW:" WITH_IPP AND NOT HAVE_IPP AND HAVE_IPP_IW THEN "IPP not found or implicitly disabled" ELSE NO)
endif()
-if(DEFINED WITH_IPP_A)
-status(" Use Intel IPP Async:" HAVE_IPP_A THEN "YES" ELSE NO)
-endif(DEFINED WITH_IPP_A)
-
if(DEFINED WITH_VA)
status(" Use VA:" HAVE_VA THEN "YES" ELSE NO)
endif(DEFINED WITH_VA)
#
# On return this will define:
#
-# HAVE_IPP - True if Intel IPP found
-# HAVE_IPP_ICV_ONLY - True if Intel IPP ICV version is available
-# IPP_ROOT_DIR - root of IPP installation
-# IPP_INCLUDE_DIRS - IPP include folder
-# IPP_LIBRARIES - IPP libraries that are used by OpenCV
-# IPP_VERSION_STR - string with the newest detected IPP version
-# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
+# HAVE_IPP - True if Intel IPP found
+# HAVE_IPP_ICV - True if Intel IPP ICV version is available
+# IPP_ROOT_DIR - root of IPP installation
+# IPP_INCLUDE_DIRS - IPP include folder
+# IPP_LIBRARIES - IPP libraries that are used by OpenCV
+# IPP_VERSION_STR - string with the newest detected IPP version
+# IPP_VERSION_MAJOR - numbers of IPP version (MAJOR.MINOR.BUILD)
# IPP_VERSION_MINOR
# IPP_VERSION_BUILD
#
#
unset(HAVE_IPP CACHE)
-unset(HAVE_IPP_ICV_ONLY)
+unset(HAVE_IPP_ICV)
unset(IPP_ROOT_DIR)
unset(IPP_INCLUDE_DIRS)
unset(IPP_LIBRARIES)
macro(_ipp_not_supported)
message(STATUS ${ARGN})
unset(HAVE_IPP)
- unset(HAVE_IPP_ICV_ONLY)
+ unset(HAVE_IPP_ICV)
unset(IPP_VERSION_STR)
return()
endmacro()
set(__msg)
if(EXISTS ${IPP_ROOT_DIR}/include/ippicv_redefs.h)
set(__msg " (ICV version)")
- set(HAVE_IPP_ICV_ONLY 1)
+ set(HAVE_IPP_ICV 1)
elseif(EXISTS ${IPP_ROOT_DIR}/include/ipp.h)
# nothing
else()
set(IPP_LIBRARY_DIR ${DIR})
endmacro()
- if(APPLE AND NOT HAVE_IPP_ICV_ONLY)
+ if(APPLE AND NOT HAVE_IPP_ICV)
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib)
elseif(IPP_X64)
_ipp_set_library_dir(${IPP_ROOT_DIR}/lib/intel64)
macro(_ipp_add_library name)
# dynamic linking is only supported for standalone version of Intel IPP
- if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
+ if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
if (WIN32)
set(IPP_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX})
set(IPP_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
set(IPP_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
endif ()
if (EXISTS ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
- if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
+ if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
# When using dynamic libraries from standalone Intel IPP it is your responsibility to install those on the target system
list(APPEND IPP_LIBRARIES ${IPP_LIBRARY_DIR}/${IPP_LIB_PREFIX}${IPP_PREFIX}${name}${IPP_SUFFIX}${IPP_LIB_SUFFIX})
else ()
set(IPP_PREFIX "ipp")
if(${IPP_VERSION_STR} VERSION_LESS "8.0")
- if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
+ if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 7.x
else ()
set(IPP_SUFFIX "_l") # static not threaded libs suffix Intel IPP 7.x
endif ()
else ()
if(WIN32)
- if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV_ONLY)
+ if (BUILD_WITH_DYNAMIC_IPP AND NOT HAVE_IPP_ICV)
set(IPP_SUFFIX "") # dynamic not threaded libs suffix Intel IPP 8.x for Windows
else ()
set(IPP_SUFFIX "mt") # static not threaded libs suffix Intel IPP 8.x for Windows
endif()
endif()
- if(HAVE_IPP_ICV_ONLY)
+ if(HAVE_IPP_ICV)
_ipp_add_library(icv)
else()
_ipp_add_library(cv)
message(STATUS "${MESSAGE}")
endif()
endmacro()
+file(TO_CMAKE_PATH "${IPPROOT}" IPPROOT)
# This function detects Intel IPP IW version by analyzing .h file
macro(ippiw_setup PATH BUILD)
# take Intel IPP IW from ICV package
-if(NOT HAVE_IPP_ICV_ONLY AND BUILD_IPP_IW)
+if(NOT HAVE_IPP_ICV AND BUILD_IPP_IW)
message(STATUS "Cannot find Intel IPP IW. Checking \"Intel IPP for OpenCV\" package")
set(TEMP_ROOT 0)
include("${OpenCV_SOURCE_DIR}/3rdparty/ippicv/ippicv.cmake")
/* Intel Integrated Performance Primitives */
#cmakedefine HAVE_IPP
-#cmakedefine HAVE_IPP_ICV_ONLY
+#cmakedefine HAVE_IPP_ICV
#cmakedefine HAVE_IPP_IW
/* Intel IPP Async */
int line = 0);
CV_EXPORTS int getIppStatus();
CV_EXPORTS String getIppErrorLocation();
-CV_EXPORTS_W bool useIPP();
-CV_EXPORTS_W void setUseIPP(bool flag);
+CV_EXPORTS_W bool useIPP();
+CV_EXPORTS_W void setUseIPP(bool flag);
+CV_EXPORTS_W String getIppVersion();
+
+// IPP Not-Exact mode. This function may force the use of IPP when both IPP and OpenCV provide proper results
+// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
+CV_EXPORTS_W bool useIPP_NE();
+CV_EXPORTS_W void setUseIPP_NE(bool flag);
} // ipp
#define IPP_DISABLE_WARPAFFINE 1 // Different results
#define IPP_DISABLE_WARPPERSPECTIVE 1 // Different results
#define IPP_DISABLE_REMAP 1 // Different results
-#define IPP_DISABLE_MORPH_ADV 1 // mask flipping in IPP
-#define IPP_DISABLE_SORT_IDX 0 // different order in index tables
#define IPP_DISABLE_YUV_RGB 1 // accuracy difference
#define IPP_DISABLE_RGB_YUV 1 // breaks OCL accuracy tests
#define IPP_DISABLE_RGB_HSV 1 // breaks OCL accuracy tests
#define IPP_DISABLE_XYZ_RGB 1 // big accuracy difference
#define IPP_DISABLE_HAAR 1 // improper integration/results
#define IPP_DISABLE_HOUGH 1 // improper integration/results
-#define IPP_DISABLE_RESIZE_8U 1 // Incompatible accuracy
-#define IPP_DISABLE_RESIZE_NEAREST 1 // Accuracy mismatch (max diff 1)
-#define IPP_DISABLE_RESIZE_AREA 1 // Accuracy mismatch (max diff 1)
-
-#define IPP_DISABLE_MINMAX_NAN_SSE42 1 // cv::minMaxIdx problem with NaN input
// Temporarily disabled named IPP regions. Performance
#define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations
#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653)
#define IPP_DISABLE_PERF_TRUE_DIST_MT 1 // cv::distanceTransform OpenCV MT performance is better
#define IPP_DISABLE_PERF_CANNY_MT 1 // cv::Canny OpenCV MT performance is better
-#define IPP_DISABLE_PERF_HISTU32F_SSE42 1 // cv::calcHist optimizations problem
-#define IPP_DISABLE_PERF_MORPH_SSE42 1 // cv::erode, cv::dilate optimizations problem
-#define IPP_DISABLE_PERF_MAG_SSE42 1 // cv::magnitude optimizations problem
-#define IPP_DISABLE_PERF_BOX16S_SSE42 1 // cv::boxFilter optimizations problem
#ifdef HAVE_IPP
#include "ippversion.h"
#define IPP_VERSION_X100 (IPP_VERSION_MAJOR * 100 + IPP_VERSION_MINOR*10 + IPP_VERSION_UPDATE)
-#ifdef HAVE_IPP_ICV_ONLY
+#ifdef HAVE_IPP_ICV
#define ICV_BASE
#if IPP_VERSION_X100 >= 201700
#include "ippicv.h"
#endif
#ifdef HAVE_IPP_IW
#include "iw++/iw.hpp"
+#include "iw/iw_ll.h"
#endif
#if IPP_VERSION_X100 >= 201700
#define setIppErrorStatus() cv::ipp::setIppStatus(-1, CV_Func, __FILE__, __LINE__)
+#define ippCPUID_AVX512_SKX (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512VL|ippCPUID_AVX512BW|ippCPUID_AVX512DQ)
+#define ippCPUID_AVX512_KNL (ippCPUID_AVX512F|ippCPUID_AVX512CD|ippCPUID_AVX512PF|ippCPUID_AVX512ER)
+
+namespace cv
+{
+namespace ipp
+{
+CV_EXPORTS unsigned long long getIppTopFeatures(); // Returns top major enabled IPP feature flag
+}
+}
+
static inline IppiSize ippiSize(size_t width, size_t height)
{
IppiSize size = { (int)width, (int)height };
(IppDataType)-1;
}
+// Suggests how many threads to use for an operation over a width x height image.
+//   width, height - image dimensions in pixels
+//   elemSize      - size of one pixel in bytes (all channels)
+//   multiplier    - scale applied to the image footprint to estimate the memory
+//                   touched by the operation
+// Returns 1 when cv::getNumThreads() reports a single thread or the image has fewer
+// than 64 rows; otherwise one thread per L2-cache-sized portion of the estimated
+// footprint, clamped to the range [1, cv::getNumThreads()].
+static inline int ippiSuggestThreadsNum(size_t width, size_t height, size_t elemSize, double multiplier)
+{
+    int threads = cv::getNumThreads();
+    if(threads > 1 && height >= 64)
+    {
+        // Keep the estimate in size_t: narrowing it through int overflows once the
+        // footprint exceeds INT_MAX bytes and made huge images fall back to one thread.
+        double estimate = width*height*elemSize*multiplier;
+        size_t opMemory = (estimate > 0) ? (size_t)estimate : 0;
+        int l2cache = 0;
+#if IPP_VERSION_X100 >= 201700
+        ippGetL2CacheSize(&l2cache);
+#endif
+        if(!l2cache)
+            l2cache = 1 << 18; // fall back to 256 KB when no cache size is reported
+
+        // Clamp while still in size_t so the final narrowing to int cannot truncate.
+        size_t portions = opMemory/(size_t)l2cache;
+        if(portions >= (size_t)threads)
+            return threads;
+        return IPP_MAX(1, (int)portions);
+    }
+    return 1;
+}
+
+// cv::Mat convenience overload: forwards the matrix columns, rows and per-element
+// byte size to the size-based ippiSuggestThreadsNum.
+static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
+{
+    const size_t pixelBytes = image.elemSize();
+    return ippiSuggestThreadsNum((size_t)image.cols, (size_t)image.rows, pixelBytes, multiplier);
+}
+
#ifdef HAVE_IPP_IW
+// Reports whether the anchor (x, y) is the centre of a kernelWidth x kernelHeight kernel,
+// i.e. x == (kernelWidth-1)/2 and y == (kernelHeight-1)/2 using integer division.
+static inline bool ippiCheckAnchor(int x, int y, int kernelWidth, int kernelHeight)
+{
+    const int centerX = (kernelWidth - 1)/2;
+    const int centerY = (kernelHeight - 1)/2;
+    return x == centerX && y == centerY;
+}
+
+// Converts an OpenCV cv::Size into an IPP IW ::ipp::IwiSize, casting each
+// dimension to IwSize.
+static inline ::ipp::IwiSize ippiGetSize(const cv::Size & size)
+{
+    const IwSize iwWidth  = (IwSize)size.width;
+    const IwSize iwHeight = (IwSize)size.height;
+    return ::ipp::IwiSize(iwWidth, iwHeight);
+}
+
static inline IwiDerivativeType ippiGetDerivType(int dx, int dy, bool nvert)
{
return (dx == 1 && dy == 0) ? ((nvert)?iwiDerivNVerFirst:iwiDerivVerFirst) :
cv::Point offset;
src.locateROI(origSize, offset);
- inMemBorder.borderLeft = (Ipp32u)offset.x;
- inMemBorder.borderTop = (Ipp32u)offset.y;
- inMemBorder.borderRight = (Ipp32u)(origSize.width - src.cols - offset.x);
- inMemBorder.borderBottom = (Ipp32u)(origSize.height - src.rows - offset.y);
+ inMemBorder.left = (IwSize)offset.x;
+ inMemBorder.top = (IwSize)offset.y;
+ inMemBorder.right = (IwSize)(origSize.width - src.cols - offset.x);
+ inMemBorder.bottom = (IwSize)(origSize.height - src.rows - offset.y);
}
dst.Init(ippiSize(src.size()), ippiGetDataType(src.depth()), src.channels(), inMemBorder, (void*)src.ptr(), src.step);
return image;
}
-static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, IppiBorderSize &borderSize)
+static inline IppiBorderType ippiGetBorder(::ipp::IwiImage &image, int ocvBorderType, ipp::IwiBorderSize &borderSize)
{
int inMemFlags = 0;
IppiBorderType border = ippiGetBorderType(ocvBorderType & ~cv::BORDER_ISOLATED);
if(!(ocvBorderType & cv::BORDER_ISOLATED))
{
- if(image.m_inMemSize.borderLeft)
+ if(image.m_inMemSize.left)
{
- if(image.m_inMemSize.borderLeft >= borderSize.borderLeft)
+ if(image.m_inMemSize.left >= borderSize.left)
inMemFlags |= ippBorderInMemLeft;
else
return (IppiBorderType)0;
}
else
- borderSize.borderLeft = 0;
- if(image.m_inMemSize.borderTop)
+ borderSize.left = 0;
+ if(image.m_inMemSize.top)
{
- if(image.m_inMemSize.borderTop >= borderSize.borderTop)
+ if(image.m_inMemSize.top >= borderSize.top)
inMemFlags |= ippBorderInMemTop;
else
return (IppiBorderType)0;
}
else
- borderSize.borderTop = 0;
- if(image.m_inMemSize.borderRight)
+ borderSize.top = 0;
+ if(image.m_inMemSize.right)
{
- if(image.m_inMemSize.borderRight >= borderSize.borderRight)
+ if(image.m_inMemSize.right >= borderSize.right)
inMemFlags |= ippBorderInMemRight;
else
return (IppiBorderType)0;
}
else
- borderSize.borderRight = 0;
- if(image.m_inMemSize.borderBottom)
+ borderSize.right = 0;
+ if(image.m_inMemSize.bottom)
{
- if(image.m_inMemSize.borderBottom >= borderSize.borderBottom)
+ if(image.m_inMemSize.bottom >= borderSize.bottom)
inMemFlags |= ippBorderInMemBottom;
else
return (IppiBorderType)0;
}
else
- borderSize.borderBottom = 0;
+ borderSize.bottom = 0;
}
else
- borderSize.borderLeft = borderSize.borderRight = borderSize.borderTop = borderSize.borderBottom = 0;
+ borderSize.left = borderSize.right = borderSize.top = borderSize.bottom = 0;
return (IppiBorderType)(border|inMemFlags);
}
-static inline ::ipp::IwValue ippiGetValue(const cv::Scalar &scalar)
+static inline ::ipp::IwValueFloat ippiGetValue(const cv::Scalar &scalar)
{
- return ::ipp::IwValue(scalar[0], scalar[1], scalar[2], scalar[3]);
+ return ::ipp::IwValueFloat(scalar[0], scalar[1], scalar[2], scalar[3]);
}
static inline int ippiSuggestThreadsNum(const ::ipp::IwiImage &image, double multiplier)
{
- int threads = cv::getNumThreads();
- if(image.m_size.height > threads)
- {
- size_t opMemory = (int)(image.m_step*image.m_size.height*multiplier);
- int l2cache = 0;
-#if IPP_VERSION_X100 >= 201700
- ippGetL2CacheSize(&l2cache);
-#endif
- if(!l2cache)
- l2cache = 1 << 18;
-
- return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
- }
- return 1;
+ return ippiSuggestThreadsNum(image.m_size.width, image.m_size.height, image.m_typeSize*image.m_channels, multiplier);
}
#endif
-static inline int ippiSuggestThreadsNum(const cv::Mat &image, double multiplier)
-{
- int threads = cv::getNumThreads();
- if(image.rows > threads)
- {
- size_t opMemory = (int)(image.total()*multiplier);
- int l2cache = 0;
-#if IPP_VERSION_X100 >= 201700
- ippGetL2CacheSize(&l2cache);
-#endif
- if(!l2cache)
- l2cache = 1 << 18;
-
- return IPP_MAX(1, (IPP_MIN((int)(opMemory/l2cache), threads)));
- }
- return 1;
-}
-
// IPP temporary buffer helper
template<typename T>
class IppAutoBuffer
}
#ifdef HAVE_IPP
-#ifdef HAVE_IPP_IW
-extern "C" {
-IW_DECL(IppStatus) llwiCopySplit(const void *pSrc, int srcStep, void* const pDstOrig[], int dstStep,
- IppiSize size, int typeSize, int channels);
-}
-#endif
namespace cv {
static bool ipp_split(const Mat& src, Mat* mv, int channels)
return false;
}
- return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels) >= 0;
+ return CV_INSTRUMENT_FUN_IPP(llwiCopySplit, src.ptr(), (int)src.step, dstPtrs, (int)dstStep, size, (int)src.elemSize1(), channels, 0) >= 0;
}
else
{
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
- if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels) < 0)
+ if(CV_INSTRUMENT_FUN_IPP(llwiCopySplit, ptrs[0], 0, (void**)&ptrs[1], 0, size, (int)src.elemSize1(), channels, 0) < 0)
return false;
}
return true;
}
#ifdef HAVE_IPP
-#ifdef HAVE_IPP_IW
-extern "C" {
-IW_DECL(IppStatus) llwiCopyMerge(const void* const pSrc[], int srcStep, void *pDst, int dstStep,
- IppiSize size, int typeSize, int channels);
-}
-#endif
namespace cv {
static bool ipp_merge(const Mat* mv, Mat& dst, int channels)
return false;
}
- return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels) >= 0;
+ return CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, srcPtrs, (int)srcStep, dst.ptr(), (int)dst.step, size, (int)mv[0].elemSize1(), channels, 0) >= 0;
}
else
{
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
- if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels) < 0)
+ if(CV_INSTRUMENT_FUN_IPP(llwiCopyMerge, (const void**)&ptrs[1], 0, ptrs[0], 0, size, (int)mv[0].elemSize1(), channels, 0) < 0)
return false;
}
return true;
}
#ifdef HAVE_IPP
-#ifdef HAVE_IPP_IW
-extern "C" {
-IW_DECL(IppStatus) llwiCopyMixed(const void *pSrc, int srcStep, int srcChannels, void *pDst, int dstStep, int dstChannels,
- IppiSize size, int typeSize, int channelsShift);
-}
-#endif
namespace cv
{
-static bool ipp_extractInsertChannel(const Mat &src, Mat &dst, int channel)
+static bool ipp_extractChannel(const Mat &src, Mat &dst, int channel)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
if(src.dims != dst.dims)
return false;
- if(srcChannels == dstChannels || (srcChannels != 1 && dstChannels != 1))
+ if(src.dims <= 2)
+ {
+ IppiSize size = ippiSize(src.size());
+
+ return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, channel, dst.ptr(), (int)dst.step, dstChannels, 0, size, (int)src.elemSize1()) >= 0;
+ }
+ else
+ {
+ const Mat *arrays[] = {&dst, NULL};
+ uchar *ptrs[2] = {NULL};
+ NAryMatIterator it(arrays, ptrs);
+
+ IppiSize size = {(int)it.size, 1};
+
+ for( size_t i = 0; i < it.nplanes; i++, ++it )
+ {
+ if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, channel, ptrs[1], 0, dstChannels, 0, size, (int)src.elemSize1()) < 0)
+ return false;
+ }
+ return true;
+ }
+#else
+ CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(channel);
+ return false;
+#endif
+}
+
+static bool ipp_insertChannel(const Mat &src, Mat &dst, int channel)
+{
+#ifdef HAVE_IPP_IW
+ CV_INSTRUMENT_REGION_IPP()
+
+ int srcChannels = src.channels();
+ int dstChannels = dst.channels();
+
+ if(src.dims != dst.dims)
return false;
if(src.dims <= 2)
{
IppiSize size = ippiSize(src.size());
- return CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, src.ptr(), (int)src.step, srcChannels, dst.ptr(), (int)dst.step, dstChannels, size, (int)src.elemSize1(), channel) >= 0;
+ return CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, src.ptr(), (int)src.step, srcChannels, 0, dst.ptr(), (int)dst.step, dstChannels, channel, size, (int)src.elemSize1()) >= 0;
}
else
{
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
- if(CV_INSTRUMENT_FUN_IPP(llwiCopyMixed, ptrs[0], 0, srcChannels, ptrs[1], 0, dstChannels, size, (int)src.elemSize1(), channel) < 0)
+ if(CV_INSTRUMENT_FUN_IPP(llwiCopyChannel, ptrs[0], 0, srcChannels, 0, ptrs[1], 0, dstChannels, channel, size, (int)src.elemSize1()) < 0)
return false;
}
return true;
_dst.create(src.dims, &src.size[0], depth);
Mat dst = _dst.getMat();
- CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
+ CV_IPP_RUN_FAST(ipp_extractChannel(src, dst, coi))
mixChannels(&src, 1, &dst, 1, ch, 1);
}
Mat src = _src.getMat(), dst = _dst.getMat();
- CV_IPP_RUN_FAST(ipp_extractInsertChannel(src, dst, coi))
+ CV_IPP_RUN_FAST(ipp_insertChannel(src, dst, coi))
mixChannels(&src, 1, &dst, 1, ch, 1);
}
iwSrc.Init(ippiSize(sz), srcDepth, 1, NULL, (void*)src.ptr(), src.step);
iwDst.Init(ippiSize(sz), dstDepth, 1, NULL, (void*)dst.ptr(), dst.step);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
}
else
{
iwSrc.m_ptr = ptrs[0];
iwDst.m_ptr = ptrs[1];
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwDst, alpha, beta, mode);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwDst, alpha, beta, ::ipp::IwiScaleParams(mode));
}
}
}
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
-#ifdef HAVE_IPP_IW
-extern "C" {
-IW_DECL(IppStatus) llwiCopyMask(const void *pSrc, int srcStep, void *pDst, int dstStep,
- IppiSize size, int typeSize, int channels, const Ipp8u *pMask, int maskStep);
-IW_DECL(IppStatus) llwiSet(const double *pValue, void *pDst, int dstStep,
- IppiSize size, IppDataType dataType, int channels);
-IW_DECL(IppStatus) llwiSetMask(const double *pValue, void *pDst, int dstStep,
- IppiSize size, IppDataType dataType, int channels, const Ipp8u *pMask, int maskStep);
-IW_DECL(IppStatus) llwiCopyMakeBorder(const void *pSrc, IppSizeL srcStep, void *pDst, IppSizeL dstStep,
- IppiSizeL size, IppDataType dataType, int channels, IppiBorderSize *pBorderSize, IppiBorderType border, const Ipp64f *pBorderVal);
-}
-#endif
namespace cv
{
if(dst.dims <= 2)
{
- IppiSize size = ippiSize(dst.size());
- IppDataType dataType = ippiGetDataType(dst.depth());
- ::ipp::IwValue s;
+ IppiSize size = ippiSize(dst.size());
+ IppDataType dataType = ippiGetDataType(dst.depth());
+ ::ipp::IwValueFloat s;
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
return CV_INSTRUMENT_FUN_IPP(llwiSetMask, s, dst.ptr(), (int)dst.step, size, dataType, dst.channels(), mask.ptr(), (int)mask.step) >= 0;
uchar *ptrs[2] = {NULL};
NAryMatIterator it(arrays, ptrs);
- IppiSize size = {(int)it.size, 1};
- IppDataType dataType = ippiGetDataType(dst.depth());
- ::ipp::IwValue s;
+ IppiSize size = {(int)it.size, 1};
+ IppDataType dataType = ippiGetDataType(dst.depth());
+ ::ipp::IwValueFloat s;
convertAndUnrollScalar(_val, CV_MAKETYPE(CV_64F, dst.channels()), (uchar*)((Ipp64f*)s), 1);
for( size_t i = 0; i < it.nplanes; i++, ++it)
::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, &iwSrc, &iwDst, ippMode);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
}
catch(::ipp::IwException)
{
if(_src.dims > 2)
return false;
- Rect dstRect(borderSize.borderLeft, borderSize.borderTop,
- _dst.cols - borderSize.borderRight - borderSize.borderLeft,
- _dst.rows - borderSize.borderBottom - borderSize.borderTop);
+ Rect dstRect(borderSize.left, borderSize.top,
+ _dst.cols - borderSize.right - borderSize.left,
+ _dst.rows - borderSize.bottom - borderSize.top);
Mat subDst = Mat(_dst, dstRect);
Mat *pSrc = &_src;
- return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), &borderSize, borderType, &value[0]) >= 0;
+ return CV_INSTRUMENT_FUN_IPP(llwiCopyMakeBorder, pSrc->ptr(), pSrc->step, subDst.ptr(), subDst.step, size, dataType, _src.channels(), borderSize, borderType, &value[0]) >= 0;
#else
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(top); CV_UNUSED(bottom); CV_UNUSED(left); CV_UNUSED(right);
CV_UNUSED(_borderType); CV_UNUSED(value);
CV_INSTRUMENT_REGION()
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
- CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
+ // SSE42 performance issues
+ CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
CV_CPU_DISPATCH_MODES_ALL);
CV_INSTRUMENT_REGION()
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
- CV_IPP_RUN(!IPP_DISABLE_PERF_MAG_SSE42 || (ipp::getIppFeatures()&ippCPUID_AVX), CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
+ // SSE42 performance issues
+ CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
CV_CPU_DISPATCH_MODES_ALL);
CV_INSTRUMENT_REGION()
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
- CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL);
CV_INSTRUMENT_REGION()
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
- CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL);
{
double r = 0;
#if ARITHM_USE_IPP
- CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
+ CV_IPP_RUN(IPP_VERSION_X100 > 201800 || cv::ipp::getIppTopFeatures() != ippCPUID_SSE42, CV_INSTRUMENT_FUN_IPP(ippiDotProd_8u64f_C1R, src1, len*sizeof(uchar), src2, len*sizeof(uchar), ippiSize(len, 1), &r) >= 0, r);
#endif
int i = 0;
}
#ifdef HAVE_IPP
-#if !IPP_DISABLE_SORT_IDX
typedef IppStatus (CV_STDCALL *IppSortIndexFunc)(const void* pSrc, Ipp32s srcStrideBytes, Ipp32s *pDstIndx, int len, Ipp8u *pBuffer);
static IppSortIndexFunc getSortIndexFunc(int depth, bool sortDescending)
return true;
}
#endif
-#endif
typedef void (*SortFunc)(const Mat& src, Mat& dst, int flags);
}
_dst.release();
_dst.create( src.size(), CV_32S );
dst = _dst.getMat();
-#if !IPP_DISABLE_SORT_IDX
+
CV_IPP_RUN_FAST(ipp_sortIdx(src, dst, flags));
-#endif
static SortFunc tab[] =
{
//#ifdef HAVE_OPENCL
device(0), useOpenCL(-1),
//#endif
- useIPP(-1)
+ useIPP(-1),
+ useIPP_NE(-1)
#ifdef HAVE_TEGRA_OPTIMIZATION
,useTegra(-1)
#endif
ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run
int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized
//#endif
- int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
+ int useIPP; // 1 - use, 0 - do not use, -1 - auto/not initialized
+ int useIPP_NE; // 1 - use, 0 - do not use, -1 - auto/not initialized
#ifdef HAVE_TEGRA_OPTIMIZATION
int useTegra; // 1 - use, 0 - do not use, -1 - auto/not initialized
#endif
{
CV_INSTRUMENT_REGION_IPP()
+#if IPP_VERSION_X100 < 201801
+ // Poor performance of SSE42
+ if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
+ return false;
+#endif
+
Ipp32s count = 0;
int depth = src.depth();
#if IPP_VERSION_X100 >= 700
CV_INSTRUMENT_REGION_IPP()
-#if IPP_DISABLE_MINMAX_NAN_SSE42
+#if IPP_VERSION_X100 < 201800
+ // cv::minMaxIdx problem with NaN input
// Disable 32F processing only
- if(src.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX))
+ if(src.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif
+#if IPP_VERSION_X100 < 201801
// cv::minMaxIdx problem with index positions on AVX
-#if IPP_VERSION_X100 < 201810
- if(!mask.empty() && _maxIdx && ipp::getIppFeatures()&ippCPUID_AVX)
+ if(!mask.empty() && _maxIdx && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42)
return false;
#endif
IppiPoint minIdx = {-1, -1};
IppiPoint maxIdx = {-1, -1};
- float *pMinVal = (_minVal)?&minVal:NULL;
- float *pMaxVal = (_maxVal)?&maxVal:NULL;
+ float *pMinVal = (_minVal || _minIdx)?&minVal:NULL;
+ float *pMaxVal = (_maxVal || _maxIdx)?&maxVal:NULL;
IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL;
IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL;
ippMinMaxFun = ipp_minIdx_wrap;
else if(_maxVal && !_maxIdx && _minVal && !_minIdx)
ippMinMaxFun = ipp_minMax_wrap;
+ else if(!_maxVal && !_maxIdx && !_minVal && !_minIdx)
+ return false;
else
ippMinMaxFun = ipp_minMaxIndex_wrap;
}
*_maxVal = maxVal;
if(_minIdx)
{
+#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMinIdx->x && !pMinIdx->y)
+#else
+ if(status == ippStsNoOperation)
+#endif
{
_minIdx[0] = -1;
_minIdx[1] = -1;
}
if(_maxIdx)
{
+#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMaxIdx->x && !pMaxIdx->y)
+#else
+ if(status == ippStsNoOperation)
+#endif
{
_maxIdx[0] = -1;
_maxIdx[1] = -1;
public:
IPPInitSingleton()
{
- useIPP = true;
- ippStatus = 0;
- funcname = NULL;
- filename = NULL;
- linen = 0;
- ippFeatures = 0;
+ useIPP = true;
+ useIPP_NE = false;
+ ippStatus = 0;
+ funcname = NULL;
+ filename = NULL;
+ linen = 0;
+ cpuFeatures = 0;
+ ippFeatures = 0;
+ ippTopFeatures = 0;
+ pIppLibInfo = NULL;
+
+ ippStatus = ippGetCpuFeatures(&cpuFeatures, NULL);
+ if(ippStatus < 0)
+ {
+ std::cerr << "ERROR: IPP cannot detect CPU features, IPP was disabled " << std::endl;
+ useIPP = false;
+ return;
+ }
+ ippFeatures = cpuFeatures;
+ bool unsupported = false;
const char* pIppEnv = getenv("OPENCV_IPP");
cv::String env = pIppEnv;
if(env.size())
{
+ env = env.toLowerCase();
+ if(env.substr(0, 2) == "ne")
+ {
+ useIPP_NE = true;
+ env = env.substr(3, env.size());
+ }
+
if(env == "disabled")
{
std::cerr << "WARNING: IPP was disabled by OPENCV_IPP environment variable" << std::endl;
useIPP = false;
}
-#if IPP_VERSION_X100 >= 900
- else if(env == "sse")
- ippFeatures = ippCPUID_SSE;
- else if(env == "sse2")
- ippFeatures = ippCPUID_SSE2;
- else if(env == "sse3")
- ippFeatures = ippCPUID_SSE3;
- else if(env == "ssse3")
- ippFeatures = ippCPUID_SSSE3;
- else if(env == "sse41")
- ippFeatures = ippCPUID_SSE41;
else if(env == "sse42")
- ippFeatures = ippCPUID_SSE42;
- else if(env == "avx")
- ippFeatures = ippCPUID_AVX;
+ {
+ if(!(cpuFeatures&ippCPUID_SSE42))
+ unsupported = true;
+ ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42;
+ ippFeatures |= (cpuFeatures&ippCPUID_AES);
+ ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
+ ippFeatures |= (cpuFeatures&ippCPUID_SHA);
+ }
else if(env == "avx2")
- ippFeatures = ippCPUID_AVX2;
+ {
+ if(!(cpuFeatures&ippCPUID_AVX2))
+ unsupported = true;
+ ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2;
+ ippFeatures |= (cpuFeatures&ippCPUID_AES);
+ ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
+ ippFeatures |= (cpuFeatures&ippCPUID_F16C);
+ ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
+ ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
+ ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
+ ippFeatures |= (cpuFeatures&ippCPUID_MPX);
+ }
+#if defined (_M_AMD64) || defined (__x86_64__)
+ else if(env == "avx512")
+ {
+ if(!(cpuFeatures&ippCPUID_AVX512F))
+ unsupported = true;
+
+ ippFeatures = ippCPUID_MMX|ippCPUID_SSE|ippCPUID_SSE2|ippCPUID_SSE3|ippCPUID_SSSE3|ippCPUID_SSE41|ippCPUID_SSE42|ippCPUID_AVX|ippCPUID_AVX2|ippCPUID_AVX512F;
+ ippFeatures |= (cpuFeatures&ippCPUID_AES);
+ ippFeatures |= (cpuFeatures&ippCPUID_CLMUL);
+ ippFeatures |= (cpuFeatures&ippCPUID_F16C);
+ ippFeatures |= (cpuFeatures&ippCPUID_ADCOX);
+ ippFeatures |= (cpuFeatures&ippCPUID_RDSEED);
+ ippFeatures |= (cpuFeatures&ippCPUID_PREFETCHW);
+ ippFeatures |= (cpuFeatures&ippCPUID_MPX);
+ ippFeatures |= (cpuFeatures&ippCPUID_AVX512CD);
+ ippFeatures |= (cpuFeatures&ippCPUID_AVX512VL);
+ ippFeatures |= (cpuFeatures&ippCPUID_AVX512BW);
+ ippFeatures |= (cpuFeatures&ippCPUID_AVX512DQ);
+ ippFeatures |= (cpuFeatures&ippCPUID_AVX512ER);
+ ippFeatures |= (cpuFeatures&ippCPUID_AVX512PF);
+ ippFeatures |= (cpuFeatures&ippCPUID_AVX512VBMI);
+ }
#endif
else
- std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << std::endl;
+ std::cerr << "ERROR: Improper value of OPENCV_IPP: " << env.c_str() << ". Correct values are: disabled, sse42, avx2, avx512 (Intel64 only)" << std::endl;
+ }
+
+ if(unsupported)
+ {
+ std::cerr << "WARNING: selected IPP features are not supported by CPU. IPP was initialized with default features" << std::endl;
+ ippFeatures = cpuFeatures;
+ }
+
+ // Disable AVX1 since we don't track regressions for it. SSE42 will be used instead
+ if(cpuFeatures&ippCPUID_AVX && !(cpuFeatures&ippCPUID_AVX2))
+ ippFeatures &= ~ippCPUID_AVX;
+
+ // IPP integrations in OpenCV support only SSE4.2, AVX2 and AVX-512 optimizations.
+ if(!(
+ cpuFeatures&ippCPUID_AVX512F ||
+ cpuFeatures&ippCPUID_AVX2 ||
+ cpuFeatures&ippCPUID_SSE42
+ ))
+ {
+ useIPP = false;
+ return;
}
IPP_INITIALIZER(ippFeatures)
ippFeatures = ippGetEnabledCpuFeatures();
+
+ // Detect top level optimizations to make comparison easier for optimizations dependent conditions
+ if(ippFeatures&ippCPUID_AVX512F)
+ {
+ if((ippFeatures&ippCPUID_AVX512_SKX) == ippCPUID_AVX512_SKX)
+ ippTopFeatures = ippCPUID_AVX512_SKX;
+ else if((ippFeatures&ippCPUID_AVX512_KNL) == ippCPUID_AVX512_KNL)
+ ippTopFeatures = ippCPUID_AVX512_KNL;
+ else
+ ippTopFeatures = ippCPUID_AVX512F; // Unknown AVX512 configuration
+ }
+ else if(ippFeatures&ippCPUID_AVX2)
+ ippTopFeatures = ippCPUID_AVX2;
+ else if(ippFeatures&ippCPUID_SSE42)
+ ippTopFeatures = ippCPUID_SSE42;
+
+ pIppLibInfo = ippiGetLibVersion();
}
- bool useIPP;
+public:
+ bool useIPP;
+ bool useIPP_NE;
- int ippStatus; // 0 - all is ok, -1 - IPP functions failed
+ int ippStatus; // 0 - all is ok, -1 - IPP functions failed
const char *funcname;
const char *filename;
int linen;
Ipp64u ippFeatures;
+ Ipp64u cpuFeatures;
+ Ipp64u ippTopFeatures;
+ const IppLibraryVersion *pIppLibInfo;
};
static IPPInitSingleton& getIPPSingleton()
#endif
}
+unsigned long long getIppTopFeatures(); // forward declaration of the definition that follows
+
+unsigned long long getIppTopFeatures() // returns the ippTopFeatures value held by the IPP singleton
+{
+#ifdef HAVE_IPP
+ return getIPPSingleton().ippTopFeatures;
+#else
+ return 0; // built without HAVE_IPP: no feature mask to report
+#endif
+}
+
void setIppStatus(int status, const char * const _funcname, const char * const _filename, int _line)
{
#ifdef HAVE_IPP
#endif
}
+String getIppVersion() // describes the IPP library as "<Name> <Version> <BuildDate>"
+{
+#ifdef HAVE_IPP
+ const IppLibraryVersion *pInfo = getIPPSingleton().pIppLibInfo;
+ if(pInfo)
+ return format("%s %s %s", pInfo->Name, pInfo->Version, pInfo->BuildDate);
+ else
+ return String("error"); // the singleton holds no library version info
+#else
+ return String("disabled"); // built without HAVE_IPP
+#endif
+}
+
bool useIPP()
{
#ifdef HAVE_IPP
#endif
}
+bool useIPP_NE() // reports the useIPP_NE flag cached in CoreTLSData
+{
+#ifdef HAVE_IPP
+ CoreTLSData* data = getCoreTlsData().get();
+ if(data->useIPP_NE < 0) // negative is treated as "not set yet" (presumably the initial value - confirm in CoreTLSData)
+ {
+ data->useIPP_NE = getIPPSingleton().useIPP_NE; // seed the cached value from the singleton's setting
+ }
+ return (data->useIPP_NE > 0);
+#else
+ return false; // built without HAVE_IPP
+#endif
+}
+
+void setUseIPP_NE(bool flag) // stores the requested flag in CoreTLSData
+{
+ CoreTLSData* data = getCoreTlsData().get();
+#ifdef HAVE_IPP
+ data->useIPP_NE = (getIPPSingleton().useIPP_NE)?flag:false; // can only be switched on when the singleton's useIPP_NE is set
+#else
+ (void)flag;
+ data->useIPP_NE = false; // always off without HAVE_IPP
+#endif
+}
+
} // namespace ipp
} // namespace cv
ippiGetImage(dy_, iwSrcDy);
ippiGetImage(dst, iwDst);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, &iwSrcDx, &iwSrcDy, &iwDst, norm, low, high);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCannyDeriv, iwSrcDx, iwSrcDy, iwDst, low, high, ::ipp::IwiFilterCannyDerivParams(norm));
}
catch (::ipp::IwException ex)
{
ippiGetImage(src, iwSrc);
ippiGetImage(dst, iwDst);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, &iwSrc, &iwDst, ippFilterSobel, kernel, norm, low, high, ippBorderRepl);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterCanny, iwSrc, iwDst, low, high, ::ipp::IwiFilterCannyParams(ippFilterSobel, kernel, norm), ippBorderRepl);
}
catch (::ipp::IwException)
{
0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
};
-static ippiGeneralFunc ippiCopyP3C3RTab[] =
+
+static IppStatus ippiGrayToRGB_C1C3R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize)
{
- (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
- 0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
-};
+ return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
+}
+static IppStatus ippiGrayToRGB_C1C3R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize) // Ipp16u overload: forwards to ippiGrayToRGB_16u_C1C3R
+{
+ return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
+}
+static IppStatus ippiGrayToRGB_C1C3R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize) // Ipp32f overload: forwards to ippiGrayToRGB_32f_C1C3R
+{
+ return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C3R, pSrc, srcStep, pDst, dstStep, roiSize);
+}
+
+static IppStatus ippiGrayToRGB_C1C4R(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, IppiSize roiSize, Ipp8u aval) // Ipp8u overload; aval is passed through as the last argument
+{
+ return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_8u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
+}
+static IppStatus ippiGrayToRGB_C1C4R(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, IppiSize roiSize, Ipp16u aval) // Ipp16u overload: forwards to ippiGrayToRGB_16u_C1C4R
+{
+ return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_16u_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
+}
+static IppStatus ippiGrayToRGB_C1C4R(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, IppiSize roiSize, Ipp32f aval) // Ipp32f overload: forwards to ippiGrayToRGB_32f_C1C4R
+{
+ return CV_INSTRUMENT_FUN_IPP(ippiGrayToRGB_32f_C1C4R, pSrc, srcStep, pDst, dstStep, roiSize, aval);
+}
#if !IPP_DISABLE_RGB_XYZ
static ippiGeneralFunc ippiRGB2XYZTab[] =
Ipp32f coeffs[3];
};
+template <typename T>
struct IPPGray2BGRFunctor
{
- IPPGray2BGRFunctor(ippiGeneralFunc _func) :
- ippiGrayToBGR(_func)
- {
- }
+ IPPGray2BGRFunctor(){}
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{
- if (ippiGrayToBGR == 0)
- return false;
-
- const void* srcarray[3] = { src, src, src };
- return CV_INSTRUMENT_FUN_IPP(ippiGrayToBGR, srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
+ return ippiGrayToRGB_C1C3R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows)) >= 0;
}
-private:
- ippiGeneralFunc ippiGrayToBGR;
};
+template <typename T>
struct IPPGray2BGRAFunctor
{
- IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
- ippiColorConvertGeneral(_func1), ippiColorConvertReorder(_func2), depth(_depth)
+ IPPGray2BGRAFunctor()
{
+ alpha = ColorChannel<T>::max();
}
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{
- if (ippiColorConvertGeneral == 0 || ippiColorConvertReorder == 0)
- return false;
-
- const void* srcarray[3] = { src, src, src };
- Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
- if(CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
- return false;
- int order[4] = {0, 1, 2, 3};
- return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
+ return ippiGrayToRGB_C1C4R((T*)src, srcStep, (T*)dst, dstStep, ippiSize(cols, rows), alpha) >= 0;
}
-private:
- ippiGeneralFunc ippiColorConvertGeneral;
- ippiReorderFunc ippiColorConvertReorder;
- int depth;
+
+ T alpha;
};
struct IPPReorderGeneralFunctor
#if defined(HAVE_IPP) && IPP_VERSION_X100 >= 700
CV_IPP_CHECK()
{
+ bool ippres = false;
if(dcn == 3)
{
- if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height,
- IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) )
- return;
+ if( depth == CV_8U )
+ ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp8u>());
+ else if( depth == CV_16U )
+ ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp16u>());
+ else
+ ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRFunctor<Ipp32f>());
}
else if(dcn == 4)
{
- if( CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height,
- IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) )
- return;
+ if( depth == CV_8U )
+ ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp8u>());
+ else if( depth == CV_16U )
+ ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp16u>());
+ else
+ ippres = CvtColorIPPLoop(src_data, src_step, dst_data, dst_step, width, height, IPPGray2BGRAFunctor<Ipp32f>());
}
+ if(ippres)
+ return;
}
#endif
::ipp::IwiImage iwDstProc = iwDst;
::ipp::IwiBorderSize borderSize(maskSize);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
- if(!ippBorder.m_borderType)
+ if(!ippBorder)
return false;
if(srcType == ipp8u && dstType == ipp8u)
{
iwSrc -= borderSize;
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0, ippAlgHintFast);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0, ::ipp::IwiScaleParams(ippAlgHintFast));
iwSrcProc += borderSize;
}
if(useScharr)
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterScharr, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
else
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, &iwSrcProc, &iwDstProc, derivType, maskSize, ippBorder);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterSobel, iwSrcProc, iwDstProc, derivType, maskSize, ::ipp::IwDefault(), ippBorder);
if(useScale)
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta, ippAlgHintFast);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta, ::ipp::IwiScaleParams(ippAlgHintFast));
}
catch (::ipp::IwException)
{
::ipp::IwiImage iwDstProc = iwDst;
::ipp::IwiBorderSize borderSize(maskSize);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
- if(!ippBorder.m_borderType)
+ if(!ippBorder)
return false;
if(srcType == ipp8u && dstType == ipp8u)
{
iwSrc -= borderSize;
iwSrcProc.Alloc(iwSrc.m_size, ipp32f, channels);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwSrc, &iwSrcProc, 1, 0);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwSrc, iwSrcProc, 1, 0);
iwSrcProc += borderSize;
}
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, &iwSrcProc, &iwDstProc, maskSize, ippBorder);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterLaplacian, iwSrcProc, iwDstProc, maskSize, ::ipp::IwDefault(), ippBorder);
if(useScale)
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, &iwDstProc, &iwDst, scale, delta);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiScale, iwDstProc, iwDst, scale, delta);
}
catch (::ipp::IwException ex)
}
#ifdef HAVE_IPP
-typedef IppStatus(CV_STDCALL* IppiFilterBorder)(
- const void* pSrc, int srcStep, void* pDst, int dstStep,
- IppiSize dstRoiSize, IppiBorderType border, const void* borderValue,
- const IppiFilterBorderSpec* pSpec, Ipp8u* pBuffer);
-
-static IppiFilterBorder getIppFunc(int stype)
+static bool ippFilter2D(int stype, int dtype, int kernel_type,
+ uchar * src_data, size_t src_step,
+ uchar * dst_data, size_t dst_step,
+ int width, int height,
+ int full_width, int full_height,
+ int offset_x, int offset_y,
+ uchar * kernel_data, size_t kernel_step,
+ int kernel_width, int kernel_height,
+ int anchor_x, int anchor_y,
+ double delta, int borderType,
+ bool isSubmatrix)
{
- switch (stype)
- {
- case CV_8UC1:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C1R);
- case CV_8UC3:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C3R);
- case CV_8UC4:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_8u_C4R);
- case CV_16UC1:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C1R);
- case CV_16UC3:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C3R);
- case CV_16UC4:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16u_C4R);
- case CV_16SC1:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C1R);
- case CV_16SC3:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C3R);
- case CV_16SC4:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_16s_C4R);
- case CV_32FC1:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C1R);
- case CV_32FC3:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C3R);
- case CV_32FC4:
- return reinterpret_cast<IppiFilterBorder>(ippiFilterBorder_32f_C4R);
- default:
- return 0;
- }
-}
+#ifdef HAVE_IPP_IW
+ CV_INSTRUMENT_REGION_IPP();
-template <int kdepth>
-struct IppFilterTrait { };
+ ::ipp::IwiSize iwSize(width, height);
+ ::ipp::IwiSize kernelSize(kernel_width, kernel_height);
+ IppDataType type = ippiGetDataType(CV_MAT_DEPTH(stype));
+ int channels = CV_MAT_CN(stype);
-template <>
-struct IppFilterTrait<CV_16S>
-{
- enum { kernel_type_id = CV_16SC1 };
- typedef Ipp16s kernel_type;
- typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
- inline static copy_fun_type get_copy_fun() { return ippiCopy_16s_C1R; }
- inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
- {
- return ippiFilterBorderInit_16s(pKernel, kernelSize, divisor, dataType, numChannels, roundMode, pSpec);
- }
-};
+ CV_UNUSED(isSubmatrix);
-template <>
-struct IppFilterTrait<CV_32F>
-{
- enum { kernel_type_id = CV_32FC1 };
- typedef Ipp32f kernel_type;
- typedef IppStatus(CV_STDCALL* copy_fun_type)(const kernel_type* pSrc, int srcStep, kernel_type* pDst, int dstStep, IppiSize roiSize);
- inline static copy_fun_type get_copy_fun() { return ippiCopy_32f_C1R; }
- inline static IppStatus runInit(const kernel_type* pKernel, IppiSize kernelSize, int divisor, IppDataType dataType, int numChannels, IppRoundMode roundMode, IppiFilterBorderSpec* pSpec)
- {
- CV_UNUSED(divisor);
- return ippiFilterBorderInit_32f(pKernel, kernelSize, dataType, numChannels, roundMode, pSpec);
- }
-};
+#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel
+ if(kernel_width == 1 && kernel_height == 1)
+ return false;
+#endif
-template <int kdepth>
-static bool ippFilter2D(int stype, int dtype,
- uchar * src_data, size_t src_step,
- uchar * dst_data, size_t dst_step,
- int width, int height,
- uchar * kernel_data, size_t kernel_step,
- int kernel_width, int kernel_height,
- int anchor_x, int anchor_y,
- double delta, int borderType, bool isSubmatrix)
-{
- CV_INSTRUMENT_REGION_IPP();
+#if IPP_VERSION_X100 < 201801
+ // Too big difference compared to OpenCV FFT-based convolution
+ if(kernel_type == CV_32FC1 && (type == ipp16s || type == ipp16u) && (kernel_width > 7 || kernel_height > 7))
+ return false;
- typedef IppFilterTrait<kdepth> trait;
- typedef typename trait::kernel_type kernel_type;
+ // Poor optimization for big kernels
+ if(kernel_width > 7 || kernel_height > 7)
+ return false;
+#endif
- IppAutoBuffer<IppiFilterBorderSpec> spec;
- IppAutoBuffer<Ipp8u> buffer;
- IppAutoBuffer<kernel_type> kernelBuffer;
- IppiBorderType ippBorderType;
- int src_type;
+ if(src_data == dst_data)
+ return false;
- Point anchor(anchor_x, anchor_y);
-#if IPP_VERSION_X100 >= 900
- Point ippAnchor((kernel_width - 1) / 2, (kernel_height - 1) / 2);
-#else
- Point ippAnchor(kernel_width >> 1, kernel_height >> 1);
-#endif
- bool isIsolated = (borderType & BORDER_ISOLATED) != 0;
- int borderTypeNI = borderType & ~BORDER_ISOLATED;
- ippBorderType = ippiGetBorderType(borderTypeNI);
- int ddepth = CV_MAT_DEPTH(dtype);
- int sdepth = CV_MAT_DEPTH(stype);
+ if(stype != dtype)
+ return false;
-#if IPP_VERSION_X100 >= 201700 && IPP_VERSION_X100 <= 201702 // IPP bug with 1x1 kernel
- if(kernel_width == 1 && kernel_height == 1)
+ if(kernel_type != CV_16SC1 && kernel_type != CV_32FC1)
return false;
-#endif
- bool runIpp = true
- && (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE)
- && (sdepth == ddepth)
- && (getIppFunc(stype))
- && ((int)ippBorderType > 0)
- && (!isSubmatrix || isIsolated)
- && (std::fabs(delta - 0) < DBL_EPSILON)
- && (ippAnchor == anchor)
- && src_data != dst_data;
-
- if (!runIpp)
+ // TODO: Implement offset for 8u, 16u
+ if(std::fabs(delta) >= DBL_EPSILON)
return false;
- src_type = stype;
- int cn = CV_MAT_CN(stype);
- IppiSize kernelSize = { kernel_width, kernel_height };
- IppDataType dataType = ippiGetDataType(ddepth);
- IppDataType kernelType = ippiGetDataType(kdepth);
- Ipp32s specSize = 0;
- Ipp32s bufsize = 0;
- IppiSize dstRoiSize = { width, height };
- IppStatus status;
-
- status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize);
- if (status < 0)
+ if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
return false;
- kernel_type* pKerBuffer = (kernel_type*)kernel_data;
- size_t good_kernel_step = sizeof(kernel_type) * static_cast<size_t>(kernelSize.width);
-#if IPP_VERSION_X100 >= 900
- if (kernel_step != good_kernel_step) {
- kernelBuffer.allocate((int)good_kernel_step * kernelSize.height);
- status = trait::get_copy_fun()((kernel_type*)kernel_data, (int)kernel_step, kernelBuffer, (int)good_kernel_step, kernelSize);
- if (status < 0)
+ try
+ {
+ ::ipp::IwiBorderSize iwBorderSize;
+ ::ipp::IwiBorderType iwBorderType;
+ ::ipp::IwiImage iwKernel(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, (void*)kernel_data, kernel_step);
+ ::ipp::IwiImage iwSrc(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)src_data, src_step);
+ ::ipp::IwiImage iwDst(iwSize, type, channels, ::ipp::IwiBorderSize(offset_x, offset_y, full_width-offset_x-width, full_height-offset_y-height), (void*)dst_data, dst_step);
+
+ iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
+ iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
+ if(!iwBorderType)
return false;
- pKerBuffer = kernelBuffer;
+
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilter, iwSrc, iwDst, iwKernel, ::ipp::IwiFilterParams(1, 0, ippAlgHintNone, ippRndFinancial), iwBorderType);
}
-#else
- kernelBuffer.Alloc(good_kernel_step * kernelSize.height);
- Mat kerFlip(Size(kernelSize.width, kernelSize.height), trait::kernel_type_id, kernelBuffer, (int)good_kernel_step);
- Mat kernel(Size(kernel_width, kernel_height), trait::kernel_type_id, kernel_data, kernel_step);
- flip(kernel, kerFlip, -1);
- pKerBuffer = kernelBuffer;
-#endif
- spec.allocate(specSize);
- buffer.allocate(bufsize);
- status = trait::runInit(pKerBuffer, kernelSize, 0, dataType, cn, ippRndFinancial, spec);
- if (status < 0) {
+ catch(::ipp::IwException ex)
+ {
return false;
}
- IppiFilterBorder ippiFilterBorder = getIppFunc(src_type);
- kernel_type borderValue[4] = { 0, 0, 0, 0 };
- status = CV_INSTRUMENT_FUN_IPP(ippiFilterBorder, src_data, (int)src_step, dst_data, (int)dst_step, dstRoiSize, ippBorderType, borderValue, spec, buffer);
- if (status >= 0) {
- CV_IMPL_ADD(CV_IMPL_IPP);
- return true;
- }
+
+ return true;
+#else
+ CV_UNUSED(stype); CV_UNUSED(dtype); CV_UNUSED(kernel_type); CV_UNUSED(src_data); CV_UNUSED(src_step);
+ CV_UNUSED(dst_data); CV_UNUSED(dst_step); CV_UNUSED(width); CV_UNUSED(height); CV_UNUSED(full_width);
+ CV_UNUSED(full_height); CV_UNUSED(offset_x); CV_UNUSED(offset_y); CV_UNUSED(kernel_data); CV_UNUSED(kernel_step);
+ CV_UNUSED(kernel_width); CV_UNUSED(kernel_height); CV_UNUSED(anchor_x); CV_UNUSED(anchor_y); CV_UNUSED(delta);
+ CV_UNUSED(borderType); CV_UNUSED(isSubmatrix);
return false;
+#endif
}
#endif
delta, borderType, isSubmatrix);
if (res)
return;
-#ifdef HAVE_IPP
- CV_IPP_CHECK()
- {
- res = false;
- if (kernel_type == CV_32FC1) {
- res = ippFilter2D<CV_32F>(stype, dtype,
- src_data, src_step,
- dst_data, dst_step,
- width, height,
- kernel_data, kernel_step,
- kernel_width, kernel_height,
- anchor_x, anchor_y,
- delta, borderType, isSubmatrix);
- }
- else if (kernel_type == CV_16SC1) {
- res = ippFilter2D<CV_16S>(stype, dtype,
- src_data, src_step,
- dst_data, dst_step,
- width, height,
- kernel_data, kernel_step,
- kernel_width, kernel_height,
- anchor_x, anchor_y,
- delta, borderType, isSubmatrix);
- }
- if (res)
- return;
- }
-#endif
+
+ CV_IPP_RUN_FAST(ippFilter2D(stype, dtype, kernel_type,
+ src_data, src_step,
+ dst_data, dst_step,
+ width, height,
+ full_width, full_height,
+ offset_x, offset_y,
+ kernel_data, kernel_step,
+ kernel_width, kernel_height,
+ anchor_x, anchor_y,
+ delta, borderType, isSubmatrix))
+
res = dftFilter2D(stype, dtype, kernel_type,
src_data, src_step,
dst_data, dst_step,
Point anchor=Point(0,0), double delta=0,
int borderType=BORDER_REFLECT_101 );
+
}
+#ifdef HAVE_IPP_IW
+static inline bool ippiCheckAnchor(cv::Point anchor, cv::Size ksize) // true only when the anchor is the kernel center
+{
+ anchor = cv::normalizeAnchor(anchor, ksize); // presumably resolves negative (default) anchor values - confirm against normalizeAnchor
+ if(anchor.x != ((ksize.width-1)/2) || anchor.y != ((ksize.height-1)/2)) // center is (size-1)/2 on each axis
+ return 0;
+ else
+ return 1;
+}
+#endif
+
#endif
{
IppiHistogram_C1 ippFunction =
(type == CV_8UC1) ? (IppiHistogram_C1)ippiHistogram_8u_C1R :
-#if IPP_VERSION_X100 >= 201700 || !(defined HAVE_IPP_ICV_ONLY)
(type == CV_16UC1) ? (IppiHistogram_C1)ippiHistogram_16u_C1R :
(type == CV_32FC1) ? (IppiHistogram_C1)ippiHistogram_32f_C1R :
-#endif
NULL;
return ippFunction;
{
CV_INSTRUMENT_REGION_IPP()
+#if IPP_VERSION_X100 < 201801
// No SSE42 optimization for uniform 32f
-#if IPP_DISABLE_PERF_HISTU32F_SSE42
- if(uniform && image.depth() == CV_32F && !(ipp::getIppFeatures()&ippCPUID_AVX))
+ if(uniform && image.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif
try
{
- ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
- CV_INSTRUMENT_FUN_IPP(iwiResize, &m_src, &m_dst, &roi);
+ ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
+ CV_INSTRUMENT_FUN_IPP(iwiResize, m_src, m_dst, ippBorderRepl, tile);
}
catch(::ipp::IwException)
{
{0, scaleY, shift+0.5*scaleY}
};
- iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, ippWarpForward, inter, ::ipp::IwiWarpAffineParams(0, 0.75, 0), ippBorderRepl);
+ iwiWarpAffine.InitAlloc(m_src.m_size, m_dst.m_size, m_src.m_dataType, m_src.m_channels, coeffs, iwTransForward, inter, ::ipp::IwiWarpAffineParams(0, 0, 0.75), ippBorderRepl);
m_ok = true;
}
try
{
- ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
- CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, &m_src, &m_dst, &roi);
+ ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
+ CV_INSTRUMENT_FUN_IPP(iwiWarpAffine, m_src, m_dst, tile);
}
catch(::ipp::IwException)
{
if(ippInter < 0)
return false;
-#if IPP_DISABLE_RESIZE_NEAREST
- if(ippInter == ippNearest)
- return false;
-#endif
-
-#if IPP_DISABLE_RESIZE_AREA
- if(ippInter == ippSuper)
- return false;
-#endif
+ // Resize which doesn't match OpenCV exactly
+ if(!cv::ipp::useIPP_NE())
+ {
+ if(ippInter == ippNearest || ippInter == ippSuper || (ippDataType == ipp8u && ippInter == ippLinear))
+ return false;
+ }
if(ippInter != ippLinear && ippDataType == ipp64f)
return false;
- // Accuracy mismatch is 1 but affects detectors greatly
-#if IPP_DISABLE_RESIZE_8U
- if(ippDataType == ipp8u && ippInter == ippLinear)
- return false;
+#if IPP_VERSION_X100 < 201801
+ // Degradations on int^2 linear downscale
+ if(ippDataType != ipp64f && ippInter == ippLinear && inv_scale_x < 1 && inv_scale_y < 1) // if downscale
+ {
+ int scale_x = (int)(1/inv_scale_x);
+ int scale_y = (int)(1/inv_scale_y);
+ if(1/inv_scale_x - scale_x < DBL_EPSILON && 1/inv_scale_y - scale_y < DBL_EPSILON) // if integer
+ {
+ if(!(scale_x&(scale_x-1)) && !(scale_y&(scale_y-1))) // if power of 2
+ return false;
+ }
+ }
#endif
bool affine = false;
#if IPP_VERSION_X100 >= 900
CV_INSTRUMENT_REGION_IPP()
+#if IPP_VERSION_X100 < 201801
+ // Degradations for CV_8UC1
+ if(src.type() == CV_8UC1)
+ return false;
+#endif
+
IppiSize roi = { src.cols, src.rows };
IppiPoint point = { 0, 0 };
int type = src.type();
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
- // Problem with SSE42 optimizations
-#if IPP_DISABLE_PERF_MORPH_SSE42
- if(!(ipp::getIppFeatures()&ippCPUID_AVX))
+#if IPP_VERSION_X100 < 201800
+ // Problem with SSE42 optimizations performance
+ if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
+ return false;
+
+ // Different mask flipping
+ if(op == MORPH_GRADIENT)
+ return false;
+#endif
+
+#if IPP_VERSION_X100 < 201801
+ // Problem with AVX512 optimizations performance
+ if(cv::ipp::getIppTopFeatures()&ippCPUID_AVX512F)
+ return false;
+
+ // Multiple iterations on small mask is not effective in current integration
+ // In-place imitation for 3x3 kernel is not efficient
+ // Advanced morphology for small mask introduces degradations
+ if((iterations > 1 || src_data == dst_data || (op != MORPH_ERODE && op != MORPH_DILATE)) && kernel_width*kernel_height < 25)
+ return false;
+
+ // Skip even mask sizes for advanced morphology since they can produce out of spec writes
+ if((op != MORPH_ERODE && op != MORPH_DILATE) && (!(kernel_width&1) || !(kernel_height&1)))
return false;
#endif
- ::ipp::IwAutoBuffer<Ipp8u> kernelTempBuffer;
+ IppAutoBuffer<Ipp8u> kernelTempBuffer;
::ipp::IwiBorderSize iwBorderSize;
+ ::ipp::IwiBorderSize iwBorderSize2;
::ipp::IwiBorderType iwBorderType;
+ ::ipp::IwiBorderType iwBorderType2;
::ipp::IwiImage iwMask;
::ipp::IwiImage iwInter;
::ipp::IwiSize initSize(width, height);
::ipp::IwiSize kernelSize(kernel_width, kernel_height);
- ::ipp::IwiPoint anchor(anchor_x, anchor_y);
IppDataType type = ippiGetDataType(CV_MAT_DEPTH(src_type));
int channels = CV_MAT_CN(src_type);
IwiMorphologyType morphType = ippiGetMorphologyType(op);
if(src_type != dst_type)
return false;
+ if(!ippiCheckAnchor(anchor_x, anchor_y, kernel_width, kernel_height))
+ return false;
+
try
{
::ipp::IwiImage iwSrc(initSize, type, channels, ::ipp::IwiBorderSize(roi_x, roi_y, roi_width-roi_x-width, roi_height-roi_y-height), (void*)src_data, src_step);
::ipp::IwiImage iwDst(initSize, type, channels, ::ipp::IwiBorderSize(roi_x2, roi_y2, roi_width2-roi_x2-width, roi_height2-roi_y2-height), (void*)dst_data, dst_step);
- ::ipp::iwiFilterMorphology_GetBorderSize(morphType, kernelSize, iwBorderSize);
- if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
- {
- iwBorderSize.borderLeft /= 2;
- iwBorderSize.borderTop /= 2;
- iwBorderSize.borderRight /= 2;
- iwBorderSize.borderBottom /= 2;
- }
-
+ iwBorderSize = ::ipp::iwiSizeToBorderSize(kernelSize);
iwBorderType = ippiGetBorder(iwSrc, borderType, iwBorderSize);
- if(!iwBorderType.m_borderType || ((iwBorderType.m_borderFlags&ippBorderInMem) && (iwBorderType.m_borderFlags&ippBorderInMem) != ippBorderInMem))
+ if(!iwBorderType)
return false;
-
- if(iwBorderType.m_borderType == ippBorderConst)
+ if(iterations > 1)
{
- if(Vec<double, 4>(borderValue) == morphologyDefaultBorderValue())
- iwBorderType.m_borderType = ippBorderDefault;
- else
- iwBorderType.SetValue(borderValue[0], borderValue[1], borderValue[2], borderValue[3]);
+ // Check dst border for second and later iterations
+ iwBorderSize2 = ::ipp::iwiSizeToBorderSize(kernelSize);
+ iwBorderType2 = ippiGetBorder(iwDst, borderType, iwBorderSize2);
+ if(!iwBorderType2)
+ return false;
}
- if(morphType != iwiMorphErode && morphType != iwiMorphDilate)
+
+ if(morphType != iwiMorphErode && morphType != iwiMorphDilate && morphType != iwiMorphGradient)
{
- if((iwBorderType.m_borderFlags&ippBorderInMem) == ippBorderInMem)
- iwBorderType.m_borderFlags = ippBorderFirstStageInMem;
+ // For now complex morphology supports only InMem around all sides. This will be improved later.
+ if((iwBorderType&ippBorderInMem) && (iwBorderType&ippBorderInMem) != ippBorderInMem)
+ return false;
+
+ if((iwBorderType&ippBorderInMem) == ippBorderInMem) // border pixels are available in memory on all four sides
+ {
+ iwBorderType &= ~ippBorderInMem; // drop the plain in-memory flags...
+ iwBorderType |= ippBorderFirstStageInMem; // ...and set the first-stage-only flags instead; AND-ing here would mask away the border type itself
+ }
}
- // Test input parameters on dummy structures
+ if(iwBorderType.StripFlags() == ippBorderConst)
{
- ::ipp::IwiImage testSrc(initSize, type, channels);
- ::ipp::IwiImage testDst(initSize, type, channels);
- ::ipp::IwiImage testMask(ippiSize(kernel_width, kernel_height), ipp8u, CV_MAT_CN(kernel_type));
-
- ::ipp::iwiFilterMorphology(&testSrc, &testDst, morphType, &testMask, &anchor, iwBorderType);
+ if(Vec<double, 4>(borderValue) == morphologyDefaultBorderValue())
+ iwBorderType.SetType(ippBorderDefault);
+ else
+ iwBorderType.m_value = ::ipp::IwValueFloat(borderValue[0], borderValue[1], borderValue[2], borderValue[3]);
}
iwMask.Init(ippiSize(kernel_width, kernel_height), ippiGetDataType(CV_MAT_DEPTH(kernel_type)), CV_MAT_CN(kernel_type), 0, kernel_data, kernel_step);
- if((int)kernel_step != kernel_width || CV_MAT_DEPTH(kernel_type) != CV_8U)
+
+ ::ipp::IwiImage iwMaskLoc = iwMask;
+ if(morphType == iwiMorphDilate)
{
- kernelTempBuffer.Alloc(kernel_width*kernel_height);
- ::ipp::IwiImage iwMaskTmp(ippiSize(kernel_width, kernel_height), ipp8u, 1, 0, kernelTempBuffer, kernel_width);
- ::ipp::iwiScale(&iwMask, &iwMaskTmp, 1, 0);
- iwMask = iwMaskTmp;
+ iwMaskLoc.Alloc(iwMask.m_size, iwMask.m_dataType, iwMask.m_channels);
+ ::ipp::iwiMirror(iwMask, iwMaskLoc, ippAxsBoth);
+ iwMask = iwMaskLoc;
}
if(iterations > 1)
{
- iwInter.Alloc(initSize, type, channels);
+ // OpenCV uses in mem border from dst for two and more iterations, so we need to keep this border in intermediate image
+ iwInter.Alloc(initSize, type, channels, iwBorderSize2);
::ipp::IwiImage *pSwap[2] = {&iwInter, &iwDst};
- ::ipp::IwiBorderType iterBorder = iwBorderType;
- iterBorder.m_borderFlags = 0;
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
+
+ // Copy border only
+ {
+ if(iwBorderSize2.top)
+ {
+ ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, -iwBorderSize2.top, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.top);
+ ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
+ ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
+ }
+ if(iwBorderSize2.bottom)
+ {
+ ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, iwDst.m_size.height, iwDst.m_size.width+iwBorderSize2.left+iwBorderSize2.right, iwBorderSize2.bottom);
+ ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
+ ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
+ }
+ if(iwBorderSize2.left)
+ {
+ ::ipp::IwiRoi borderRoi(-iwBorderSize2.left, 0, iwBorderSize2.left, iwDst.m_size.height);
+ ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
+ ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
+ }
+ if(iwBorderSize2.right) // copy the right border strip of dst into the intermediate image
+ {
+ ::ipp::IwiRoi borderRoi(iwDst.m_size.width, 0, iwBorderSize2.right, iwDst.m_size.height); // strip width is the right border size, matching the guard above
+ ::ipp::IwiImage iwInterRoi = iwInter.GetRoiImage(borderRoi);
+ ::ipp::iwiCopy(iwDst.GetRoiImage(borderRoi), iwInterRoi);
+ }
+ }
+ iwBorderType2.SetType(iwBorderType);
for(int i = 0; i < iterations-1; i++)
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, pSwap[i&0x1], pSwap[(i+1)&0x1], morphType, &iwMask, NULL, iterBorder);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, *pSwap[i&0x1], *pSwap[(i+1)&0x1], morphType, iwMask, ::ipp::IwDefault(), iwBorderType2);
if(iterations&0x1)
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
}
else
{
{
iwInter.Alloc(initSize, type, channels);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwInter, morphType, &iwMask, NULL, iwBorderType);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopyMask, &iwInter, &iwDst);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwInter, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiCopy, iwInter, iwDst);
}
else
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, &iwSrc, &iwDst, morphType, &iwMask, NULL, iwBorderType);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterMorphology, iwSrc, iwDst, morphType, iwMask, ::ipp::IwDefault(), iwBorderType);
}
}
catch(::ipp::IwException ex)
}
#endif
+#define IPP_DISABLE_MORPH_ADV 1
#ifdef HAVE_IPP
#if !IPP_DISABLE_MORPH_ADV
namespace cv {
{
static bool ipp_boxfilter(Mat &src, Mat &dst, Size ksize, Point anchor, bool normalize, int borderType)
{
+#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
- // Problem with SSE42 optimization for 16s
-#if IPP_DISABLE_PERF_BOX16S_SSE42
- if(src.depth() == CV_16S && !(ipp::getIppFeatures()&ippCPUID_AVX))
+#if IPP_VERSION_X100 < 201801
+ // Problem with SSE42 optimization for 16s and some 8u modes
+ if(ipp::getIppTopFeatures() == ippCPUID_SSE42 && (((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 3 || src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 3 && (ksize.width > 5 || ksize.height > 5))))
return false;
-#endif
- int stype = src.type(), cn = CV_MAT_CN(stype);
- IppiBorderType ippBorderType = ippiGetBorderType(borderType & ~BORDER_ISOLATED);
- IppDataType ippType = ippiGetDataType(stype);
- Point ocvAnchor, ippAnchor;
- ocvAnchor.x = anchor.x < 0 ? ksize.width / 2 : anchor.x;
- ocvAnchor.y = anchor.y < 0 ? ksize.height / 2 : anchor.y;
- ippAnchor.x = ksize.width / 2 - (ksize.width % 2 == 0 ? 1 : 0);
- ippAnchor.y = ksize.height / 2 - (ksize.height % 2 == 0 ? 1 : 0);
-
- if(normalize && (!src.isSubmatrix() || borderType&BORDER_ISOLATED) && stype == dst.type() &&
- (ippBorderType == ippBorderRepl || /* returns ippStsStepErr: Step value is not valid */
- ippBorderType == ippBorderConst ||
- ippBorderType == ippBorderMirror) && ocvAnchor == ippAnchor) // returns ippStsMaskSizeErr: mask has an illegal value
- {
- IppStatus status;
- Ipp32s bufSize = 0;
- IppiSize roiSize = { dst.cols, dst.rows };
- IppiSize maskSize = { ksize.width, ksize.height };
- IppAutoBuffer<Ipp8u> buffer;
-
- if(ippiFilterBoxBorderGetBufferSize(roiSize, maskSize, ippType, cn, &bufSize) < 0)
- return false;
+ // Other optimizations have some degradations too
+ if((((src.depth() == CV_16S || src.depth() == CV_16U) && (src.channels() == 4)) || (src.depth() == CV_8U && src.channels() == 1 && (ksize.width > 5 || ksize.height > 5))))
+ return false;
+#endif
- buffer.allocate(bufSize);
+ if(!normalize)
+ return false;
- #define IPP_FILTER_BOX_BORDER(ippType, flavor)\
- {\
- ippType borderValue[4] = { 0, 0, 0, 0 };\
- status = CV_INSTRUMENT_FUN_IPP(ippiFilterBoxBorder_##flavor, src.ptr<ippType>(), (int)src.step, dst.ptr<ippType>(),\
- (int)dst.step, roiSize, maskSize,\
- ippBorderType, borderValue, buffer);\
- }
+ if(!ippiCheckAnchor(anchor, ksize))
+ return false;
- if (stype == CV_8UC1)
- IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C1R)
- else if (stype == CV_8UC3)
- IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C3R)
- else if (stype == CV_8UC4)
- IPP_FILTER_BOX_BORDER(Ipp8u, 8u_C4R)
- else if (stype == CV_16UC1)
- IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C1R)
- else if (stype == CV_16UC3)
- IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C3R)
- else if (stype == CV_16UC4)
- IPP_FILTER_BOX_BORDER(Ipp16u, 16u_C4R)
- else if (stype == CV_16SC1)
- IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C1R)
- else if (stype == CV_16SC3)
- IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C3R)
- else if (stype == CV_16SC4)
- IPP_FILTER_BOX_BORDER(Ipp16s, 16s_C4R)
- else if (stype == CV_32FC1)
- IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C1R)
- else if (stype == CV_32FC3)
- IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C3R)
- else if (stype == CV_32FC4)
- IPP_FILTER_BOX_BORDER(Ipp32f, 32f_C4R)
- else
+ try
+ {
+ ::ipp::IwiImage iwSrc = ippiGetImage(src);
+ ::ipp::IwiImage iwDst = ippiGetImage(dst);
+ ::ipp::IwiSize iwKSize = ippiGetSize(ksize);
+ ::ipp::IwiBorderSize borderSize(iwKSize);
+ ::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
+ if(!ippBorder)
return false;
- if(status >= 0)
- return true;
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBox, iwSrc, iwDst, iwKSize, ::ipp::IwDefault(), ippBorder);
+ }
+ catch (::ipp::IwException)
+ {
+ return false;
}
-#undef IPP_FILTER_BOX_BORDER
+ return true;
+#else
+ CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(normalize); CV_UNUSED(borderType);
return false;
+#endif
}
}
#endif
#endif
#ifdef HAVE_IPP
-#define IPP_DISABLE_FILTERING_INMEM_PARTIAL 1 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
+#if IPP_VERSION_X100 == 201702 // IW 2017u2 has bug which doesn't allow use of partial inMem with tiling
+#define IPP_GAUSSIANBLUR_PARALLEL 0
+#else
#define IPP_GAUSSIANBLUR_PARALLEL 1
+#endif
#ifdef HAVE_IPP_IW
try
{
- ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, m_dst.m_size.width, range.end - range.start);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &m_src, &m_dst, m_kernelSize, m_sigma, m_border, &roi);
+ ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, m_dst.m_size.width, range.end - range.start);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, m_src, m_dst, m_kernelSize, m_sigma, ::ipp::IwDefault(), m_border, tile);
}
catch(::ipp::IwException e)
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP()
-#if IPP_VERSION_X100 <= 201702 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
+#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
return false; // bug on ia32
#else
{
Mat src = _src.getMat();
Mat dst = _dst.getMat();
- ::ipp::IwiImage iwSrc = ippiGetImage(src);
- ::ipp::IwiImage iwDst = ippiGetImage(dst);
- ::ipp::IwiBorderSize borderSize(::ipp::IwiSize(ippiSize(ksize)));
+ ::ipp::IwiImage iwSrc = ippiGetImage(src);
+ ::ipp::IwiImage iwDst = ippiGetImage(dst);
+ ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
- if(!ippBorder.m_borderType)
+ if(!ippBorder)
return false;
- const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
- ((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
const int threads = ippiSuggestThreadsNum(iwDst, 2);
- if(!disableThreading && IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
+ if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
bool ok;
ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
if(!ok)
return false;
} else {
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, &iwSrc, &iwDst, ksize.width, (float) sigma1, ippBorder);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterGaussian, iwSrc, iwDst, ksize.width, sigma1, ::ipp::IwDefault(), ippBorder);
}
}
catch (::ipp::IwException ex)
{
CV_INSTRUMENT_REGION_IPP()
+#if IPP_VERSION_X100 < 201801
+ // Degradations for big kernel
+ if(ksize > 7)
+ return false;
+#endif
+
{
int bufSize;
IppiSize dstRoiSize = ippiSize(dst.cols, dst.rows), maskSize = ippiSize(ksize, ksize);
try
{
- ::ipp::IwiRoi roi = ::ipp::IwiRect(0, range.start, dst.m_size.width, range.end - range.start);
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &src, &dst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, borderType, &roi);
+ ::ipp::IwiTile tile = ::ipp::IwiRoi(0, range.start, dst.m_size.width, range.end - range.start);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, src, dst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), borderType, tile);
}
catch(::ipp::IwException)
{
::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize(radius);
::ipp::IwiBorderType ippBorder(ippiGetBorder(iwSrc, borderType, borderSize));
- if(!ippBorder.m_borderType)
+ if(!ippBorder)
return false;
- const bool disableThreading = IPP_DISABLE_FILTERING_INMEM_PARTIAL &&
- ((ippBorder.m_borderFlags)&ippBorderInMem) && ((ippBorder.m_borderFlags)&ippBorderInMem) != ippBorderInMem;
const int threads = ippiSuggestThreadsNum(iwDst, 2);
- if(!disableThreading && IPP_BILATERAL_PARALLEL && threads > 1) {
+ if(IPP_BILATERAL_PARALLEL && threads > 1) {
bool ok = true;
Range range(0, (int)iwDst.m_size.height);
ipp_bilateralFilterParallel invoker(iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ippBorder, &ok);
if(!ok)
return false;
} else {
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, &iwSrc, &iwDst, radius, valSquareSigma, posSquareSigma, ippiFilterBilateralGauss, ippDistNormL1, ippBorder);
+ CV_INSTRUMENT_FUN_IPP(::ipp::iwiFilterBilateral, iwSrc, iwDst, radius, valSquareSigma, posSquareSigma, ::ipp::IwDefault(), ippBorder);
}
}
catch (::ipp::IwException)
{
CV_INSTRUMENT_REGION_IPP()
-#if IPP_VERSION_X100 >= 810
- int ippStatus = -1;
+// Performance degradations
+#if IPP_VERSION_X100 >= 201800
IppiSize srcSize = { size.width, size.height };
- CV_SUPPRESS_DEPRECATED_START
- ippStatus = CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh);
- CV_SUPPRESS_DEPRECATED_END
- if(ippStatus >= 0)
- return true;
+ if(CV_INSTRUMENT_FUN_IPP(ippiComputeThreshold_Otsu_8u_C1R, _src, step, srcSize, &thresh) < 0)
+ return false;
+
+ return true;
#else
CV_UNUSED(_src); CV_UNUSED(step); CV_UNUSED(size); CV_UNUSED(thresh);
-#endif
return false;
+#endif
}
#endif
#ifdef HAVE_IPP
unsigned char thresh;
- CV_IPP_RUN(IPP_VERSION_X100 >= 810, ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
+ CV_IPP_RUN_FAST(ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
#endif
const int N = 256;
for( i = 0; i < cascade->count; i++ )
{
if( cascade->ipp_stages[i] )
-#if IPP_VERSION_X100 < 900
+#if IPP_VERSION_X100 < 900 && !IPP_DISABLE_HAAR
ippiHaarClassifierFree_32f( (IppiHaarClassifier_32f*)cascade->ipp_stages[i] );
#else
cvFree(&cascade->ipp_stages[i]);
// convert flags register to more handy variables
void flagsToVars(int flags)
{
-#if defined(HAVE_IPP_ICV_ONLY)
+#if defined(HAVE_IPP_ICV)
ipp = 0;
icv = ((flags&CV_IMPL_IPP) > 0);
#else
::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization);
if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl;
#endif
+
+#ifdef HAVE_IPP
+ const char * ipp_optimization = cv::ipp::useIPP()? "enabled" : "disabled";
+ ::testing::Test::RecordProperty("cv_ipp_optimization", ipp_optimization);
+ if (useStdOut) std::cout << "Intel(R) IPP optimization: " << ipp_optimization << std::endl;
+
+ cv::String ippVer = cv::ipp::getIppVersion();
+ ::testing::Test::RecordProperty("cv_ipp_version", ippVer);
+ if(useStdOut) std::cout << "Intel(R) IPP version: " << ippVer.c_str() << std::endl;
+#endif
}