From aa80f754f403bb1e8e3248e201bab090f64cc401 Mon Sep 17 00:00:00 2001 From: antalzsiroscandid Date: Wed, 27 Nov 2019 15:31:38 +0100 Subject: [PATCH 01/16] dnn: reading IR models from buffer --- modules/dnn/include/opencv2/dnn/dnn.hpp | 47 +++++++++++- modules/dnn/src/dnn.cpp | 128 ++++++++++++++++++++++++++++---- modules/dnn/test/test_misc.cpp | 54 ++++++++++++++ 3 files changed, 211 insertions(+), 18 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index a4bbffc..94e2ada 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -384,7 +384,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN CV_WRAP Net(); //!< Default constructor. CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore. - /** @brief Create a network from Intel's Model Optimizer intermediate representation. + /** @brief Create a network from Intel's Model Optimizer intermediate representation (IR). * @param[in] xml XML configuration file with network's topology. * @param[in] bin Binary file with trained weights. * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine @@ -392,6 +392,25 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN */ CV_WRAP static Net readFromModelOptimizer(const String& xml, const String& bin); + /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). + * @param[in] bufferModelConfig buffer with model's configuration. + * @param[in] bufferWeights buffer with model's trained weights. + * @returns Net object. + */ + CV_WRAP static + Net readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights); + + /** @brief Create a network from Intel's Model Optimizer in-memory buffers with intermediate representation (IR). + * @param[in] bufferModelConfigPtr buffer pointer of model's configuration. 
+ * @param[in] bufferModelConfigSize buffer size of model's configuration. + * @param[in] bufferWeightsPtr buffer pointer of model's trained weights. + * @param[in] bufferWeightsSize buffer size of model's trained weights. + * @returns Net object. + */ + static + Net readFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize); + /** Returns true if there are no layers in the network. */ CV_WRAP bool empty() const; @@ -857,7 +876,31 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine * backend. */ - CV_EXPORTS_W Net readNetFromModelOptimizer(const String &xml, const String &bin); + CV_EXPORTS_W + Net readNetFromModelOptimizer(const String &xml, const String &bin); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] bufferModelConfig Buffer contains XML configuration with network's topology. + * @param[in] bufferWeights Buffer contains binary data with trained weights. + * @returns Net object. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. + */ + CV_EXPORTS_W + Net readNetFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights); + + /** @brief Load a network from Intel's Model Optimizer intermediate representation. + * @param[in] bufferModelConfigPtr Pointer to buffer which contains XML configuration with network's topology. + * @param[in] bufferModelConfigSize Binary size of XML configuration data. + * @param[in] bufferWeightsPtr Pointer to buffer which contains binary data with trained weights. + * @param[in] bufferWeightsSize Binary size of trained weights data. + * @returns Net object. + * Networks imported from Intel's Model Optimizer are launched in Intel's Inference Engine + * backend. 
+ */ + CV_EXPORTS + Net readNetFromModelOptimizer(const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize); /** @brief Reads a network model ONNX. * @param onnxFile path to the .onnx file with text description of the network architecture. diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index ad2e527..e2c296b 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2910,28 +2910,22 @@ struct Net::Impl return getBlobAsync(getPinByAlias(outputName)); } #endif // CV_CXX11 + +#ifdef HAVE_INF_ENGINE + static + Net createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet); +#endif }; Net::Net() : impl(new Net::Impl) { } -Net Net::readFromModelOptimizer(const String& xml, const String& bin) +#ifdef HAVE_INF_ENGINE +/*static*/ +Net Net::Impl::createNetworkFromModelOptimizer(InferenceEngine::CNNNetwork& ieNet) { -#ifndef HAVE_INF_ENGINE - CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); -#else - -#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) - InferenceEngine::CNNNetReader reader; - reader.ReadNetwork(xml); - reader.ReadWeights(bin); - - InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); -#else - InferenceEngine::Core& ie = getCore(); - InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); -#endif + CV_TRACE_FUNCTION(); std::vector inputsNames; for (auto& it : ieNet.getInputsInfo()) @@ -3001,9 +2995,95 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin) cvNet.impl->skipInfEngineInit = true; return cvNet; +} +#endif // HAVE_INF_ENGINE + +Net Net::readFromModelOptimizer(const String& xml, const String& bin) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(xml); CV_UNUSED(bin); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else +#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) + 
InferenceEngine::CNNNetReader reader; + reader.ReadNetwork(xml); + reader.ReadWeights(bin); + + InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); +#else + InferenceEngine::Core& ie = getCore(); + InferenceEngine::CNNNetwork ieNet = ie.ReadNetwork(xml, bin); +#endif + + return Impl::createNetworkFromModelOptimizer(ieNet); #endif // HAVE_INF_ENGINE } +Net Net::readFromModelOptimizer(const std::vector& bufferModelConfig, const std::vector& bufferWeights) +{ + CV_TRACE_FUNCTION(); + CV_Assert(!bufferModelConfig.empty()); + CV_Assert(!bufferWeights.empty()); + return readFromModelOptimizer(bufferModelConfig.data(), bufferModelConfig.size(), + bufferWeights.data(), bufferWeights.size()); +} + +Net Net::readFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize +) +{ + CV_TRACE_FUNCTION(); +#ifndef HAVE_INF_ENGINE + CV_UNUSED(bufferModelConfigPtr); CV_UNUSED(bufferWeightsPtr); + CV_UNUSED(bufferModelConfigSize); CV_UNUSED(bufferModelConfigSize); + CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer."); +#else + +#if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3) + InferenceEngine::CNNNetReader reader; + + try + { + reader.ReadNetwork(bufferModelConfigPtr, bufferModelConfigSize); + + InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); + InferenceEngine::TBlob::Ptr weightsBlobPtr(new InferenceEngine::TBlob(tensorDesc)); + weightsBlobPtr->allocate(); + std::memcpy(weightsBlobPtr->buffer(), (uchar*)bufferWeightsPtr, bufferWeightsSize); + reader.SetWeights(weightsBlobPtr); + } + catch (const std::exception& e) + { + CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); + } + + InferenceEngine::CNNNetwork ieNet = reader.getNetwork(); +#else + InferenceEngine::Core& ie = getCore(); + + std::string model; 
model.assign((char*)bufferModelConfigPtr, bufferModelConfigSize); + + InferenceEngine::CNNNetwork ieNet; + try + { + InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, { bufferWeightsSize }, InferenceEngine::Layout::C); + InferenceEngine::Blob::CPtr weights_blob = InferenceEngine::make_shared_blob(tensorDesc, (uint8_t*)bufferWeightsPtr, bufferWeightsSize); + + ieNet = ie.ReadNetwork(model, weights_blob); + } + catch (const std::exception& e) + { + CV_Error(Error::StsError, std::string("DNN: IE failed to load model: ") + e.what()); + } +#endif + + return Impl::createNetworkFromModelOptimizer(ieNet); +#endif // HAVE_INF_ENGINE +} + + Net::~Net() { } @@ -4344,7 +4424,7 @@ Net readNet(const String& _framework, const std::vector& bufferModel, else if (framework == "torch") CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); else if (framework == "dldt") - CV_Error(Error::StsNotImplemented, "Reading Intel's Model Optimizer models from buffers"); + return readNetFromModelOptimizer(bufferConfig, bufferModel); CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); } @@ -4353,5 +4433,21 @@ Net readNetFromModelOptimizer(const String &xml, const String &bin) return Net::readFromModelOptimizer(xml, bin); } +Net readNetFromModelOptimizer(const std::vector& bufferCfg, const std::vector& bufferModel) +{ + return Net::readFromModelOptimizer(bufferCfg, bufferModel); +} + +Net readNetFromModelOptimizer( + const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize, + const uchar* bufferWeightsPtr, size_t bufferWeightsSize +) +{ + return Net::readFromModelOptimizer( + bufferModelConfigPtr, bufferModelConfigSize, + bufferWeightsPtr, bufferWeightsSize + ); +} + CV__DNN_EXPERIMENTAL_NS_END }} // namespace diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index 464ef10..2069b97 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -637,6 +637,60 @@ 
TEST_P(Test_Model_Optimizer, forward_two_nets) normAssert(ref0, ref2, 0, 0); } + +TEST_P(Test_Model_Optimizer, readFromBuffer) +{ + const Backend backendId = get<0>(GetParam()); + const Target targetId = get<1>(GetParam()); + + if (backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + throw SkipTestException("No support for async forward"); + + const std::string suffix = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? "_fp16" : ""; + const std::string& weightsFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".bin"); + const std::string& modelFile = findDataFile("dnn/layers/layer_convolution" + suffix + ".xml"); + + if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_API); + else if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + setInferenceEngineBackendType(CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH); + else + FAIL() << "Unknown backendId"; + + Net net1 = readNetFromModelOptimizer(modelFile, weightsFile); + net1.setPreferableBackend(backendId); + net1.setPreferableTarget(targetId); + + + std::vector modelConfig; + readFileContent(modelFile, modelConfig); + std::vector weights; + readFileContent(weightsFile, weights); + + Net net2 = readNetFromModelOptimizer( + (const uchar*)modelConfig.data(), modelConfig.size(), + (const uchar*)weights.data(), weights.size() + ); + net2.setPreferableBackend(backendId); + net2.setPreferableTarget(targetId); + + int blobSize[] = {2, 6, 75, 113}; + Mat input(4, &blobSize[0], CV_32F); + randu(input, 0, 255); + + Mat ref, actual; + { + net1.setInput(input); + ref = net1.forward(); + } + { + net2.setInput(input); + actual = net2.forward(); + } + + normAssert(ref, actual, "", 0, 0); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_Model_Optimizer, dnnBackendsAndTargetsIE() ); -- 2.7.4 From f5a84f75c4427e0754138264dbce0b55a80d5d38 Mon Sep 17 00:00:00 2001 From: 
Vitaly Tuzov Date: Wed, 18 Dec 2019 20:05:36 +0300 Subject: [PATCH 02/16] Fix for CV_8UC2 linear resize vectorization --- modules/imgproc/src/resize.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index df127d1..cc967cf 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -1605,13 +1605,14 @@ struct HResizeLinearVecU8_X4 for( dx = 0; dx < len0; dx += step ) { + int ofs[4] = { xofs[dx], xofs[dx + 2], xofs[dx + 4], xofs[dx + 6] }; v_int16x8 al = v_load(alpha+dx*2); v_int16x8 ah = v_load(alpha+dx*2+8); v_uint16x8 sl, sh; - v_expand(v_interleave_pairs(v_lut_quads(S0, xofs+dx)), sl, sh); + v_expand(v_interleave_pairs(v_lut_quads(S0, ofs)), sl, sh); v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(sl), al)); v_store(&D0[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah)); - v_expand(v_interleave_pairs(v_lut_pairs(S1, xofs+dx)), sl, sh); + v_expand(v_interleave_pairs(v_lut_quads(S1, ofs)), sl, sh); v_store(&D1[dx], v_dotprod(v_reinterpret_as_s16(sl), al)); v_store(&D1[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah)); } @@ -1622,10 +1623,11 @@ struct HResizeLinearVecU8_X4 int *D = dst[k]; for( dx = 0; dx < len0; dx += step ) { + int ofs[4] = { xofs[dx], xofs[dx + 2], xofs[dx + 4], xofs[dx + 6] }; v_int16x8 al = v_load(alpha+dx*2); v_int16x8 ah = v_load(alpha+dx*2+8); v_uint16x8 sl, sh; - v_expand(v_interleave_pairs(v_lut_quads(S, xofs+dx)), sl, sh); + v_expand(v_interleave_pairs(v_lut_quads(S, ofs)), sl, sh); v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(sl), al)); v_store(&D[dx+4], v_dotprod(v_reinterpret_as_s16(sh), ah)); } -- 2.7.4 From 28a5f7d66bed5b562cba38bf588dc573c26c24f5 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 18 Dec 2019 23:14:38 +0000 Subject: [PATCH 03/16] 3rdparty: TBB version 2019u8 => 2020.0 --- 3rdparty/tbb/CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt index a3c0a81..67eae7d 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -5,10 +5,11 @@ if (WIN32 AND NOT ARM) message(FATAL_ERROR "BUILD_TBB option supports Windows on ARM only!\nUse regular official TBB build instead of the BUILD_TBB option!") endif() -ocv_update(OPENCV_TBB_RELEASE "2019_U8") -ocv_update(OPENCV_TBB_RELEASE_MD5 "7c371d0f62726154d2c568a85697a0ad") +ocv_update(OPENCV_TBB_RELEASE "v2020.0") +ocv_update(OPENCV_TBB_RELEASE_MD5 "5858dd01ec007c139d5d178b21e06dae") ocv_update(OPENCV_TBB_FILENAME "${OPENCV_TBB_RELEASE}.tar.gz") -ocv_update(OPENCV_TBB_SUBDIR "tbb-${OPENCV_TBB_RELEASE}") +string(REGEX REPLACE "^v" "" OPENCV_TBB_RELEASE_ "${OPENCV_TBB_RELEASE}") +ocv_update(OPENCV_TBB_SUBDIR "tbb-${OPENCV_TBB_RELEASE_}") set(tbb_src_dir "${OpenCV_BINARY_DIR}/3rdparty/tbb") ocv_download(FILENAME ${OPENCV_TBB_FILENAME} @@ -34,10 +35,12 @@ ocv_include_directories("${tbb_src_dir}/include" file(GLOB lib_srcs "${tbb_src_dir}/src/tbb/*.cpp") file(GLOB lib_hdrs "${tbb_src_dir}/src/tbb/*.h") list(APPEND lib_srcs "${tbb_src_dir}/src/rml/client/rml_tbb.cpp") +ocv_list_filterout(lib_srcs "${tbb_src_dir}/src/tbb/tbbbind.cpp") # hwloc.h requirement if (WIN32) add_definitions(/D__TBB_DYNAMIC_LOAD_ENABLED=0 /D__TBB_BUILD=1 + /DTBB_SUPPRESS_DEPRECATED_MESSAGES=1 /DTBB_NO_LEGACY=1 /D_UNICODE /DUNICODE -- 2.7.4 From 8d22ac200f488eb76d86cdf6ad12581df2980095 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 11 Dec 2019 20:08:10 +0000 Subject: [PATCH 04/16] core: workaround flipHoriz() alignment issues --- modules/core/include/opencv2/core/utility.hpp | 37 +++++++++++++ modules/core/src/copy.cpp | 77 ++++++++++++++++++++++----- 2 files changed, 102 insertions(+), 12 deletions(-) diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index e7f169b..063747e 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ 
b/modules/core/include/opencv2/core/utility.hpp @@ -514,6 +514,43 @@ static inline size_t roundUp(size_t a, unsigned int b) return a + b - 1 - (a + b - 1) % b; } +/** @brief Alignment check of passed values + +Usage: `isAligned(...)` + +@note Alignment(N) must be a power of 2 (2**k, 2^k) +*/ +template static inline +bool isAligned(const T& data) +{ + CV_StaticAssert((N & (N - 1)) == 0, ""); // power of 2 + return (((size_t)data) & (N - 1)) == 0; +} +/** @overload */ +template static inline +bool isAligned(const void* p1) +{ + return isAligned((size_t)p1); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2) +{ + return isAligned(((size_t)p1)|((size_t)p2)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)); +} +/** @overload */ +template static inline +bool isAligned(const void* p1, const void* p2, const void* p3, const void* p4) +{ + return isAligned(((size_t)p1)|((size_t)p2)|((size_t)p3)|((size_t)p4)); +} + /** @brief Enables or disables the optimized code. 
The function can be used to dynamically turn on and off optimized dispatched code (code that uses SSE4.2, AVX/AVX2, diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 3f68a25..3fa4982 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -563,6 +563,12 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask) return *this; } +#if CV_NEON && !defined(__aarch64__) +#define CV_CHECK_ALIGNMENT 1 +#else +#define CV_CHECK_ALIGNMENT 0 +#endif + #if CV_SIMD128 template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { @@ -572,6 +578,10 @@ template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s int width_1 = width & -v_uint8x16::nlanes; int i, j; +#if CV_CHECK_ALIGNMENT + CV_Assert(isAligned(src, dst)); +#endif + for( ; size.height--; src += sstep, dst += dstep ) { for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes ) @@ -585,7 +595,7 @@ template CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, s v_store((T*)(dst + j - v_uint8x16::nlanes), t0); v_store((T*)(dst + i), t1); } - if (((size_t)src|(size_t)dst) % sizeof(T) == 0) + if (isAligned(src, dst)) { for ( ; i < width; i += sizeof(T), j -= sizeof(T) ) { @@ -620,6 +630,11 @@ template CV_ALWAYS_INLINE void flipHoriz_double( const int end = (int)(size.width*esz); int width = (end + 1)/2; +#if CV_CHECK_ALIGNMENT + CV_Assert(isAligned(src, dst)); + CV_Assert(isAligned(src, dst)); +#endif + for( ; size.height--; src += sstep, dst += dstep ) { for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) ) @@ -644,6 +659,9 @@ static void flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz ) { #if CV_SIMD +#if CV_CHECK_ALIGNMENT + size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep; +#endif if (esz == 2 * v_uint8x16::nlanes) { int end = (int)(size.width*esz); @@ -693,15 +711,27 @@ 
flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, } } } - else if (esz == 8) + else if (esz == 8 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { flipHoriz_single(src, sstep, dst, dstep, size, esz); } - else if (esz == 4) + else if (esz == 4 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { flipHoriz_single(src, sstep, dst, dstep, size, esz); } - else if (esz == 2) + else if (esz == 2 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { flipHoriz_single(src, sstep, dst, dstep, size, esz); } @@ -709,7 +739,11 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, { flipHoriz_single(src, sstep, dst, dstep, size, esz); } - else if (esz == 24) + else if (esz == 24 +#if CV_CHECK_ALIGNMENT + && isAligned(alignmentMark) +#endif + ) { int end = (int)(size.width*esz); int width = (end + 1)/2; @@ -732,6 +766,7 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, } } } +#if !CV_CHECK_ALIGNMENT else if (esz == 12) { flipHoriz_double(src, sstep, dst, dstep, size, esz); @@ -744,8 +779,9 @@ flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, { flipHoriz_double(src, sstep, dst, dstep, size, esz); } - else #endif + else +#endif // CV_SIMD { int i, j, limit = (int)(((size.width + 1)/2)*esz); AutoBuffer _tab(size.width*esz); @@ -779,16 +815,33 @@ flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, { int i = 0; #if CV_SIMD - for( ; i <= size.width - (v_int32::nlanes * 4); i += v_int32::nlanes * 4 ) +#if CV_CHECK_ALIGNMENT + if (isAligned(src0, src1, dst0, dst1)) +#endif { - v_int32 t0 = vx_load((int*)(src0 + i)); - v_int32 t1 = vx_load((int*)(src1 + i)); - vx_store((int*)(dst0 + i), t1); - vx_store((int*)(dst1 + i), t0); + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_int32 t0 = vx_load((int*)(src0 + i)); + v_int32 t1 = vx_load((int*)(src1 + i)); + vx_store((int*)(dst0 
+ i), t1); + vx_store((int*)(dst1 + i), t0); + } } +#if CV_CHECK_ALIGNMENT + else + { + for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH) + { + v_uint8 t0 = vx_load(src0 + i); + v_uint8 t1 = vx_load(src1 + i); + vx_store(dst0 + i, t1); + vx_store(dst1 + i, t0); + } + } +#endif #endif - if( ((size_t)src0|(size_t)dst0|(size_t)src1|(size_t)dst1) % sizeof(int) == 0 ) + if (isAligned(src0, src1, dst0, dst1)) { for( ; i <= size.width - 16; i += 16 ) { -- 2.7.4 From 9cd1d087c38dfa41d4606b53a40496871ae4e875 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 19 Dec 2019 00:29:53 +0000 Subject: [PATCH 05/16] android(camera2): apply .disconnectCamera() patch from issue 13574 --- .../android-21/java/org/opencv/android/JavaCamera2View.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java index 09e01b0..5eac339 100644 --- a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java +++ b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java @@ -230,7 +230,7 @@ public class JavaCamera2View extends CameraBridgeViewBase { @Override protected void disconnectCamera() { - Log.i(LOGTAG, "closeCamera"); + Log.i(LOGTAG, "close camera"); try { CameraDevice c = mCameraDevice; mCameraDevice = null; @@ -241,13 +241,14 @@ public class JavaCamera2View extends CameraBridgeViewBase { if (null != c) { c.close(); } + } finally { + stopBackgroundThread(); if (null != mImageReader) { mImageReader.close(); mImageReader = null; } - } finally { - stopBackgroundThread(); } + Log.i(LOGTAG, "camera closed!"); } public static class JavaCameraSizeAccessor implements ListItemAccessor { -- 2.7.4 From e801f0e954a4d095b0240c0d83cfb5376a6f0e85 Mon Sep 17 00:00:00 2001 From: Sebastien Wybo Date: Thu, 19 Dec 2019 10:59:18 +0100 Subject: [PATCH 06/16] Merge pull request 
#16011 from sebastien-wybo:fix_16007 * Fix #16007 - colinearity computed using all 3 coordinates * calib3d(test): estimateAffine3D regression 16007 --- modules/calib3d/src/ptsetreg.cpp | 6 +++--- modules/calib3d/test/test_affine3d_estimator.cpp | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modules/calib3d/src/ptsetreg.cpp b/modules/calib3d/src/ptsetreg.cpp index 88e1815..9b2ec78 100644 --- a/modules/calib3d/src/ptsetreg.cpp +++ b/modules/calib3d/src/ptsetreg.cpp @@ -488,13 +488,13 @@ public: for(j = 0; j < i; ++j) { Point3f d1 = ptr[j] - ptr[i]; - float n1 = d1.x*d1.x + d1.y*d1.y; + float n1 = d1.x*d1.x + d1.y*d1.y + d1.z*d1.z; for(k = 0; k < j; ++k) { Point3f d2 = ptr[k] - ptr[i]; - float denom = (d2.x*d2.x + d2.y*d2.y)*n1; - float num = d1.x*d2.x + d1.y*d2.y; + float denom = (d2.x*d2.x + d2.y*d2.y + d2.z*d2.z)*n1; + float num = d1.x*d2.x + d1.y*d2.y + d1.z*d2.z; if( num*num > threshold*threshold*denom ) return false; diff --git a/modules/calib3d/test/test_affine3d_estimator.cpp b/modules/calib3d/test/test_affine3d_estimator.cpp index 9c3821b..dba09af 100644 --- a/modules/calib3d/test/test_affine3d_estimator.cpp +++ b/modules/calib3d/test/test_affine3d_estimator.cpp @@ -192,4 +192,18 @@ void CV_Affine3D_EstTest::run( int /* start_from */) TEST(Calib3d_EstimateAffine3D, accuracy) { CV_Affine3D_EstTest test; test.safe_run(); } +TEST(Calib3d_EstimateAffine3D, regression_16007) +{ + std::vector m1, m2; + m1.push_back(Point3f(1.0f, 0.0f, 0.0f)); m2.push_back(Point3f(1.0f, 1.0f, 0.0f)); + m1.push_back(Point3f(1.0f, 0.0f, 1.0f)); m2.push_back(Point3f(1.0f, 1.0f, 1.0f)); + m1.push_back(Point3f(0.5f, 0.0f, 0.5f)); m2.push_back(Point3f(0.5f, 1.0f, 0.5f)); + m1.push_back(Point3f(2.5f, 0.0f, 2.5f)); m2.push_back(Point3f(2.5f, 1.0f, 2.5f)); + m1.push_back(Point3f(2.0f, 0.0f, 1.0f)); m2.push_back(Point3f(2.0f, 1.0f, 1.0f)); + + cv::Mat m3D, inl; + int res = cv::estimateAffine3D(m1, m2, m3D, inl); + EXPECT_EQ(1, res); +} + }} // namespace -- 
2.7.4 From 5bf73457431b7d2cb87ac8c107865388dbf66642 Mon Sep 17 00:00:00 2001 From: jeffeDurand Date: Thu, 19 Dec 2019 05:02:48 -0500 Subject: [PATCH 07/16] Merge pull request #16090 from jeffeDurand:cuda_mog2_issue_5296 * cuda_mog2_issue_5296 --- modules/cudabgsegm/src/cuda/mog2.cu | 581 +++++++++++++++++------------------ modules/cudabgsegm/src/cuda/mog2.hpp | 37 +++ modules/cudabgsegm/src/mog2.cpp | 354 +++++++++++---------- 3 files changed, 493 insertions(+), 479 deletions(-) create mode 100644 modules/cudabgsegm/src/cuda/mog2.hpp diff --git a/modules/cudabgsegm/src/cuda/mog2.cu b/modules/cudabgsegm/src/cuda/mog2.cu index 789afa4..46891c6 100644 --- a/modules/cudabgsegm/src/cuda/mog2.cu +++ b/modules/cudabgsegm/src/cuda/mog2.cu @@ -47,393 +47,372 @@ #include "opencv2/core/cuda/vec_math.hpp" #include "opencv2/core/cuda/limits.hpp" -namespace cv { namespace cuda { namespace device -{ - namespace mog2 - { - /////////////////////////////////////////////////////////////// - // Utility - - __device__ __forceinline__ float cvt(uchar val) - { - return val; - } - __device__ __forceinline__ float3 cvt(const uchar3& val) - { - return make_float3(val.x, val.y, val.z); - } - __device__ __forceinline__ float4 cvt(const uchar4& val) - { - return make_float4(val.x, val.y, val.z, val.w); - } - - __device__ __forceinline__ float sqr(float val) - { - return val * val; - } - __device__ __forceinline__ float sqr(const float3& val) - { - return val.x * val.x + val.y * val.y + val.z * val.z; - } - __device__ __forceinline__ float sqr(const float4& val) - { - return val.x * val.x + val.y * val.y + val.z * val.z; - } +#include "mog2.hpp" - __device__ __forceinline__ float sum(float val) - { - return val; - } - __device__ __forceinline__ float sum(const float3& val) - { - return val.x + val.y + val.z; - } - __device__ __forceinline__ float sum(const float4& val) - { - return val.x + val.y + val.z; - } - - template - __device__ __forceinline__ void swap(Ptr2D& ptr, int x, int y, int k, 
int rows) - { - typename Ptr2D::elem_type val = ptr(k * rows + y, x); - ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); - ptr((k + 1) * rows + y, x) = val; - } - - /////////////////////////////////////////////////////////////// - // MOG2 +namespace cv +{ +namespace cuda +{ +namespace device +{ +namespace mog2 +{ +/////////////////////////////////////////////////////////////// +// Utility - __constant__ int c_nmixtures; - __constant__ float c_Tb; - __constant__ float c_TB; - __constant__ float c_Tg; - __constant__ float c_varInit; - __constant__ float c_varMin; - __constant__ float c_varMax; - __constant__ float c_tau; - __constant__ unsigned char c_shadowVal; +__device__ __forceinline__ float cvt(uchar val) +{ + return val; +} +__device__ __forceinline__ float3 cvt(const uchar3 &val) +{ + return make_float3(val.x, val.y, val.z); +} +__device__ __forceinline__ float4 cvt(const uchar4 &val) +{ + return make_float4(val.x, val.y, val.z, val.w); +} - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal) - { - varMin = ::fminf(varMin, varMax); - varMax = ::fmaxf(varMin, varMax); - - cudaSafeCall( cudaMemcpyToSymbol(c_nmixtures, &nmixtures, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tb, &Tb, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_TB, &TB, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_Tg, &Tg, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varInit, &varInit, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMin, &varMin, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_varMax, &varMax, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_tau, &tau, sizeof(float)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_shadowVal, &shadowVal, sizeof(unsigned char)) ); - } +__device__ __forceinline__ float sqr(float val) +{ + return val * val; +} +__device__ __forceinline__ float sqr(const float3 &val) +{ + return val.x * val.x + val.y * val.y 
+ val.z * val.z; +} +__device__ __forceinline__ float sqr(const float4 &val) +{ + return val.x * val.x + val.y * val.y + val.z * val.z; +} - template - __global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, - PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, - const float alphaT, const float alpha1, const float prune) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; +__device__ __forceinline__ float sum(float val) +{ + return val; +} +__device__ __forceinline__ float sum(const float3 &val) +{ + return val.x + val.y + val.z; +} +__device__ __forceinline__ float sum(const float4 &val) +{ + return val.x + val.y + val.z; +} - if (x >= frame.cols || y >= frame.rows) - return; +template +__device__ __forceinline__ void swap(Ptr2D &ptr, int x, int y, int k, int rows) +{ + typename Ptr2D::elem_type val = ptr(k * rows + y, x); + ptr(k * rows + y, x) = ptr((k + 1) * rows + y, x); + ptr((k + 1) * rows + y, x) = val; +} + +/////////////////////////////////////////////////////////////// +// MOG2 + +template +__global__ void mog2(const PtrStepSz frame, PtrStepb fgmask, PtrStepb modesUsed, + PtrStepf gmm_weight, PtrStepf gmm_variance, PtrStep gmm_mean, + const float alphaT, const float alpha1, const float prune, const Constants *const constants) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - WorkT pix = cvt(frame(y, x)); + if (x < frame.cols && y < frame.rows) + { + WorkT pix = cvt(frame(y, x)); - //calculate distances to the modes (+ sort) - //here we need to go in descending order!!! + //calculate distances to the modes (+ sort) + //here we need to go in descending order!!! 
- bool background = false; // true - the pixel classified as background + bool background = false; // true - the pixel classified as background - //internal: + //internal: - bool fitsPDF = false; //if it remains zero a new GMM mode will be added + bool fitsPDF = false; //if it remains zero a new GMM mode will be added - int nmodes = modesUsed(y, x); - int nNewModes = nmodes; //current number of modes in GMM + int nmodes = modesUsed(y, x); + const int nNewModes = nmodes; //current number of modes in GMM - float totalWeight = 0.0f; + float totalWeight = 0.0f; - //go through all modes + //go through all modes - for (int mode = 0; mode < nmodes; ++mode) + for (int mode = 0; mode < nmodes; ++mode) + { + //need only weight if fit is found + float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; + int swap_count = 0; + //fit not found yet + if (!fitsPDF) { - //need only weight if fit is found - float weight = alpha1 * gmm_weight(mode * frame.rows + y, x) + prune; - int swap_count = 0; - //fit not found yet - if (!fitsPDF) - { - //check if it belongs to some of the remaining modes - float var = gmm_variance(mode * frame.rows + y, x); - - WorkT mean = gmm_mean(mode * frame.rows + y, x); + //check if it belongs to some of the remaining modes + const float var = gmm_variance(mode * frame.rows + y, x); - //calculate difference and distance - WorkT diff = mean - pix; - float dist2 = sqr(diff); + const WorkT mean = gmm_mean(mode * frame.rows + y, x); - //background? - Tb - usually larger than Tg - if (totalWeight < c_TB && dist2 < c_Tb * var) - background = true; + //calculate difference and distance + const WorkT diff = mean - pix; + const float dist2 = sqr(diff); - //check fit - if (dist2 < c_Tg * var) - { - //belongs to the mode - fitsPDF = true; + //background? 
- Tb - usually larger than Tg + if (totalWeight < constants->TB_ && dist2 < constants->Tb_ * var) + background = true; - //update distribution + //check fit + if (dist2 < constants->Tg_ * var) + { + //belongs to the mode + fitsPDF = true; - //update weight - weight += alphaT; - float k = alphaT / weight; + //update distribution - //update mean - gmm_mean(mode * frame.rows + y, x) = mean - k * diff; + //update weight + weight += alphaT; + float k = alphaT / weight; - //update variance - float varnew = var + k * (dist2 - var); + //update mean + gmm_mean(mode * frame.rows + y, x) = mean - k * diff; - //limit the variance - varnew = ::fmaxf(varnew, c_varMin); - varnew = ::fminf(varnew, c_varMax); + //update variance + float varnew = var + k * (dist2 - var); - gmm_variance(mode * frame.rows + y, x) = varnew; + //limit the variance + varnew = ::fmaxf(varnew, constants->varMin_); + varnew = ::fminf(varnew, constants->varMax_); - //sort - //all other weights are at the same place and - //only the matched (iModes) is higher -> just find the new place for it + gmm_variance(mode * frame.rows + y, x) = varnew; - for (int i = mode; i > 0; --i) - { - //check one up - if (weight < gmm_weight((i - 1) * frame.rows + y, x)) - break; + //sort + //all other weights are at the same place and + //only the matched (iModes) is higher -> just find the new place for it - swap_count++; - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } + for (int i = mode; i > 0; --i) + { + //check one up + if (weight < gmm_weight((i - 1) * frame.rows + y, x)) + break; - //belongs to the mode - bFitsPDF becomes 1 + swap_count++; + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); } - } // !fitsPDF - //check prune - if (weight < -prune) - { - weight = 0.0f; - nmodes--; + //belongs to the mode - bFitsPDF 
becomes 1 } + } // !fitsPDF - gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value - totalWeight += weight; + //check prune + if (weight < -prune) + { + weight = 0.0f; + nmodes--; } - //renormalize weights + gmm_weight((mode - swap_count) * frame.rows + y, x) = weight; //update weight by the calculated value + totalWeight += weight; + } - totalWeight = 1.f / totalWeight; - for (int mode = 0; mode < nmodes; ++mode) - gmm_weight(mode * frame.rows + y, x) *= totalWeight; + //renormalize weights - nmodes = nNewModes; + totalWeight = 1.f / totalWeight; + for (int mode = 0; mode < nmodes; ++mode) + gmm_weight(mode * frame.rows + y, x) *= totalWeight; - //make new mode if needed and exit + nmodes = nNewModes; - if (!fitsPDF) - { - // replace the weakest or add a new one - int mode = nmodes == c_nmixtures ? c_nmixtures - 1 : nmodes++; + //make new mode if needed and exit - if (nmodes == 1) - gmm_weight(mode * frame.rows + y, x) = 1.f; - else - { - gmm_weight(mode * frame.rows + y, x) = alphaT; + if (!fitsPDF) + { + // replace the weakest or add a new one + const int mode = nmodes == constants->nmixtures_ ? 
constants->nmixtures_ - 1 : nmodes++; - // renormalize all other weights + if (nmodes == 1) + gmm_weight(mode * frame.rows + y, x) = 1.f; + else + { + gmm_weight(mode * frame.rows + y, x) = alphaT; - for (int i = 0; i < nmodes - 1; ++i) - gmm_weight(i * frame.rows + y, x) *= alpha1; - } + // renormalize all other weights - // init + for (int i = 0; i < nmodes - 1; ++i) + gmm_weight(i * frame.rows + y, x) *= alpha1; + } - gmm_mean(mode * frame.rows + y, x) = pix; - gmm_variance(mode * frame.rows + y, x) = c_varInit; + // init - //sort - //find the new place for it + gmm_mean(mode * frame.rows + y, x) = pix; + gmm_variance(mode * frame.rows + y, x) = constants->varInit_; - for (int i = nmodes - 1; i > 0; --i) - { - // check one up - if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) - break; + //sort + //find the new place for it - //swap one up - swap(gmm_weight, x, y, i - 1, frame.rows); - swap(gmm_variance, x, y, i - 1, frame.rows); - swap(gmm_mean, x, y, i - 1, frame.rows); - } + for (int i = nmodes - 1; i > 0; --i) + { + // check one up + if (alphaT < gmm_weight((i - 1) * frame.rows + y, x)) + break; + + //swap one up + swap(gmm_weight, x, y, i - 1, frame.rows); + swap(gmm_variance, x, y, i - 1, frame.rows); + swap(gmm_mean, x, y, i - 1, frame.rows); } + } - //set the number of modes - modesUsed(y, x) = nmodes; + //set the number of modes + modesUsed(y, x) = nmodes; - bool isShadow = false; - if (detectShadows && !background) - { - float tWeight = 0.0f; + bool isShadow = false; + if (detectShadows && !background) + { + float tWeight = 0.0f; - // check all the components marked as background: - for (int mode = 0; mode < nmodes; ++mode) - { - WorkT mean = gmm_mean(mode * frame.rows + y, x); + // check all the components marked as background: + for (int mode = 0; mode < nmodes; ++mode) + { + const WorkT mean = gmm_mean(mode * frame.rows + y, x); - WorkT pix_mean = pix * mean; + const WorkT pix_mean = pix * mean; - float numerator = sum(pix_mean); - float 
denominator = sqr(mean); + const float numerator = sum(pix_mean); + const float denominator = sqr(mean); - // no division by zero allowed - if (denominator == 0) - break; - - // if tau < a < 1 then also check the color distortion - if (numerator <= denominator && numerator >= c_tau * denominator) - { - float a = numerator / denominator; + // no division by zero allowed + if (denominator == 0) + break; - WorkT dD = a * mean - pix; + // if tau < a < 1 then also check the color distortion + else if (numerator <= denominator && numerator >= constants->tau_ * denominator) + { + const float a = numerator / denominator; - if (sqr(dD) < c_Tb * gmm_variance(mode * frame.rows + y, x) * a * a) - { - isShadow = true; - break; - } - }; + WorkT dD = a * mean - pix; - tWeight += gmm_weight(mode * frame.rows + y, x); - if (tWeight > c_TB) + if (sqr(dD) < constants->Tb_ * gmm_variance(mode * frame.rows + y, x) * a * a) + { + isShadow = true; break; - } - } + } + }; - fgmask(y, x) = background ? 0 : isShadow ? c_shadowVal : 255; + tWeight += gmm_weight(mode * frame.rows + y, x); + if (tWeight > constants->TB_) + break; + } } - template - void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - - const float alpha1 = 1.0f - alphaT; + fgmask(y, x) = background ? 0 : isShadow ? 
constants->shadowVal_ : 255; + } +} - if (detectShadows) - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); +template +void mog2_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream) +{ + dim3 block(32, 8); + dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } - else - { - cudaSafeCall( cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1) ); + const float alpha1 = 1.0f - alphaT; - mog2<<>>((PtrStepSz) frame, fgmask, modesUsed, - weight, variance, (PtrStepSz) mean, - alphaT, alpha1, prune); - } + if (detectShadows) + { + cudaSafeCall(cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1)); - cudaSafeCall( cudaGetLastError() ); + mog2<<>>((PtrStepSz)frame, fgmask, modesUsed, + weight, variance, (PtrStepSz)mean, + alphaT, alpha1, prune, constants); + } + else + { + cudaSafeCall(cudaFuncSetCacheConfig(mog2, cudaFuncCachePreferL1)); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + mog2<<>>((PtrStepSz)frame, fgmask, modesUsed, + weight, variance, (PtrStepSz)mean, + alphaT, alpha1, prune, constants); + } - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, - float alphaT, float prune, bool detectShadows, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); + cudaSafeCall(cudaGetLastError()); - static const func_t funcs[] = - { - 0, mog2_caller, 0, mog2_caller, mog2_caller - }; + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); +} - funcs[cn](frame, fgmask, 
modesUsed, weight, variance, mean, alphaT, prune, detectShadows, stream); - } +void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, + float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream) +{ + typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream); - template - __global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst) + static const func_t funcs[] = { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; + 0, mog2_caller, 0, mog2_caller, mog2_caller}; - if (x >= modesUsed.cols || y >= modesUsed.rows) - return; + funcs[cn](frame, fgmask, modesUsed, weight, variance, mean, alphaT, prune, detectShadows, constants, stream); +} - int nmodes = modesUsed(y, x); +template +__global__ void getBackgroundImage2(const PtrStepSzb modesUsed, const PtrStepf gmm_weight, const PtrStep gmm_mean, PtrStep dst, const Constants *const constants) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - WorkT meanVal = VecTraits::all(0.0f); - float totalWeight = 0.0f; + if (x >= modesUsed.cols || y >= modesUsed.rows) + return; - for (int mode = 0; mode < nmodes; ++mode) - { - float weight = gmm_weight(mode * modesUsed.rows + y, x); + int nmodes = modesUsed(y, x); - WorkT mean = gmm_mean(mode * modesUsed.rows + y, x); - meanVal = meanVal + weight * mean; + WorkT meanVal = VecTraits::all(0.0f); + float totalWeight = 0.0f; - totalWeight += weight; + for (int mode = 0; mode < nmodes; ++mode) + { + float weight = gmm_weight(mode * modesUsed.rows + y, x); - if(totalWeight > c_TB) - break; - } + WorkT 
mean = gmm_mean(mode * modesUsed.rows + y, x); + meanVal = meanVal + weight * mean; - meanVal = meanVal * (1.f / totalWeight); + totalWeight += weight; - dst(y, x) = saturate_cast(meanVal); - } + if (totalWeight > constants->TB_) + break; + } - template - void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); + meanVal = meanVal * (1.f / totalWeight); - cudaSafeCall( cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1) ); + dst(y, x) = saturate_cast(meanVal); +} - getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz) mean, (PtrStepSz) dst); - cudaSafeCall( cudaGetLastError() ); +template +void getBackgroundImage2_caller(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream) +{ + dim3 block(32, 8); + dim3 grid(divUp(modesUsed.cols, block.x), divUp(modesUsed.rows, block.y)); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + cudaSafeCall(cudaFuncSetCacheConfig(getBackgroundImage2, cudaFuncCachePreferL1)); - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); + getBackgroundImage2<<>>(modesUsed, weight, (PtrStepSz)mean, (PtrStepSz)dst, constants); + cudaSafeCall(cudaGetLastError()); - static const func_t funcs[] = - { - 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller - }; + if (stream == 0) + cudaSafeCall(cudaDeviceSynchronize()); +} - funcs[cn](modesUsed, weight, mean, dst, stream); - } - } -}}} +void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t 
stream) +{ + typedef void (*func_t)(PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream); + static const func_t funcs[] = + { + 0, getBackgroundImage2_caller, 0, getBackgroundImage2_caller, getBackgroundImage2_caller}; + + funcs[cn](modesUsed, weight, mean, dst, constants, stream); +} +} // namespace mog2 +} // namespace device +} // namespace cuda +} // namespace cv #endif /* CUDA_DISABLER */ diff --git a/modules/cudabgsegm/src/cuda/mog2.hpp b/modules/cudabgsegm/src/cuda/mog2.hpp new file mode 100644 index 0000000..5b21551 --- /dev/null +++ b/modules/cudabgsegm/src/cuda/mog2.hpp @@ -0,0 +1,37 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_CUDA_MOG2_H +#define OPENCV_CUDA_MOG2_H + +#include "opencv2/core/cuda.hpp" + +struct CUstream_st; +typedef struct CUstream_st *cudaStream_t; + +namespace cv { namespace cuda { + +class Stream; + +namespace device { namespace mog2 { + +typedef struct +{ + float Tb_; + float TB_; + float Tg_; + float varInit_; + float varMin_; + float varMax_; + float tau_; + int nmixtures_; + unsigned char shadowVal_; +} Constants; + +void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, const Constants *const constants, cudaStream_t stream); +void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, const Constants *const constants, cudaStream_t stream); + +} } } } + +#endif /* OPENCV_CUDA_MOG2_H */ diff --git a/modules/cudabgsegm/src/mog2.cpp b/modules/cudabgsegm/src/mog2.cpp index e727dcf..47135a0 100644 --- a/modules/cudabgsegm/src/mog2.cpp +++ b/modules/cudabgsegm/src/mog2.cpp @@ -41,209 +41,207 @@ //M*/ #include 
"precomp.hpp" +#include "cuda/mog2.hpp" using namespace cv; using namespace cv::cuda; +using namespace cv::cuda::device::mog2; #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -Ptr cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) { throw_no_cuda(); return Ptr(); } +Ptr cv::cuda::createBackgroundSubtractorMOG2(int, double, bool) +{ + throw_no_cuda(); + return Ptr(); +} #else -namespace cv { namespace cuda { namespace device +namespace { - namespace mog2 - { - void loadConstants(int nmixtures, float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal); - void mog2_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzf variance, PtrStepSzb mean, float alphaT, float prune, bool detectShadows, cudaStream_t stream); - void getBackgroundImage2_gpu(int cn, PtrStepSzb modesUsed, PtrStepSzf weight, PtrStepSzb mean, PtrStepSzb dst, cudaStream_t stream); - } -}}} +// default parameters of gaussian background detection algorithm +const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 +const float defaultVarThreshold = 4.0f * 4.0f; +const int defaultNMixtures = 5; // maximal number of Gaussians in mixture +const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test +const float defaultVarThresholdGen = 3.0f * 3.0f; +const float defaultVarInit = 15.0f; // initial variance for new components +const float defaultVarMax = 5.0f * defaultVarInit; +const float defaultVarMin = 4.0f; + +// additional parameters +const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components +const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection +const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation + +class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2 +{ +public: + 
MOG2Impl(int history, double varThreshold, bool detectShadows); + ~MOG2Impl(); -namespace + void apply(InputArray image, OutputArray fgmask, double learningRate = -1) CV_OVERRIDE; + void apply(InputArray image, OutputArray fgmask, double learningRate, Stream &stream) CV_OVERRIDE; + + void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE; + void getBackgroundImage(OutputArray backgroundImage, Stream &stream) const CV_OVERRIDE; + + int getHistory() const CV_OVERRIDE { return history_; } + void setHistory(int history) CV_OVERRIDE { history_ = history; } + + int getNMixtures() const CV_OVERRIDE { return constantsHost_.nmixtures_; } + void setNMixtures(int nmixtures) CV_OVERRIDE { constantsHost_.nmixtures_ = nmixtures; } + + double getBackgroundRatio() const CV_OVERRIDE { return constantsHost_.TB_; } + void setBackgroundRatio(double ratio) CV_OVERRIDE { constantsHost_.TB_ = (float)ratio; } + + double getVarThreshold() const CV_OVERRIDE { return constantsHost_.Tb_; } + void setVarThreshold(double varThreshold) CV_OVERRIDE { constantsHost_.Tb_ = (float)varThreshold; } + + double getVarThresholdGen() const CV_OVERRIDE { return constantsHost_.Tg_; } + void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { constantsHost_.Tg_ = (float)varThresholdGen; } + + double getVarInit() const CV_OVERRIDE { return constantsHost_.varInit_; } + void setVarInit(double varInit) CV_OVERRIDE { constantsHost_.varInit_ = (float)varInit; } + + double getVarMin() const CV_OVERRIDE { return constantsHost_.varMin_; } + void setVarMin(double varMin) CV_OVERRIDE { constantsHost_.varMin_ = ::fminf((float)varMin, constantsHost_.varMax_); } + + double getVarMax() const CV_OVERRIDE { return constantsHost_.varMax_; } + void setVarMax(double varMax) CV_OVERRIDE { constantsHost_.varMax_ = ::fmaxf(constantsHost_.varMin_, (float)varMax); } + + double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; } + void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = 
(float)ct; } + + bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; } + void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; } + + int getShadowValue() const CV_OVERRIDE { return constantsHost_.shadowVal_; } + void setShadowValue(int value) CV_OVERRIDE { constantsHost_.shadowVal_ = (uchar)value; } + + double getShadowThreshold() const CV_OVERRIDE { return constantsHost_.tau_; } + void setShadowThreshold(double threshold) CV_OVERRIDE { constantsHost_.tau_ = (float)threshold; } + +private: + void initialize(Size frameSize, int frameType, Stream &stream); + + Constants constantsHost_; + Constants *constantsDevice_; + + int history_; + float ct_; + bool detectShadows_; + + Size frameSize_; + int frameType_; + int nframes_; + + GpuMat weight_; + GpuMat variance_; + GpuMat mean_; + + //keep track of number of modes per pixel + GpuMat bgmodelUsedModes_; +}; + +MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : frameSize_(0, 0), frameType_(0), nframes_(0) +{ + history_ = history > 0 ? history : defaultHistory; + detectShadows_ = detectShadows; + ct_ = defaultCT; + + setNMixtures(defaultNMixtures); + setBackgroundRatio(defaultBackgroundRatio); + setVarInit(defaultVarInit); + setVarMin(defaultVarMin); + setVarMax(defaultVarMax); + setVarThreshold(varThreshold > 0 ? 
(float)varThreshold : defaultVarThreshold); + setVarThresholdGen(defaultVarThresholdGen); + + setShadowValue(defaultShadowValue); + setShadowThreshold(defaultShadowThreshold); + + cudaSafeCall(cudaMalloc((void **)&constantsDevice_, sizeof(Constants))); +} + +MOG2Impl::~MOG2Impl() +{ + cudaFree(constantsDevice_); +} + +void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate) +{ + apply(image, fgmask, learningRate, Stream::Null()); +} + +void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream &stream) { - // default parameters of gaussian background detection algorithm - const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory2 - const float defaultVarThreshold = 4.0f * 4.0f; - const int defaultNMixtures = 5; // maximal number of Gaussians in mixture - const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test - const float defaultVarThresholdGen = 3.0f * 3.0f; - const float defaultVarInit = 15.0f; // initial variance for new components - const float defaultVarMax = 5.0f * defaultVarInit; - const float defaultVarMin = 4.0f; - - // additional parameters - const float defaultCT = 0.05f; // complexity reduction prior constant 0 - no reduction of number of components - const unsigned char defaultShadowValue = 127; // value to use in the segmentation mask for shadows, set 0 not to do shadow detection - const float defaultShadowThreshold = 0.5f; // Tau - shadow threshold, see the paper for explanation - - class MOG2Impl CV_FINAL : public cuda::BackgroundSubtractorMOG2 - { - public: - MOG2Impl(int history, double varThreshold, bool detectShadows); - - void apply(InputArray image, OutputArray fgmask, double learningRate=-1) CV_OVERRIDE; - void apply(InputArray image, OutputArray fgmask, double learningRate, Stream& stream) CV_OVERRIDE; - - void getBackgroundImage(OutputArray backgroundImage) const CV_OVERRIDE; - void getBackgroundImage(OutputArray backgroundImage, 
Stream& stream) const CV_OVERRIDE; - - int getHistory() const CV_OVERRIDE { return history_; } - void setHistory(int history) CV_OVERRIDE { history_ = history; } - - int getNMixtures() const CV_OVERRIDE { return nmixtures_; } - void setNMixtures(int nmixtures) CV_OVERRIDE { nmixtures_ = nmixtures; } - - double getBackgroundRatio() const CV_OVERRIDE { return backgroundRatio_; } - void setBackgroundRatio(double ratio) CV_OVERRIDE { backgroundRatio_ = (float) ratio; } - - double getVarThreshold() const CV_OVERRIDE { return varThreshold_; } - void setVarThreshold(double varThreshold) CV_OVERRIDE { varThreshold_ = (float) varThreshold; } - - double getVarThresholdGen() const CV_OVERRIDE { return varThresholdGen_; } - void setVarThresholdGen(double varThresholdGen) CV_OVERRIDE { varThresholdGen_ = (float) varThresholdGen; } - - double getVarInit() const CV_OVERRIDE { return varInit_; } - void setVarInit(double varInit) CV_OVERRIDE { varInit_ = (float) varInit; } - - double getVarMin() const CV_OVERRIDE { return varMin_; } - void setVarMin(double varMin) CV_OVERRIDE { varMin_ = (float) varMin; } - - double getVarMax() const CV_OVERRIDE { return varMax_; } - void setVarMax(double varMax) CV_OVERRIDE { varMax_ = (float) varMax; } - - double getComplexityReductionThreshold() const CV_OVERRIDE { return ct_; } - void setComplexityReductionThreshold(double ct) CV_OVERRIDE { ct_ = (float) ct; } - - bool getDetectShadows() const CV_OVERRIDE { return detectShadows_; } - void setDetectShadows(bool detectShadows) CV_OVERRIDE { detectShadows_ = detectShadows; } - - int getShadowValue() const CV_OVERRIDE { return shadowValue_; } - void setShadowValue(int value) CV_OVERRIDE { shadowValue_ = (uchar) value; } + using namespace cv::cuda::device::mog2; + + GpuMat frame = _frame.getGpuMat(); - double getShadowThreshold() const CV_OVERRIDE { return shadowThreshold_; } - void setShadowThreshold(double threshold) CV_OVERRIDE { shadowThreshold_ = (float) threshold; } - - private: - void 
initialize(Size frameSize, int frameType); - - int history_; - int nmixtures_; - float backgroundRatio_; - float varThreshold_; - float varThresholdGen_; - float varInit_; - float varMin_; - float varMax_; - float ct_; - bool detectShadows_; - uchar shadowValue_; - float shadowThreshold_; - - Size frameSize_; - int frameType_; - int nframes_; - - GpuMat weight_; - GpuMat variance_; - GpuMat mean_; - - //keep track of number of modes per pixel - GpuMat bgmodelUsedModes_; - }; - - MOG2Impl::MOG2Impl(int history, double varThreshold, bool detectShadows) : - frameSize_(0, 0), frameType_(0), nframes_(0) - { - history_ = history > 0 ? history : defaultHistory; - varThreshold_ = varThreshold > 0 ? (float) varThreshold : defaultVarThreshold; - detectShadows_ = detectShadows; - - nmixtures_ = defaultNMixtures; - backgroundRatio_ = defaultBackgroundRatio; - varInit_ = defaultVarInit; - varMax_ = defaultVarMax; - varMin_ = defaultVarMin; - varThresholdGen_ = defaultVarThresholdGen; - ct_ = defaultCT; - shadowValue_ = defaultShadowValue; - shadowThreshold_ = defaultShadowThreshold; - } + int ch = frame.channels(); + int work_ch = ch; - void MOG2Impl::apply(InputArray image, OutputArray fgmask, double learningRate) - { - apply(image, fgmask, learningRate, Stream::Null()); - } + if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels()) + initialize(frame.size(), frame.type(), stream); - void MOG2Impl::apply(InputArray _frame, OutputArray _fgmask, double learningRate, Stream& stream) - { - using namespace cv::cuda::device::mog2; + _fgmask.create(frameSize_, CV_8UC1); + GpuMat fgmask = _fgmask.getGpuMat(); - GpuMat frame = _frame.getGpuMat(); + fgmask.setTo(Scalar::all(0), stream); - int ch = frame.channels(); - int work_ch = ch; + ++nframes_; + learningRate = learningRate >= 0 && nframes_ > 1 ? 
learningRate : 1.0 / std::min(2 * nframes_, history_); + CV_Assert(learningRate >= 0); - if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.channels()) - initialize(frame.size(), frame.type()); + mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, + (float)learningRate, static_cast(-learningRate * ct_), detectShadows_, constantsDevice_, StreamAccessor::getStream(stream)); +} + +void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const +{ + getBackgroundImage(backgroundImage, Stream::Null()); +} + +void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream &stream) const +{ + using namespace cv::cuda::device::mog2; + + _backgroundImage.create(frameSize_, frameType_); + GpuMat backgroundImage = _backgroundImage.getGpuMat(); + + getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, constantsDevice_, StreamAccessor::getStream(stream)); +} + +void MOG2Impl::initialize(cv::Size frameSize, int frameType, Stream &stream) +{ + using namespace cv::cuda::device::mog2; - _fgmask.create(frameSize_, CV_8UC1); - GpuMat fgmask = _fgmask.getGpuMat(); + CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4); - fgmask.setTo(Scalar::all(0), stream); + frameSize_ = frameSize; + frameType_ = frameType; + nframes_ = 0; - ++nframes_; - learningRate = learningRate >= 0 && nframes_ > 1 ? learningRate : 1.0 / std::min(2 * nframes_, history_); - CV_Assert( learningRate >= 0 ); + const int ch = CV_MAT_CN(frameType); + const int work_ch = ch; - mog2_gpu(frame, frame.channels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, - (float) learningRate, static_cast(-learningRate * ct_), detectShadows_, StreamAccessor::getStream(stream)); - } + // for each gaussian mixture of each pixel bg model we store ... 
+ // the mixture weight (w), + // the mean (nchannels values) and + // the covariance + weight_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1); + variance_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC1); + mean_.create(frameSize.height * getNMixtures(), frameSize_.width, CV_32FC(work_ch)); - void MOG2Impl::getBackgroundImage(OutputArray backgroundImage) const - { - getBackgroundImage(backgroundImage, Stream::Null()); - } - - void MOG2Impl::getBackgroundImage(OutputArray _backgroundImage, Stream& stream) const - { - using namespace cv::cuda::device::mog2; - - _backgroundImage.create(frameSize_, frameType_); - GpuMat backgroundImage = _backgroundImage.getGpuMat(); - - getBackgroundImage2_gpu(backgroundImage.channels(), bgmodelUsedModes_, weight_, mean_, backgroundImage, StreamAccessor::getStream(stream)); - } - - void MOG2Impl::initialize(cv::Size frameSize, int frameType) - { - using namespace cv::cuda::device::mog2; - - CV_Assert( frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4 ); - - frameSize_ = frameSize; - frameType_ = frameType; - nframes_ = 0; - - int ch = CV_MAT_CN(frameType); - int work_ch = ch; - - // for each gaussian mixture of each pixel bg model we store ... 
- // the mixture weight (w), - // the mean (nchannels values) and - // the covariance - weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - variance_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1); - mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch)); - - //make the array for keeping track of the used modes per pixel - all zeros at start - bgmodelUsedModes_.create(frameSize_, CV_8UC1); - bgmodelUsedModes_.setTo(Scalar::all(0)); + //make the array for keeping track of the used modes per pixel - all zeros at start + bgmodelUsedModes_.create(frameSize_, CV_8UC1); + bgmodelUsedModes_.setTo(Scalar::all(0)); - loadConstants(nmixtures_, varThreshold_, backgroundRatio_, varThresholdGen_, varInit_, varMin_, varMax_, shadowThreshold_, shadowValue_); - } + cudaSafeCall(cudaMemcpyAsync(constantsDevice_, &constantsHost_, sizeof(Constants), cudaMemcpyHostToDevice, StreamAccessor::getStream(stream))); } +} // namespace Ptr cv::cuda::createBackgroundSubtractorMOG2(int history, double varThreshold, bool detectShadows) { -- 2.7.4 From 4733a19babec760ba237b8c277bb1de664a641c1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 19 Dec 2019 13:20:42 +0300 Subject: [PATCH 08/16] Merge pull request #16194 from alalek:fix_16192 * imgproc(test): resize(LANCZOS4) reproducer 16192 * imgproc: fix resize LANCZOS4 coefficients generation --- modules/imgproc/src/resize.cpp | 23 +++++++++++++---------- modules/imgproc/test/test_imgwarp.cpp | 13 +++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/modules/imgproc/src/resize.cpp b/modules/imgproc/src/resize.cpp index cc967cf..02f7881 100644 --- a/modules/imgproc/src/resize.cpp +++ b/modules/imgproc/src/resize.cpp @@ -920,20 +920,23 @@ static inline void interpolateLanczos4( float x, float* coeffs ) static const double cs[][2]= {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}}; - if( x < FLT_EPSILON ) - { - 
for( int i = 0; i < 8; i++ ) - coeffs[i] = 0; - coeffs[3] = 1; - return; - } - float sum = 0; double y0=-(x+3)*CV_PI*0.25, s0 = std::sin(y0), c0= std::cos(y0); for(int i = 0; i < 8; i++ ) { - double y = -(x+3-i)*CV_PI*0.25; - coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y)); + float y0_ = (x+3-i); + if (fabs(y0_) >= 1e-6f) + { + double y = -y0_*CV_PI*0.25; + coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y)); + } + else + { + // special handling for 'x' values: + // - ~0.0: 0 0 0 1 0 0 0 0 + // - ~1.0: 0 0 0 0 1 0 0 0 + coeffs[i] = 1e30f; + } sum += coeffs[i]; } diff --git a/modules/imgproc/test/test_imgwarp.cpp b/modules/imgproc/test/test_imgwarp.cpp index 400426a..232f374 100644 --- a/modules/imgproc/test/test_imgwarp.cpp +++ b/modules/imgproc/test/test_imgwarp.cpp @@ -1708,6 +1708,19 @@ TEST(Resize, Area_half) } } +TEST(Resize, lanczos4_regression_16192) +{ + Size src_size(11, 17); + Size dst_size(11, 153); + Mat src(src_size, CV_8UC3, Scalar::all(128)); + Mat dst(dst_size, CV_8UC3, Scalar::all(255)); + + cv::resize(src, dst, dst_size, 0, 0, INTER_LANCZOS4); + + Mat expected(dst_size, CV_8UC3, Scalar::all(128)); + EXPECT_EQ(cvtest::norm(dst, expected, NORM_INF), 0) << dst(Rect(0,0,8,8)); +} + TEST(Imgproc_Warp, multichannel) { static const int inter_types[] = {INTER_NEAREST, INTER_AREA, INTER_CUBIC, -- 2.7.4 From b379969c6341cb643bd989643ceffa9dccbe8abe Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 19 Dec 2019 14:35:56 +0300 Subject: [PATCH 09/16] Test: avoid duplicated test cases --- modules/calib3d/test/test_undistort.cpp | 2 +- modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/calib3d/test/test_undistort.cpp b/modules/calib3d/test/test_undistort.cpp index eec91b0..c1ec206 100644 --- a/modules/calib3d/test/test_undistort.cpp +++ b/modules/calib3d/test/test_undistort.cpp @@ -1290,7 +1290,7 @@ void CV_UndistortMapTest::prepare_to_validation( int ) } } 
-TEST(Calib3d_Undistort, accuracy) { CV_UndistortTest test; test.safe_run(); } +TEST(Calib3d_UndistortImgproc, accuracy) { CV_UndistortTest test; test.safe_run(); } TEST(Calib3d_InitUndistortMap, accuracy) { CV_UndistortMapTest test; test.safe_run(); } TEST(Calib3d_UndistortPoints, inputShape) diff --git a/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp b/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp index 6310fb6..7f99e74 100644 --- a/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp +++ b/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp @@ -31,7 +31,7 @@ INSTANTIATE_TEST_CASE_P(Filter2DTestGPU, Filter2DTest, cv::Size(7, 7)), Values(cv::BORDER_DEFAULT))); -INSTANTIATE_TEST_CASE_P(BoxFilterTestCPU, BoxFilterTest, +INSTANTIATE_TEST_CASE_P(BoxFilterTestGPU, BoxFilterTest, Combine(Values(/*CV_8UC1,*/ CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1), Values(cv::Size(1280, 720), cv::Size(640, 480)), -- 2.7.4 From ed788229ed746ff1a399073ed0ba19579c823257 Mon Sep 17 00:00:00 2001 From: Sajarin Date: Thu, 19 Dec 2019 10:15:59 -0500 Subject: [PATCH 10/16] Merge pull request #16165 from sajarindider:macOS_install * doc: added macOS installation guide * doc: added clarification and corrections * docs: introduction entry, lowercase file names and ids --- .../macos_install/macos_install.markdown | 116 +++++++++++++++++++++ .../table_of_content_introduction.markdown | 8 ++ 2 files changed, 124 insertions(+) create mode 100644 doc/tutorials/introduction/macos_install/macos_install.markdown diff --git a/doc/tutorials/introduction/macos_install/macos_install.markdown b/doc/tutorials/introduction/macos_install/macos_install.markdown new file mode 100644 index 0000000..016e32a --- /dev/null +++ b/doc/tutorials/introduction/macos_install/macos_install.markdown @@ -0,0 +1,116 @@ +Installation in MacOS {#tutorial_macos_install} +===================== + +The following steps have been tested for MacOSX (Mavericks) but should work with other versions as well. 
+ 
+Required Packages
+-----------------
+
+- CMake 3.9 or higher
+- Git
+- Python 2.7 or later and Numpy 1.5 or later
+
+This tutorial will assume you have [Python](https://docs.python.org/3/using/mac.html),
+[Numpy](https://docs.scipy.org/doc/numpy-1.10.1/user/install.html) and
+[Git](https://www.atlassian.com/git/tutorials/install-git) installed on your machine.
+
+@note
+OSX comes with Python 2.7 by default, you will need to install Python 3.8 if you want to use it specifically.
+
+@note
+If you have XCode and XCode Command Line-Tools installed, you already have git installed on your machine.
+
+Installing CMake
+----------------
+-# Find the version for your system and download CMake from their release's [page](https://cmake.org/download/)
+
+-# Install the dmg package and launch it from Applications. That will give you the UI app of CMake
+
+-# From the CMake app window, choose menu Tools --> Install For Command Line Use.
+
+-# Install folder will be /usr/bin/ by default, submit it by choosing Install command line links.
+
+-# Test that it works by running
+ @code{.bash}
+ cmake --version
+ @endcode
+
+Getting OpenCV Source Code
+--------------------------
+
+You can use the latest stable OpenCV version or you can grab the latest snapshot from our
+[Git repository](https://github.com/opencv/opencv.git).
+
+### Getting the Latest Stable OpenCV Version
+
+- Go to our [downloads page](http://opencv.org/releases.html).
+- Download the source archive and unpack it.
+
+### Getting the Cutting-edge OpenCV from the Git Repository
+
+Launch Git client and clone [OpenCV repository](http://github.com/opencv/opencv).
+If you need modules from [OpenCV contrib repository](http://github.com/opencv/opencv_contrib) then clone it as well. 
+ +For example +@code{.bash} +cd ~/ +git clone https://github.com/opencv/opencv.git +git clone https://github.com/opencv/opencv_contrib.git +@endcode +Building OpenCV from Source Using CMake +--------------------------------------- + +-# Create a temporary directory, which we denote as ``, where you want to put + the generated Makefiles, project files as well the object files and output binaries and enter + there. + + For example + @code{.bash} + mkdir build_opencv + cd build_opencv + @endcode + + @note It is good practice to keep clean your source code directories. Create build directory outside of source tree. + +-# Configuring. Run `cmake [] ` + + For example + @code{.bash} + cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_EXAMPLES=ON ../opencv + @endcode + + or cmake-gui + + - set full path to OpenCV source code, e.g. `/home/user/opencv` + - set full path to ``, e.g. `/home/user/build_opencv` + - set optional parameters + - run: "Configure" + - run: "Generate" + +-# Description of some parameters + - build type: `CMAKE_BUILD_TYPE=Release` (or `Debug`) + - to build with modules from opencv_contrib set `OPENCV_EXTRA_MODULES_PATH` to `/modules` + - set `BUILD_DOCS=ON` for building documents (doxygen is required) + - set `BUILD_EXAMPLES=ON` to build all examples + +-# [optional] Building python. Set the following python parameters: + - `PYTHON3_EXECUTABLE = ` + - `PYTHON3_INCLUDE_DIR = /usr/include/python` + - `PYTHON3_NUMPY_INCLUDE_DIRS = + /usr/lib/python/dist-packages/numpy/core/include/` + @note + To specify Python2 versions, you can replace `PYTHON3_` with `PYTHON2_` in the above parameters. + +-# Build. From build directory execute *make*, it is recommended to do this in several threads + + For example + @code{.bash} + make -j7 # runs 7 jobs in parallel + @endcode + +-# To use OpenCV in your CMake-based projects through `find_package(OpenCV)` specify `OpenCV_DIR=` variable. 
+ +@note +You can also use a package manager like [Homebrew](https://brew.sh/) +or [pip](https://pip.pypa.io/en/stable/) to install releases of OpenCV only (Not the cutting edge). diff --git a/doc/tutorials/introduction/table_of_content_introduction.markdown b/doc/tutorials/introduction/table_of_content_introduction.markdown index 2f239f5..b6dc8bd 100644 --- a/doc/tutorials/introduction/table_of_content_introduction.markdown +++ b/doc/tutorials/introduction/table_of_content_introduction.markdown @@ -110,6 +110,14 @@ Additionally you can find very basic sample source code to introduce you to the Modify Android camera preview with OpenCL +- @subpage tutorial_macos_install + + _Compatibility:_ \> OpenCV 3.4.x + + _Author:_ [\@sajarindider](https://github.com/sajarindider) + + We will learn how to setup OpenCV in MacOS. + - @subpage tutorial_ios_install _Compatibility:_ \> OpenCV 2.4.2 -- 2.7.4 From 64e6cf9fe50ef25d317d3529a42d4862b93f002a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 19 Dec 2019 18:16:47 +0300 Subject: [PATCH 11/16] release: OpenCV 3.4.9 --- modules/core/include/opencv2/core/version.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp index 2a41194..233aed3 100644 --- a/modules/core/include/opencv2/core/version.hpp +++ b/modules/core/include/opencv2/core/version.hpp @@ -8,7 +8,7 @@ #define CV_VERSION_MAJOR 3 #define CV_VERSION_MINOR 4 #define CV_VERSION_REVISION 9 -#define CV_VERSION_STATUS "-pre" +#define CV_VERSION_STATUS "" #define CVAUX_STR_EXP(__A) #__A #define CVAUX_STR(__A) CVAUX_STR_EXP(__A) -- 2.7.4 From 97b6068c46b1506f7e7029b81a1973ba2b4443e8 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 19 Dec 2019 19:31:59 +0000 Subject: [PATCH 12/16] dnn(test): don't require downloaded data --- modules/dnn/test/test_model.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 5704c4b..7a4de4e 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -253,7 +253,7 @@ TEST_P(Test_Model, Keypoints_pose) #endif Mat inp = imread(_tf("pose.png")); - std::string weights = _tf("onnx/models/lightweight_pose_estimation.onnx"); + std::string weights = _tf("onnx/models/lightweight_pose_estimation.onnx", false); Mat exp = blobFromNPY(_tf("keypoints_exp.npy")); @@ -274,7 +274,7 @@ TEST_P(Test_Model, Keypoints_face) #endif Mat inp = imread(_tf("gray_face.png"), 0); - std::string weights = _tf("onnx/models/facial_keypoints.onnx"); + std::string weights = _tf("onnx/models/facial_keypoints.onnx", false); Mat exp = blobFromNPY(_tf("facial_keypoints_exp.npy")); Size size{224, 224}; -- 2.7.4 From 1fac1421e5523b2f223d69e28f0e3e5055a64c35 Mon Sep 17 00:00:00 2001 From: Yashas Samaga B L Date: Fri, 20 Dec 2019 19:06:32 +0530 Subject: [PATCH 13/16] Merge pull request #16010 from YashasSamaga:cuda4dnn-fp16-tests * enable tests for DNN_TARGET_CUDA_FP16 * disable deconvolution tests * disable shortcut tests * fix typos and some minor changes * dnn(test): skip CUDA FP16 test too (run_pool_max) --- modules/dnn/test/test_backends.cpp | 126 +++++++++++++++++++++++------ modules/dnn/test/test_caffe_importer.cpp | 63 ++++++++++++--- modules/dnn/test/test_common.impl.hpp | 5 +- modules/dnn/test/test_darknet_importer.cpp | 42 ++++++++-- modules/dnn/test/test_halide_layers.cpp | 27 +++++-- modules/dnn/test/test_layers.cpp | 43 +++++++++- modules/dnn/test/test_model.cpp | 55 ++++++++++--- modules/dnn/test/test_onnx_importer.cpp | 35 ++++++-- modules/dnn/test/test_tf_importer.cpp | 94 +++++++++++++++++---- modules/dnn/test/test_torch_importer.cpp | 78 +++++++++++++++--- 10 files changed, 461 insertions(+), 107 deletions(-) diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 2e8c9ec..a5297c7 100644 --- 
a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -168,6 +168,8 @@ TEST_P(DNNTestNetwork, ENet) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); processNet("dnn/Enet-model-best.net", "", Size(512, 512), "l367_Deconvolution", target == DNN_TARGET_OPENCL ? "dnn/halide_scheduler_opencl_enet.yml" : "dnn/halide_scheduler_enet.yml", @@ -182,11 +184,11 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); Mat sample = imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); - float diffScores = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1.5e-2 : 0.0; - float diffSquares = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; + float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1.5e-2 : 0.0; + float iouDiff = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252 : FLT_MIN; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", - inp, "detection_out", "", diffScores, diffSquares, detectionConfThresh); + inp, "detection_out", "", scoreDiff, iouDiff, detectionConfThresh); expectNoFallbacksFromIE(net); } @@ -201,10 +203,19 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe_Different_Width_Height) #endif Mat sample = imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 560), Scalar(127.5, 127.5, 127.5), false); - float diffScores = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.029 : 0.0; - float diffSquares = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.09 : 0.0; + float scoreDiff = 0.0, iouDiff = 0.0; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.029; + iouDiff = 0.09; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.03; + iouDiff = 0.08; + } processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", - inp, "detection_out", "", diffScores, diffSquares); + inp, "detection_out", "", scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } @@ -216,11 +227,20 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow) Mat sample = imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false); - float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.095 : 0.0; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : 0.0; float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.216 : 0.2; + float scoreDiff = 0.0, iouDiff = 0.0; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.095; + iouDiff = 0.09; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.007; + iouDiff = 0.08; + } processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", - inp, "detection_out", "", l1, lInf, detectionConfThresh); + inp, "detection_out", "", scoreDiff, iouDiff, detectionConfThresh); expectNoFallbacksFromIE(net); } @@ -240,10 +260,19 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v1_TensorFlow_Different_Width_Height) Mat sample = imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f, Size(300, 560), Scalar(), false); - float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.012 : 0.0; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.06 : 0.0; + float scoreDiff = 0.0, iouDiff = 0.0; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.012; + iouDiff = 0.06; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.007; + iouDiff = 0.06; + } processNet("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", "dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", - inp, "detection_out", "", l1, lInf); + inp, "detection_out", "", scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } @@ -255,10 +284,19 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow) Mat sample = imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false); - float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.013 : 2e-5; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0; + float scoreDiff = 2e-5, iouDiff = 0.0; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.013; + iouDiff = 0.062; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.02; + iouDiff = 0.07; + } processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt", - inp, "detection_out", "", l1, lInf, 0.25); + inp, "detection_out", "", scoreDiff, iouDiff, 0.25); expectNoFallbacksFromIE(net); } @@ -268,12 +306,25 @@ TEST_P(DNNTestNetwork, SSD_VGG16) CV_TEST_TAG_DEBUG_VERYLONG); if (backend == DNN_BACKEND_HALIDE && target == DNN_TARGET_CPU) applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); // TODO HALIDE_CPU - double scoreThreshold = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0325 : 0.0; - const float lInf = (target == DNN_TARGET_MYRIAD) ? 
0.032 : 0.0; Mat sample = imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false); + float scoreDiff = 0.0, iouDiff = 0.0; + if (target == DNN_TARGET_OPENCL_FP16) + { + scoreDiff = 0.0325; + } + else if (target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.0325; + iouDiff = 0.032; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.03; + } + processNet("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", - "dnn/ssd_vgg16.prototxt", inp, "detection_out", "", scoreThreshold, lInf); + "dnn/ssd_vgg16.prototxt", inp, "detection_out", "", scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } @@ -384,10 +435,19 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow) applyTestTag(CV_TEST_TAG_DNN_SKIP_HALIDE); Mat sample = imread(findDataFile("dnn/street.png")); Mat inp = blobFromImage(sample, 1.0f, Size(300, 300), Scalar(), false); - float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.015 : 0.0; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.0731 : 0.0; + float scoreDiff = 0.0, iouDiff = 0.0; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.015; + iouDiff = 0.0731; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.015; + iouDiff = 0.08; + } processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", - inp, "detection_out", "", l1, lInf); + inp, "detection_out", "", scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } @@ -400,11 +460,18 @@ TEST_P(DNNTestNetwork, DenseNet_121) float l1 = 0.0, lInf = 0.0; if (target == DNN_TARGET_OPENCL_FP16) { - l1 = 2e-2; lInf = 9e-2; + l1 = 2e-2; + lInf = 9e-2; } else if (target == DNN_TARGET_MYRIAD) { - l1 = 0.1; lInf = 0.6; + l1 = 0.1; + lInf = 0.6; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.008; + lInf = 0.05; } processNet("dnn/DenseNet_121.caffemodel", "dnn/DenseNet_121.prototxt", Size(224, 224), "", "", l1, lInf); if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) @@ -431,8 +498,17 @@ TEST_P(DNNTestNetwork, FastNeuralStyle_eccv16) Mat img = imread(findDataFile("dnn/googlenet_1.png")); Mat inp = blobFromImage(img, 1.0, Size(320, 240), Scalar(103.939, 116.779, 123.68), false, false); // Output image has values in range [-143.526, 148.539]. - float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.4 : 4e-5; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
7.45 : 2e-3; + float l1 = 4e-5, lInf = 2e-3; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + l1 = 0.4; + lInf = 7.45; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.3; + lInf = 7.2; + } processNet("dnn/fast_neural_style_eccv16_starry_night.t7", "", inp, "", "", l1, lInf); #if defined(HAVE_INF_ENGINE) && INF_ENGINE_VER_MAJOR_GE(2019010000) expectNoFallbacksFromIE(net); diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index 0607c7d..d0996db 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -150,8 +150,17 @@ TEST_P(Test_Caffe_nets, Axpy) } } } - float l1 = (target == DNN_TARGET_OPENCL_FP16) ? 2e-4 : 1e-5; - float lInf = (target == DNN_TARGET_OPENCL_FP16) ? 1e-3 : 1e-4; + float l1 = 1e-5, lInf = 1e-4; + if (target == DNN_TARGET_OPENCL_FP16) + { + l1 = 2e-4; + lInf = 1e-3; + } + else if(target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.0002; + lInf = 0.0007; + } normAssert(ref, out, "", l1, lInf); } @@ -287,8 +296,17 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy) ASSERT_EQ(out.size[2], 100); - const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-2 : 1e-5; - const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
6.3e-2 : 1e-4; + float scores_diff = 1e-5, boxes_iou_diff = 1e-4; + if (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) + { + scores_diff = 1.5e-2; + boxes_iou_diff = 6.3e-2; + } + else if (targetId == DNN_TARGET_CUDA_FP16) + { + scores_diff = 0.015; + boxes_iou_diff = 0.07; + } Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); normAssertDetections(ref, out, "", FLT_MIN, scores_diff, boxes_iou_diff); @@ -477,11 +495,21 @@ TEST_P(Test_Caffe_nets, Colorization) Mat out = net.forward(); // Reference output values are in range [-29.1, 69.5] - double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.25 : 4e-4; - double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5.3 : 3e-3; - if (target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + double l1 = 4e-4, lInf = 3e-3; + if (target == DNN_TARGET_OPENCL_FP16) { - l1 = 0.5; lInf = 11; + l1 = 0.25; + lInf = 5.3; + } + else if (target == DNN_TARGET_MYRIAD) + { + l1 = (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) ? 0.5 : 0.25; + lInf = (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) ? 
11 : 5.3; + } + else if(target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.21; + lInf = 4.5; } normAssert(out, ref, "", l1, lInf); expectNoFallbacksFromIE(net); @@ -518,6 +546,10 @@ TEST_P(Test_Caffe_nets, DenseNet_121) { l1 = 0.11; lInf = 0.5; } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.04; lInf = 0.2; + } normAssert(outs[0], ref, "", l1, lInf); if (target != DNN_TARGET_MYRIAD || getInferenceEngineVPUType() != CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) expectNoFallbacksFromIE(model); @@ -663,6 +695,8 @@ TEST_P(Test_Caffe_nets, FasterRCNN_zf) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); + if (target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); static Mat ref = (Mat_(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); @@ -680,8 +714,17 @@ TEST_P(Test_Caffe_nets, RFCN) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); - double scoreDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 4e-3 : default_l1; - double iouDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? 
8e-2 : default_lInf; + float scoreDiff = default_l1, iouDiff = default_lInf; + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + { + scoreDiff = 4e-3; + iouDiff = 8e-2; + } + if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.0034; + iouDiff = 0.11; + } static Mat ref = (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref, scoreDiff, iouDiff); diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index 8721b64..16114d5 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -239,9 +239,8 @@ testing::internal::ParamGenerator< tuple > dnnBackendsAndTarget #ifdef HAVE_CUDA if(withCUDA) { - //for (auto target : getAvailableTargets(DNN_BACKEND_CUDA)) - // targets.push_back(make_tuple(DNN_BACKEND_CUDA, target)); - targets.push_back(make_tuple(DNN_BACKEND_CUDA, DNN_TARGET_CUDA)); + for (auto target : getAvailableTargets(DNN_BACKEND_CUDA)) + targets.push_back(make_tuple(DNN_BACKEND_CUDA, target)); } #endif diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp index eced695..2a60659 100644 --- a/modules/dnn/test/test_darknet_importer.cpp +++ b/modules/dnn/test/test_darknet_importer.cpp @@ -320,9 +320,18 @@ TEST_P(Test_Darknet_nets, YoloVoc) 1, 6, 0.667770f, 0.446555f, 0.453578f, 0.499986f, 0.519167f, // a car 1, 6, 0.844947f, 0.637058f, 0.460398f, 0.828508f, 0.66427f); // a car - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-2 : 8e-5; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : 3e-4; double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 
0.397 : 0.4; + double scoreDiff = 8e-5, iouDiff = 3e-4; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 1e-2; + iouDiff = 0.018; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.03; + iouDiff = 0.018; + } std::string config_file = "yolo-voc.cfg"; std::string weights_file = "yolo-voc.weights"; @@ -353,8 +362,17 @@ TEST_P(Test_Darknet_nets, TinyYoloVoc) 1, 6, 0.651450f, 0.460526f, 0.458019f, 0.522527f, 0.5341f, // a car 1, 6, 0.928758f, 0.651024f, 0.463539f, 0.823784f, 0.654998f); // a car - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 8e-3 : 8e-5; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.018 : 3e-4; + double scoreDiff = 8e-5, iouDiff = 3e-4; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 8e-3; + iouDiff = 0.018; + } + else if(target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.008; + iouDiff = 0.02; + } std::string config_file = "tiny-yolo-voc.cfg"; std::string weights_file = "tiny-yolo-voc.weights"; @@ -453,9 +471,17 @@ TEST_P(Test_Darknet_nets, YOLOv3) 1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496305f, 0.522258f, // a car 1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821038f, 0.663947f); // a car - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.006 : 8e-5; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.018 : 3e-4; - + double scoreDiff = 8e-5, iouDiff = 3e-4; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.006; + iouDiff = 0.018; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.04; + iouDiff = 0.03; + } std::string config_file = "yolov3.cfg"; std::string weights_file = "yolov3.weights"; @@ -501,6 +527,8 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets()); TEST_P(Test_Darknet_layers, shortcut) { + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); testDarknetLayer("shortcut"); testDarknetLayer("shortcut_leaky"); testDarknetLayer("shortcut_unequal"); diff --git a/modules/dnn/test/test_halide_layers.cpp b/modules/dnn/test/test_halide_layers.cpp index 11668b4..a68dd19 100644 --- a/modules/dnn/test/test_halide_layers.cpp +++ b/modules/dnn/test/test_halide_layers.cpp @@ -16,7 +16,7 @@ using namespace cv; using namespace cv::dnn; using namespace testing; -static void test(Mat& input, Net& net, Backend backendId, Target targetId, bool skipCheck = false, bool randInput = true) +static void test(Mat& input, Net& net, Backend backendId, Target targetId, bool skipCheck = false, bool randInput = true, double l1 = 0.0, double lInf = 0.0) { DNNTestLayer::checkBackend(backendId, targetId); if (randInput) @@ -33,8 +33,12 @@ static void test(Mat& input, Net& net, Backend backendId, Target targetId, bool if (skipCheck) return; - double l1, lInf; - DNNTestLayer::getDefaultThresholds(backendId, targetId, &l1, &lInf); + double default_l1, default_lInf; + DNNTestLayer::getDefaultThresholds(backendId, targetId, &default_l1, &default_lInf); + if (l1 == 0.0) + l1 = default_l1; + if (lInf == 0.0) + lInf = default_lInf; #if 0 std::cout << "l1=" << l1 << " lInf=" << lInf << std::endl; std::cout << outputDefault.reshape(1, outputDefault.total()).t() << std::endl; @@ -43,11 +47,11 @@ static void test(Mat& input, Net& net, Backend backendId, Target targetId, bool 
normAssert(outputDefault, outputHalide, "", l1, lInf); } -static void test(LayerParams& params, Mat& input, Backend backendId, Target targetId, bool skipCheck = false) +static void test(LayerParams& params, Mat& input, Backend backendId, Target targetId, bool skipCheck = false, double l1 = 0.0, double lInf = 0.0) { Net net; net.addLayerToPrev(params.name, params.type, params); - test(input, net, backendId, targetId, skipCheck); + test(input, net, backendId, targetId, skipCheck, true, l1, lInf); } static inline testing::internal::ParamGenerator > dnnBackendsAndTargetsWithHalide() @@ -174,6 +178,9 @@ TEST_P(Deconvolution, Accuracy) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); #endif + if (targetId == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); + int sz[] = {inChannels, outChannels / group, kernel.height, kernel.width}; Mat weights(4, &sz[0], CV_32F); randu(weights, -1.0f, 1.0f); @@ -414,7 +421,11 @@ TEST_P(FullyConnected, Accuracy) int sz[] = {1, inChannels, inSize.height, inSize.width}; Mat input(4, &sz[0], CV_32F); - test(lp, input, backendId, targetId); + + double l1 = 0.0; + if (targetId == DNN_TARGET_CUDA_FP16) + l1 = 0.015; + test(lp, input, backendId, targetId, false, true, l1); } INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine( @@ -497,7 +508,7 @@ TEST_P(Test_Halide_layers, MaxPoolUnpool) //////////////////////////////////////////////////////////////////////////////// static const int kNumChannels = 3; -void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId) +void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId, double l1 = 0.0, double lInf = 0.0) { EXPECT_FALSE(lp.name.empty()); @@ -517,7 +528,7 @@ void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId) int sz[] = {1, kNumChannels, 10, 10}; Mat input(4, &sz[0], CV_32F); - test(input, net, backendId, targetId); + test(input, net, backendId, targetId, false, true, l1, lInf); } typedef 
TestWithParam > > BatchNorm; diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index e4ac578..b3fa22f 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -141,6 +141,8 @@ TEST_P(Test_Caffe_layers, Convolution) TEST_P(Test_Caffe_layers, DeConvolution) { + if(target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); testLayerUsingCaffeModels("layer_deconvolution", true, false); } @@ -372,7 +374,13 @@ TEST_P(Test_Caffe_layers, Conv_Elu) net.setPreferableTarget(target); Mat out = net.forward(); - normAssert(ref, out, "", default_l1, default_lInf); + double l1 = default_l1, lInf = default_lInf; + if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.0002; + lInf = 0.0005; + } + normAssert(ref, out, "", l1, lInf); } class Layer_LSTM_Test : public ::testing::Test @@ -843,6 +851,11 @@ TEST_P(Test_Caffe_layers, PriorBox_repeated) double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-3 : 1e-5; double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-3 : 1e-4; + if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 7e-5; + lInf = 0.0005; + } normAssert(out, ref, "", l1, lInf); } @@ -876,7 +889,9 @@ TEST_P(Test_Caffe_layers, PriorBox_squares) 0.25, 0.0, 1.0, 1.0, 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f); - double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-5 : 1e-5; + double l1 = 1e-5; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16) + l1 = 2e-5; normAssert(out.reshape(1, 4), ref, "", l1); } @@ -1225,6 +1240,11 @@ TEST_P(Test_DLDT_two_inputs, as_backend) // Output values are in range [0, 637.5]. double l1 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 0.06 : 1e-6; double lInf = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
0.3 : 1e-5; + if (targetId == DNN_TARGET_CUDA_FP16) + { + l1 = 0.06; + lInf = 0.3; + } normAssert(out, ref, "", l1, lInf); } @@ -1537,8 +1557,17 @@ TEST_P(Layer_Test_ShuffleChannel, Accuracy) net.setPreferableTarget(targetId); Mat out = net.forward(); - double l1 = (targetId == DNN_TARGET_OPENCL_FP16) ? 5e-2 : 1e-5; - double lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 7e-2 : 1e-4; + double l1 = 1e-5, lInf = 1e-4; + if (targetId == DNN_TARGET_OPENCL_FP16) + { + l1 = 5e-2; + lInf = 7e-2; + } + else if (targetId == DNN_TARGET_CUDA_FP16) + { + l1 = 0.06; + lInf = 0.07; + } for (int n = 0; n < inpShapeVec[0]; ++n) { for (int c = 0; c < inpShapeVec[1]; ++c) @@ -1593,6 +1622,9 @@ TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0_truncate) int backendId = get<0>(get<1>(GetParam())); int targetId = get<1>(get<1>(GetParam())); + if (backendId == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); + Net net; LayerParams lp; lp.type = "Eltwise"; @@ -1656,6 +1688,9 @@ TEST_P(Layer_Test_Eltwise_unequal, accuracy_input_0) int backendId = get<0>(get<1>(GetParam())); int targetId = get<1>(get<1>(GetParam())); + if (backendId == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); + Net net; LayerParams lp; lp.type = "Eltwise"; diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index 7a4de4e..bbe4ce4 100644 --- a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -157,9 +157,13 @@ TEST_P(Test_Model, DetectRegion) bool swapRB = true; double confThreshold = 0.24; - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1e-2 : 8e-5; - double iouDiff = (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 1.6e-2 : 1e-5; double nmsThreshold = (target == DNN_TARGET_MYRIAD) ? 
0.397 : 0.4; + double scoreDiff = 8e-5, iouDiff = 1e-5; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 1e-2; + iouDiff = 1.6e-2; + } testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, @@ -188,11 +192,15 @@ TEST_P(Test_Model, DetectionOutput) Scalar mean = Scalar(102.9801, 115.9465, 122.7717); Size size{800, 600}; - double scoreDiff = (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) ? - 4e-3 : default_l1; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16) ? 1.8e-1 : 1e-5; + double scoreDiff = default_l1, iouDiff = 1e-5; float confThreshold = 0.8; double nmsThreshold = 0.0; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_CUDA_FP16) + { + if (backend == DNN_BACKEND_OPENCV) + scoreDiff = 4e-3; + iouDiff = 1.8e-1; + } testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean); @@ -232,10 +240,22 @@ TEST_P(Test_Model, DetectionMobilenetSSD) double scale = 1.0 / 127.5; Size size{300, 300}; - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1.7e-2 : 1e-5; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || (target == DNN_TARGET_MYRIAD && - getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)) ? 
6.91e-2 : 1e-5; - + double scoreDiff = 1e-5, iouDiff = 1e-5; + if (target == DNN_TARGET_OPENCL_FP16) + { + scoreDiff = 1.7e-2; + iouDiff = 6.91e-2; + } + else if (target == DNN_TARGET_MYRIAD) + { + scoreDiff = 1.7e-2; + if (getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + iouDiff = 6.91e-2; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 4e-4; + } float confThreshold = FLT_MIN; double nmsThreshold = 0.0; @@ -263,6 +283,10 @@ TEST_P(Test_Model, Keypoints_pose) Scalar mean = Scalar(128, 128, 128); bool swapRB = false; + // Ref. Range: [58.6875, 508.625] + if (target == DNN_TARGET_CUDA_FP16) + norm = 20; // l1 = 1.5, lInf = 20 + testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB); } @@ -283,8 +307,11 @@ TEST_P(Test_Model, Keypoints_face) Scalar mean = Scalar(); bool swapRB = false; - testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB); + // Ref. Range: [-1.1784188, 1.7758257] + if (target == DNN_TARGET_CUDA_FP16) + norm = 0.004; // l1 = 0.0006, lInf = 0.004 + testKeypointsModel(weights, "", inp, exp, norm, size, mean, scale, swapRB); } TEST_P(Test_Model, Detection_normalized) @@ -301,10 +328,14 @@ TEST_P(Test_Model, Detection_normalized) double scale = 1.0 / 127.5; Size size{300, 300}; - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : 1e-5; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.09 : 1e-5; + double scoreDiff = 1e-5, iouDiff = 1e-5; float confThreshold = FLT_MIN; double nmsThreshold = 0.0; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD || target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 5e-3; + iouDiff = 0.09; + } testDetectModel(weights_file, config_file, img_path, refClassIds, refConfidences, refBoxes, scoreDiff, iouDiff, confThreshold, nmsThreshold, size, mean, scale); } diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index ce8a43a..7f4a18c 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -590,8 +590,17 @@ TEST_P(Test_ONNX_nets, TinyYolov2) #endif // output range: [-11; 8] - double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.017 : default_l1; - double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.14 : default_lInf; + double l1 = default_l1, lInf = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + l1 = 0.017; + lInf = 0.14; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.018; + lInf = 0.16; + } testONNXModels("tiny_yolo2", pb, l1, lInf); } @@ -620,17 +629,23 @@ TEST_P(Test_ONNX_nets, LResNet100E_IR) if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); } - double l1 = default_l1; - double lInf = default_lInf; + double l1 = default_l1, lInf = default_lInf; // output range: [-3; 3] - if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) { + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + { l1 = 0.009; lInf = 0.035; } - else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_CPU) { + else if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_CPU) + { l1 = 4.6e-5; lInf = 1.9e-4; } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.008; + 
lInf = 0.04; + } testONNXModels("LResNet100E_IR", pb, l1, lInf); } @@ -747,8 +762,12 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics) net.setPreferableTarget(target); // output range [-5, 11] - float l1 = 0.0013; - float lInf = 0.009; + float l1 = 0.0013, lInf = 0.009; + if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.008; + lInf = 0.04; + } checkBackend(&input0, &ref0); net.setInput(input0); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index c49ed51..f563e25 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -225,8 +225,17 @@ TEST_P(Test_TensorFlow_layers, slim_batch_norm) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // Output values range: [-40.0597, 207.827] - double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.041 : default_l1; - double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.33 : default_lInf; + double l1 = default_l1, lInf = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + l1 = 0.041; + lInf = 0.33; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.005; + lInf = 0.33; + } runTensorFlowNet("slim_batch_norm", false, l1, lInf); } @@ -300,9 +309,8 @@ TEST_P(Test_TensorFlow_layers, AvePooling3D) TEST_P(Test_TensorFlow_layers, deconvolution) { - if(backend == DNN_BACKEND_CUDA) - applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); /* bugged */ - + if (backend == DNN_BACKEND_CUDA) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA); runTensorFlowNet("deconvolution"); runTensorFlowNet("deconvolution_same"); runTensorFlowNet("deconvolution_stride_2_same"); @@ -428,8 +436,16 @@ TEST_P(Test_TensorFlow_nets, MobileNet_SSD) net.setInput(inp); Mat out = net.forward(); - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.0043 : default_l1; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.037 : default_lInf; + double scoreDiff = default_l1, iouDiff = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.0043; + iouDiff = 0.037; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + iouDiff = 0.04; + } normAssertDetections(ref, out, "", 0.2, scoreDiff, iouDiff); #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_RELEASE >= 2019010000 expectNoFallbacksFromIE(net); @@ -466,8 +482,17 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD) 0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527, 0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384); - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0097 : default_l1; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf; + double scoreDiff = default_l1, iouDiff = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.0097; + iouDiff = 0.09; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 6e-3; + iouDiff = 0.05; + } normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } @@ -489,10 +514,18 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD) Mat out = net.forward(); Mat ref = blobFromNPY(findDataFile("dnn/tensorflow/ssd_mobilenet_v1_coco_2017_11_17.detection_out.npy")); - float scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 1.5e-5; - float iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.012 : 1e-3; + float scoreDiff = 1.5e-5, iouDiff = 1e-3; float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 
0.35 : 0.3; - + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.011; + iouDiff = 0.012; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.006; + iouDiff = 0.01; + } #if defined(INF_ENGINE_RELEASE) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) @@ -530,6 +563,9 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN) if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); + checkBackend(); double scoresDiff = backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 ? 2.9e-5 : 1e-5; @@ -574,8 +610,17 @@ TEST_P(Test_TensorFlow_nets, MobileNet_v1_SSD_PPN) net.setInput(blob); Mat out = net.forward(); - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.048 : 1.1e-5; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.058 : default_lInf; + double scoreDiff = 1.1e-5, iouDiff = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 0.048; + iouDiff = 0.058; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 0.006; + iouDiff = 0.05; + } normAssertDetections(ref, out, "", 0.45, scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } @@ -604,8 +649,17 @@ TEST_P(Test_TensorFlow_nets, opencv_face_detector_uint8) 0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477, 0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494, 0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801); - double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 4e-3 : 3.4e-3; - double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.024 : 1e-2; + double scoreDiff = 3.4e-3, iouDiff = 1e-2; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + scoreDiff = 4e-3; + iouDiff = 0.024; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + scoreDiff = 4e-3; + iouDiff = 0.02; + } normAssertDetections(ref, out, "", 0.9, scoreDiff, iouDiff); expectNoFallbacksFromIE(net); } @@ -673,6 +727,11 @@ TEST_P(Test_TensorFlow_nets, EAST_text_detection) lInf_scores = 0.41; l1_geometry = 0.28; lInf_geometry = 5.94; } + else if (target == DNN_TARGET_CUDA_FP16) + { + lInf_scores = 0.1; + l1_geometry = 0.3; lInf_geometry = 7; + } else { l1_geometry = 1e-4, lInf_geometry = 3e-3; @@ -695,7 +754,8 @@ TEST_P(Test_TensorFlow_layers, fp16_weights) runTensorFlowNet("fp16_padding_valid", false, l1, lInf); // Reference output values are in range [0.0889, 1.651] runTensorFlowNet("fp16_max_pool_even", false, (target == DNN_TARGET_MYRIAD) ? 0.003 : l1, lInf); - if (target == DNN_TARGET_MYRIAD) { + if (target == DNN_TARGET_MYRIAD) + { l1 = 0.0041; lInf = 0.024; } diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 5343fae..1f4bc1f 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -112,8 +112,17 @@ public: TEST_P(Test_Torch_layers, run_convolution) { // Output reference values are in range [23.4018, 72.0181] - double l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.08 : default_l1; - double lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
0.42 : default_lInf; + double l1 = default_l1, lInf = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + l1 = 0.08; + lInf = 0.42; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.08; + lInf = 0.5; + } runTorchNet("net_conv", "", false, true, true, l1, lInf); } @@ -121,7 +130,10 @@ TEST_P(Test_Torch_layers, run_pool_max) { if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); - runTorchNet("net_pool_max", "", true); + if (target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); + double l1 = 0.0, lInf = 0.0; + runTorchNet("net_pool_max", "", true, false, true, l1, lInf); } TEST_P(Test_Torch_layers, run_pool_ave) @@ -145,9 +157,17 @@ TEST_P(Test_Torch_layers, run_reshape) TEST_P(Test_Torch_layers, run_reshape_single_sample) { // Reference output values in range [14.4586, 18.4492]. - runTorchNet("net_reshape_single_sample", "", false, false, true, - (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.033 : default_l1, - (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.05 : default_lInf); + double l1 = default_l1, lInf = default_lInf; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + l1 = 0.033; + lInf = 0.05; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.01; + } + runTorchNet("net_reshape_single_sample", "", false, false, true, l1, lInf); } TEST_P(Test_Torch_layers, run_linear) @@ -164,8 +184,16 @@ TEST_P(Test_Torch_layers, run_concat) TEST_P(Test_Torch_layers, run_depth_concat) { - runTorchNet("net_depth_concat", "", false, true, true, 0.0, - target == DNN_TARGET_OPENCL_FP16 ? 
0.021 : 0.0); + double lInf = 0.0; + if (target == DNN_TARGET_OPENCL_FP16) + { + lInf = 0.021; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + lInf = 0.03; + } + runTorchNet("net_depth_concat", "", false, true, true, 0.0, lInf); } TEST_P(Test_Torch_layers, run_deconv) @@ -211,9 +239,18 @@ TEST_P(Test_Torch_layers, net_conv_gemm_lrn) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); - runTorchNet("net_conv_gemm_lrn", "", false, true, true, - target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0, - target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0); + double l1 = 0.0, lInf = 0.0; + if (target == DNN_TARGET_OPENCL_FP16) + { + l1 = 0.046; + lInf = 0.023; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.0042; + lInf = 0.021; + } + runTorchNet("net_conv_gemm_lrn", "", false, true, true, l1, lInf); } TEST_P(Test_Torch_layers, net_inception_block) @@ -291,8 +328,17 @@ TEST_P(Test_Torch_nets, OpenFace_accuracy) // Reference output values are in range [-0.17212, 0.263492] // on Myriad problem layer: l4_Pooling - does not use pads_begin - float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 2e-3 : 1e-5; - float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 
5e-3 : 1e-3; + float l1 = 1e-5, lInf = 1e-3; + if (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) + { + l1 = 2e-3; + lInf = 5e-3; + } + else if (target == DNN_TARGET_CUDA_FP16) + { + l1 = 0.0004; + lInf = 0.0012; + } Mat outRef = readTorchBlob(_tf("net_openface_output.dat"), true); normAssert(out, outRef, "", l1, lInf); } @@ -343,6 +389,8 @@ TEST_P(Test_Torch_nets, ENet_accuracy) checkBackend(); if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) throw SkipTestException(""); + if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target != DNN_TARGET_CPU) { if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); @@ -448,6 +496,10 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy) else EXPECT_LE(normL1, 0.6f); } + else if(target == DNN_TARGET_CUDA_FP16) + { + normAssert(out, refBlob, "", 0.6, 25); + } else normAssert(out, refBlob, "", 0.5, 1.1); } -- 2.7.4 From ee4feb4b09d144c8bfabf539fd81b513cddf399e Mon Sep 17 00:00:00 2001 From: shimat Date: Fri, 20 Dec 2019 22:38:51 +0900 Subject: [PATCH 14/16] Merge pull request #16208 from shimat:fix_compare_16f * add cv::compare test when Mat type == CV_16F * add assertion in cv::compare when src.depth() == CV_16F * cv::compare assertion minor fix * core: add more checks --- modules/core/src/arithm.cpp | 24 ++++++++++++++++++++---- modules/core/test/test_arithm.cpp | 8 ++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 167af8f..d6418bc 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -61,7 +61,9 @@ void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t bl { int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); size_t esz = CV_ELEM_SIZE(buftype); - 
getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); + BinaryFunc cvtFn = getConvertFunc(sc.depth(), buftype); + CV_Assert(cvtFn); + cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); // unroll the scalar if( scn < cn ) { @@ -196,7 +198,10 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst, cn = (int)CV_ELEM_SIZE(type1); } else + { func = tab[depth1]; + } + CV_Assert(func); Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); Size sz = getContinuousSize2D(src1, src2, dst); @@ -270,6 +275,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst, } else func = tab[depth1]; + CV_Assert(func); if( !haveScalar ) { @@ -745,6 +751,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, (cvtdst ? wsz : 0) + (haveMask ? dsz : 0); BinaryFuncC func = tab[CV_MAT_DEPTH(wtype)]; + CV_Assert(func); if( !haveScalar ) { @@ -1228,17 +1235,23 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) _InputArray::KindFlag kind1 = _src1.kind(), kind2 = _src2.kind(); Mat src1 = _src1.getMat(), src2 = _src2.getMat(); + int depth1 = src1.depth(), depth2 = src2.depth(); + if (depth1 == CV_16F || depth2 == CV_16F) + CV_Error(Error::StsNotImplemented, "Unsupported depth value CV_16F"); + if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 && src1.size() == src2.size() && src1.type() == src2.type() ) { int cn = src1.channels(); _dst.create(src1.size(), CV_8UC(cn)); Mat dst = _dst.getMat(); Size sz = getContinuousSize2D(src1, src2, dst, src1.channels()); - getCmpFunc(src1.depth())(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, &op); + BinaryFuncC cmpFn = getCmpFunc(depth1); + CV_Assert(cmpFn); + cmpFn(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, &op); return; } - int cn = src1.channels(), depth1 = src1.depth(), depth2 = 
src2.depth(); + int cn = src1.channels(); _dst.create(src1.dims, src1.size, CV_8UC(cn)); src1 = src1.reshape(1); src2 = src2.reshape(1); @@ -1247,6 +1260,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) size_t esz = std::max(src1.elemSize(), (size_t)1); size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz; BinaryFuncC func = getCmpFunc(depth1); + CV_Assert(func); if( !haveScalar ) { @@ -1275,7 +1289,9 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) else { double fval=0; - getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar*)&fval, 1, Size(1,1), 0); + BinaryFunc cvtFn = getConvertFunc(depth2, CV_64F); + CV_Assert(cvtFn); + cvtFn(src2.ptr(), 1, 0, 1, (uchar*)&fval, 1, Size(1,1), 0); if( fval < getMinVal(depth1) ) { dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0); diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 7d3542d..75a7004 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2021,6 +2021,14 @@ TEST(Compare, regression_8999) EXPECT_THROW(cv::compare(A, B, C, CMP_LT), cv::Exception); } +TEST(Compare, regression_16F_do_not_crash) +{ + cv::Mat mat1(2, 2, CV_16F, cv::Scalar(1)); + cv::Mat mat2(2, 2, CV_16F, cv::Scalar(2)); + cv::Mat dst; + EXPECT_THROW(cv::compare(mat1, mat2, dst, cv::CMP_EQ), cv::Exception); +} + TEST(Core_minMaxIdx, regression_9207_1) { -- 2.7.4 From bda89a6469aa79ecd8713967916bd754bff1d931 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 20 Dec 2019 16:44:16 +0300 Subject: [PATCH 15/16] release: OpenCV 4.2.0 --- modules/core/include/opencv2/core/version.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp index 6c8510f..29855b1 100644 --- a/modules/core/include/opencv2/core/version.hpp +++ b/modules/core/include/opencv2/core/version.hpp @@ -8,7 +8,7 
@@ #define CV_VERSION_MAJOR 4 #define CV_VERSION_MINOR 2 #define CV_VERSION_REVISION 0 -#define CV_VERSION_STATUS "-pre" +#define CV_VERSION_STATUS "" #define CVAUX_STR_EXP(__A) #__A #define CVAUX_STR(__A) CVAUX_STR_EXP(__A) -- 2.7.4 From d3b7e11b444331a33d2b3b5424f7baf379da90d9 Mon Sep 17 00:00:00 2001 From: Tae-Young Chung Date: Fri, 6 Mar 2020 09:15:03 +0900 Subject: [PATCH 16/16] Apply symbolic link to files to support OpenCV3 In OpenCV4, there are some changes which don't support compatibility. For example, header files's location is changed and pc file's name is also changed. To support compatibility, create symbolic links to them. Signed-off-by: Tae-Young Chung --- packaging/opencv.spec | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/packaging/opencv.spec b/packaging/opencv.spec index ab66af7..e5b4e18 100644 --- a/packaging/opencv.spec +++ b/packaging/opencv.spec @@ -1,7 +1,7 @@ Name: opencv Summary: OpenCV library Version: 4.2.0 -Release: 1 +Release: 2 Group: Development/Libraries License: BSD-3-Clause and LGPL-2.1+ Source0: %{name}-%{version}.tar.gz @@ -52,6 +52,11 @@ ARCH=x86_64 %ifarch %ix86 ARCH=i386 %endif + +# compatibility to opencv3 +sed -i 's/opencv4.pc/opencv.pc/g' cmake/OpenCVGenPkgconfig.cmake + + mkdir -p build cd build cmake .. 
-DCMAKE_INSTALL_PREFIX=/usr \ @@ -222,6 +227,21 @@ cd build rm -rf %{buildroot} %make_install +# To support backward compatibility with OpenCV3 +# Header files +ln -sf %{_includedir}/opencv4/opencv2 %{buildroot}%{_includedir}/opencv2 +# pkgconfig +ln -sf %{_libdir}/pkgconfig/opencv.pc %{buildroot}%{_libdir}/pkgconfig/%{name}4.pc +# cascades files +mkdir -p %{buildroot}%{_datadir}/OpenCV +ln -sf %{_datadir}/opencv4/haarcascades %{buildroot}%{_datadir}/OpenCV/haarcascades +ln -sf %{_datadir}/opencv4/lbpcascades %{buildroot}%{_datadir}/OpenCV/lbpcascades + +ln -sf %{_libdir}/cmake/opencv4/OpenCVConfig-version.cmake %{buildroot}%{_datadir}/OpenCV/OpenCVConfig-version.cmake +ln -sf %{_libdir}/cmake/opencv4/OpenCVConfig.cmake %{buildroot}%{_datadir}/OpenCV/OpenCVConfig.cmake +ln -sf %{_libdir}/cmake/opencv4/OpenCVModules-release.cmake %{buildroot}%{_datadir}/OpenCV/OpenCVModules-release.cmake +ln -sf %{_libdir}/cmake/opencv4/OpenCVModules.cmake %{buildroot}%{_datadir}/OpenCV/OpenCVModules.cmake + %clean rm -rf %{buildroot} @@ -241,6 +261,9 @@ rm -rf %{buildroot} /usr/share/opencv4/haarcascades/*frontalface_alt* /usr/share/opencv4/haarcascades/*smile* /usr/share/opencv4/lbpcascades/*frontalface* +/usr/share/OpenCV/OpenCV* +/usr/share/OpenCV/haarcascades +/usr/share/OpenCV/lbpcascades %exclude /usr/share/opencv4/haarcascades/*eye* %exclude /usr/share/opencv4/haarcascades/*catface* -- 2.7.4