From e9729a9601f54f5d4ea3e822139445c55b7d102b Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Sat, 16 Aug 2014 00:29:10 +0400 Subject: [PATCH] multiple yet minor fixes to make most of the tests pass on Mac with Iris graphics --- modules/core/src/ocl.cpp | 2 +- modules/core/src/parallel.cpp | 5 +++++ modules/imgcodecs/test/test_main.cpp | 2 +- modules/imgproc/src/filter.cpp | 3 +-- modules/imgproc/src/floodfill.cpp | 7 ++++--- modules/imgproc/src/morph.cpp | 9 +++++++-- modules/imgproc/src/opencl/filter2DSmall.cl | 15 +++++++++++++-- modules/imgproc/src/opencl/filterSmall.cl | 15 +++++++++++++-- modules/imgproc/src/opencl/morph.cl | 13 ++++--------- modules/imgproc/src/opencl/resize.cl | 6 +++++- modules/imgproc/src/undistort.cpp | 2 +- modules/stitching/src/opencl/multibandblend.cl | 16 ++++++++++++++++ modules/videoio/test/test_main.cpp | 2 +- 13 files changed, 72 insertions(+), 25 deletions(-) diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index bc83bf8..d279c02 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1765,7 +1765,7 @@ struct Device::Impl if (vendorName_ == "Advanced Micro Devices, Inc." || vendorName_ == "AMD") vendorID_ = VENDOR_AMD; - else if (vendorName_ == "Intel(R) Corporation") + else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0) vendorID_ = VENDOR_INTEL; else if (vendorName_ == "NVIDIA Corporation") vendorID_ = VENDOR_NVIDIA; diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 27d7ecc..6ebc02d 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -240,6 +240,11 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, { ProxyLoopBody pbody(body, range, nstripes); cv::Range stripeRange = pbody.stripeRange(); + if( stripeRange.end - stripeRange.start == 1 ) + { + body(range); + return; + } #if defined HAVE_TBB diff --git a/modules/imgcodecs/test/test_main.cpp b/modules/imgcodecs/test/test_main.cpp index 461e7fa..4727b95 100644 --- a/modules/imgcodecs/test/test_main.cpp +++ b/modules/imgcodecs/test/test_main.cpp @@ -1,3 +1,3 @@ #include "test_precomp.hpp" -CV_TEST_MAIN("imgcodecs") +CV_TEST_MAIN("highgui") diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index bb2040b..558f898 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -410,8 +410,7 @@ void FilterEngine::apply(const Mat& src, Mat& dst, dstOfs.y + srcRoi.height <= dst.rows ); int y = start(src, srcRoi, isolated); - proceed( src.ptr(y) - + srcRoi.x*src.elemSize(), + proceed( src.ptr() + y*src.step + srcRoi.x*src.elemSize(), (int)src.step, endY - startY, dst.ptr(dstOfs.y) + dstOfs.x*dst.elemSize(), (int)dst.step ); diff --git a/modules/imgproc/src/floodfill.cpp b/modules/imgproc/src/floodfill.cpp index 70a8d1e..54a1fc9 100644 --- a/modules/imgproc/src/floodfill.cpp +++ b/modules/imgproc/src/floodfill.cpp @@ -180,13 +180,14 @@ floodFill_CnIR( Mat& image, Point seed, for( k = 0; k < 3; k++ ) { dir = data[k][0]; - img = image.ptr<_Tp>(YC + dir); - int left = data[k][1]; - int right = data[k][2]; if( (unsigned)(YC + dir) >= (unsigned)roi.height ) continue; + img = image.ptr<_Tp>(YC + dir); + int left = data[k][1]; + int right = data[k][2]; + for( i = left; i <= right; i++ ) { if( (unsigned)i < (unsigned)roi.width && img[i] == val0 ) diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 9923f3b..347e80b 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1531,7 +1531,7 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) && ((ksize.width < 5 && ksize.height < 5 && esz <= 4) || (ksize.width == 5 && ksize.height == 5 && cn == 1)) && - (iterations == 1)) + (iterations == 1) && cn == 1) { if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat)) return true; @@ -1543,13 +1543,18 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel, return true; } -#ifdef ANDROID +#if defined ANDROID size_t localThreads[2] = { 16, 8 }; #else size_t localThreads[2] = { 16, 16 }; #endif size_t globalThreads[2] = { ssize.width, ssize.height }; +#ifdef __APPLE__ + if( actual_op != MORPH_ERODE && actual_op != MORPH_DILATE ) + localThreads[0] = localThreads[1] = 4; +#endif + if (localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1)) return false; diff --git a/modules/imgproc/src/opencl/filter2DSmall.cl b/modules/imgproc/src/opencl/filter2DSmall.cl index 67edef2..564bbcf 100755 --- a/modules/imgproc/src/opencl/filter2DSmall.cl +++ b/modules/imgproc/src/opencl/filter2DSmall.cl @@ -188,7 +188,7 @@ inline bool isBorder(const struct RectCoords bounds, int2 coord, int numPixels) } #endif -WT getBorderPixel(const struct RectCoords bounds, int2 coord, +inline WT getBorderPixel(const struct RectCoords bounds, int2 coord, __global const uchar* srcptr, int srcstep) { #ifdef BORDER_CONSTANT @@ -231,7 +231,18 @@ inline WT readSrcPixelSingle(int2 pos, __global const uchar* srcptr, #define vload1(OFFSET, PTR) (*(PTR + OFFSET)) #define PX_LOAD_VEC_TYPE CAT(srcT1, PX_LOAD_VEC_SIZE) #define PX_LOAD_FLOAT_VEC_TYPE CAT(WT1, PX_LOAD_VEC_SIZE) -#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE) +//#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE) + +#if PX_LOAD_VEC_SIZE == 1 +#define PX_LOAD_FLOAT_VEC_CONV (float) +#elif PX_LOAD_VEC_SIZE == 2 +#define PX_LOAD_FLOAT_VEC_CONV convert_float2 +#elif PX_LOAD_VEC_SIZE == 3 +#define PX_LOAD_FLOAT_VEC_CONV convert_float3 +#elif PX_LOAD_VEC_SIZE == 4 +#define PX_LOAD_FLOAT_VEC_CONV convert_float4 +#endif + #define PX_LOAD CAT(vload, PX_LOAD_VEC_SIZE) #define float1 float diff --git a/modules/imgproc/src/opencl/filterSmall.cl b/modules/imgproc/src/opencl/filterSmall.cl index c996fb8..8cec365 100755 --- a/modules/imgproc/src/opencl/filterSmall.cl +++ b/modules/imgproc/src/opencl/filterSmall.cl @@ -164,7 +164,18 @@ inline bool isBorder(const struct RectCoords bounds, int2 coord, int numPixels) #define vload1(OFFSET, PTR) (*(PTR + OFFSET)) #define PX_LOAD_VEC_TYPE CAT(srcT1, PX_LOAD_VEC_SIZE) #define PX_LOAD_FLOAT_VEC_TYPE CAT(WT1, PX_LOAD_VEC_SIZE) -#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE) + +#if PX_LOAD_VEC_SIZE == 1 +#define PX_LOAD_FLOAT_VEC_CONV (float) +#elif PX_LOAD_VEC_SIZE == 2 +#define PX_LOAD_FLOAT_VEC_CONV convert_float2 +#elif PX_LOAD_VEC_SIZE == 3 +#define PX_LOAD_FLOAT_VEC_CONV convert_float3 +#elif PX_LOAD_VEC_SIZE == 4 +#define PX_LOAD_FLOAT_VEC_CONV convert_float4 +#endif + +//#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE) #define PX_LOAD CAT(vload, PX_LOAD_VEC_SIZE) @@ -267,7 +278,7 @@ __constant WT1 kernelData[] = { COEFF }; // workaround for bug in Intel HD graphics drivers (10.18.10.3496 or older) #define WA_CONVERT_1 CAT(convert_uint, cn) #define WA_CONVERT_2 CAT(convert_, srcT) -#define MORPH_OP(A, B) WA_CONVERT_2(min(WA_CONVERT_1(A), WA_CONVERT_1(B))) +#define MORPH_OP(A, B) ((A) < (B) ? (A) : (B)) #else #define MORPH_OP(A, B) min((A), (B)) #endif diff --git a/modules/imgproc/src/opencl/morph.cl b/modules/imgproc/src/opencl/morph.cl index f78af89..f3d64ca 100644 --- a/modules/imgproc/src/opencl/morph.cl +++ b/modules/imgproc/src/opencl/morph.cl @@ -88,13 +88,7 @@ #ifdef OP_ERODE #if defined INTEL_DEVICE && defined DEPTH_0 -// workaround for bug in Intel HD graphics drivers (10.18.10.3496 or older) -#define __CAT(x, y) x##y -#define CAT(x, y) __CAT(x, y) -#define WA_CONVERT_1 CAT(convert_uint, cn) -#define WA_CONVERT_2 CAT(convert_, T) -#define convert_uint1 convert_uint -#define MORPH_OP(A, B) WA_CONVERT_2(min(WA_CONVERT_1(A), WA_CONVERT_1(B))) +#define MORPH_OP(A, B) ((A) < (B) ? (A) : (B)) #else #define MORPH_OP(A, B) min((A), (B)) #endif @@ -104,7 +98,8 @@ #endif #define PROCESS(y, x) \ - res = MORPH_OP(res, LDS_DAT[mad24(l_y + y, width, l_x + x)]); + temp = LDS_DAT[mad24(l_y + y, width, l_x + x)]; \ + res = MORPH_OP(res, temp); // BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii #define ELEM(i, l_edge, r_edge, elem1, elem2) (i) < (l_edge) | (i) >= (r_edge) ? (elem1) : (elem2) @@ -158,7 +153,7 @@ __kernel void morph(__global const uchar * srcptr, int src_step, int src_offset, if (gidx < cols && gidy < rows) { - T res = (T)(VAL); + T res = (T)(VAL), temp; PROCESS_ELEMS; int dst_index = mad24(gidy, dst_step, mad24(gidx, TSIZE, dst_offset)); diff --git a/modules/imgproc/src/opencl/resize.cl b/modules/imgproc/src/opencl/resize.cl index 55ef069..67603e4 100644 --- a/modules/imgproc/src/opencl/resize.cl +++ b/modules/imgproc/src/opencl/resize.cl @@ -71,17 +71,21 @@ #if cn == 1 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x +#define INTERMEDIATE_TYPE float #elif cn == 2 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy +#define INTERMEDIATE_TYPE float2 #elif cn == 3 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz +#define INTERMEDIATE_TYPE float3 #elif cn == 4 #define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z) +#define INTERMEDIATE_TYPE float4 #endif #define __CAT(x, y) x##y #define CAT(x, y) __CAT(x, y) -#define INTERMEDIATE_TYPE CAT(float, cn) +//#define INTERMEDIATE_TYPE CAT(float, cn) #define float1 float #if depth == 0 diff --git a/modules/imgproc/src/undistort.cpp b/modules/imgproc/src/undistort.cpp index e5da24b..1a19fdb 100644 --- a/modules/imgproc/src/undistort.cpp +++ b/modules/imgproc/src/undistort.cpp @@ -131,7 +131,7 @@ void cv::initUndistortRectifyMap( InputArray _cameraMatrix, InputArray _distCoef for( int i = 0; i < size.height; i++ ) { float* m1f = map1.ptr(i); - float* m2f = map2.ptr(i); + float* m2f = map2.empty() ? 0 : map2.ptr(i); short* m1 = (short*)m1f; ushort* m2 = (ushort*)m2f; double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8]; diff --git a/modules/stitching/src/opencl/multibandblend.cl b/modules/stitching/src/opencl/multibandblend.cl index 72d3de0..8273ed1 100644 --- a/modules/stitching/src/opencl/multibandblend.cl +++ b/modules/stitching/src/opencl/multibandblend.cl @@ -221,8 +221,18 @@ #if defined(DEFINE_feed) #define workType TYPE(weight_T1, src_CN) + +#if src_DEPTH == 3 && src_CN == 3 +#define convertSrcToWorkType convert_float3 +#else #define convertSrcToWorkType CONVERT_TO(workType) +#endif + +#if dst_DEPTH == 3 && dst_CN == 3 +#define convertToDstType convert_short3 +#else #define convertToDstType CONVERT_TO(dst_T) // sat_rte provides incompatible results with CPU path +#endif __kernel void feed( DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(weight), @@ -250,9 +260,15 @@ __kernel void feed( #if defined(DEFINE_normalizeUsingWeightMap) +#if mat_DEPTH == 3 && mat_CN == 3 +#define workType float3 +#define convertSrcToWorkType convert_float3 +#define convertToDstType convert_short3 +#else #define workType TYPE(weight_T1, mat_CN) #define convertSrcToWorkType CONVERT_TO(workType) #define convertToDstType CONVERT_TO(mat_T) // sat_rte provides incompatible results with CPU path +#endif #if weight_DEPTH >= CV_32F #define WEIGHT_EPS 1e-5f diff --git a/modules/videoio/test/test_main.cpp b/modules/videoio/test/test_main.cpp index 3ef2a37..4727b95 100644 --- a/modules/videoio/test/test_main.cpp +++ b/modules/videoio/test/test_main.cpp @@ -1,3 +1,3 @@ #include "test_precomp.hpp" -CV_TEST_MAIN("videoio") +CV_TEST_MAIN("highgui") -- 2.7.4