if (vendorName_ == "Advanced Micro Devices, Inc." ||
vendorName_ == "AMD")
vendorID_ = VENDOR_AMD;
- else if (vendorName_ == "Intel(R) Corporation")
+ else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
vendorID_ = VENDOR_INTEL;
else if (vendorName_ == "NVIDIA Corporation")
vendorID_ = VENDOR_NVIDIA;
{
ProxyLoopBody pbody(body, range, nstripes);
cv::Range stripeRange = pbody.stripeRange();
+ if( stripeRange.end - stripeRange.start == 1 )
+ {
+ body(range);
+ return;
+ }
#if defined HAVE_TBB
#include "test_precomp.hpp"
-CV_TEST_MAIN("imgcodecs")
+CV_TEST_MAIN("highgui")
dstOfs.y + srcRoi.height <= dst.rows );
int y = start(src, srcRoi, isolated);
- proceed( src.ptr(y)
- + srcRoi.x*src.elemSize(),
+ proceed( src.ptr() + y*src.step + srcRoi.x*src.elemSize(),
(int)src.step, endY - startY,
dst.ptr(dstOfs.y) +
dstOfs.x*dst.elemSize(), (int)dst.step );
for( k = 0; k < 3; k++ )
{
dir = data[k][0];
- img = image.ptr<_Tp>(YC + dir);
- int left = data[k][1];
- int right = data[k][2];
if( (unsigned)(YC + dir) >= (unsigned)roi.height )
continue;
+ img = image.ptr<_Tp>(YC + dir);
+ int left = data[k][1];
+ int right = data[k][2];
+
for( i = left; i <= right; i++ )
{
if( (unsigned)i < (unsigned)roi.width && img[i] == val0 )
if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) &&
((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
(ksize.width == 5 && ksize.height == 5 && cn == 1)) &&
- (iterations == 1))
+ (iterations == 1) && cn == 1)
{
if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
return true;
return true;
}
-#ifdef ANDROID
+#if defined ANDROID
size_t localThreads[2] = { 16, 8 };
#else
size_t localThreads[2] = { 16, 16 };
#endif
size_t globalThreads[2] = { ssize.width, ssize.height };
+#ifdef __APPLE__
+ if( actual_op != MORPH_ERODE && actual_op != MORPH_DILATE )
+ localThreads[0] = localThreads[1] = 4;
+#endif
+
if (localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1))
return false;
}
#endif
-WT getBorderPixel(const struct RectCoords bounds, int2 coord,
+inline WT getBorderPixel(const struct RectCoords bounds, int2 coord,
__global const uchar* srcptr, int srcstep)
{
#ifdef BORDER_CONSTANT
#define vload1(OFFSET, PTR) (*(PTR + OFFSET))
#define PX_LOAD_VEC_TYPE CAT(srcT1, PX_LOAD_VEC_SIZE)
#define PX_LOAD_FLOAT_VEC_TYPE CAT(WT1, PX_LOAD_VEC_SIZE)
-#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE)
+//#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE)  // disabled: replaced by the explicit per-PX_LOAD_VEC_SIZE definitions below
+
+#if PX_LOAD_VEC_SIZE == 1
+#define PX_LOAD_FLOAT_VEC_CONV (float)
+#elif PX_LOAD_VEC_SIZE == 2
+#define PX_LOAD_FLOAT_VEC_CONV convert_float2
+#elif PX_LOAD_VEC_SIZE == 3
+#define PX_LOAD_FLOAT_VEC_CONV convert_float3
+#elif PX_LOAD_VEC_SIZE == 4
+#define PX_LOAD_FLOAT_VEC_CONV convert_float4
+#endif
+
#define PX_LOAD CAT(vload, PX_LOAD_VEC_SIZE)
#define float1 float
#define vload1(OFFSET, PTR) (*(PTR + OFFSET))
#define PX_LOAD_VEC_TYPE CAT(srcT1, PX_LOAD_VEC_SIZE)
#define PX_LOAD_FLOAT_VEC_TYPE CAT(WT1, PX_LOAD_VEC_SIZE)
-#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE)
+
+#if PX_LOAD_VEC_SIZE == 1
+#define PX_LOAD_FLOAT_VEC_CONV (float)
+#elif PX_LOAD_VEC_SIZE == 2
+#define PX_LOAD_FLOAT_VEC_CONV convert_float2
+#elif PX_LOAD_VEC_SIZE == 3
+#define PX_LOAD_FLOAT_VEC_CONV convert_float3
+#elif PX_LOAD_VEC_SIZE == 4
+#define PX_LOAD_FLOAT_VEC_CONV convert_float4
+#endif
+
+//#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE)  // disabled: replaced by the explicit per-PX_LOAD_VEC_SIZE definitions above
#define PX_LOAD CAT(vload, PX_LOAD_VEC_SIZE)
// workaround for bug in Intel HD graphics drivers (10.18.10.3496 or older)
#define WA_CONVERT_1 CAT(convert_uint, cn)
#define WA_CONVERT_2 CAT(convert_, srcT)
-#define MORPH_OP(A, B) WA_CONVERT_2(min(WA_CONVERT_1(A), WA_CONVERT_1(B)))
+#define MORPH_OP(A, B) ((A) < (B) ? (A) : (B))
#else
#define MORPH_OP(A, B) min((A), (B))
#endif
#ifdef OP_ERODE
#if defined INTEL_DEVICE && defined DEPTH_0
-// workaround for bug in Intel HD graphics drivers (10.18.10.3496 or older)
-#define __CAT(x, y) x##y
-#define CAT(x, y) __CAT(x, y)
-#define WA_CONVERT_1 CAT(convert_uint, cn)
-#define WA_CONVERT_2 CAT(convert_, T)
-#define convert_uint1 convert_uint
-#define MORPH_OP(A, B) WA_CONVERT_2(min(WA_CONVERT_1(A), WA_CONVERT_1(B)))
+#define MORPH_OP(A, B) ((A) < (B) ? (A) : (B))
#else
#define MORPH_OP(A, B) min((A), (B))
#endif
#endif
#define PROCESS(y, x) \
- res = MORPH_OP(res, LDS_DAT[mad24(l_y + y, width, l_x + x)]);
+ temp = LDS_DAT[mad24(l_y + y, width, l_x + x)]; \
+ res = MORPH_OP(res, temp);
// BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
#define ELEM(i, l_edge, r_edge, elem1, elem2) (i) < (l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
if (gidx < cols && gidy < rows)
{
- T res = (T)(VAL);
+ T res = (T)(VAL), temp;
PROCESS_ELEMS;
int dst_index = mad24(gidy, dst_step, mad24(gidx, TSIZE, dst_offset));
#if cn == 1
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x
+#define INTERMEDIATE_TYPE float
#elif cn == 2
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy
+#define INTERMEDIATE_TYPE float2
#elif cn == 3
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz
+#define INTERMEDIATE_TYPE float3
#elif cn == 4
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z)
+#define INTERMEDIATE_TYPE float4
#endif
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
-#define INTERMEDIATE_TYPE CAT(float, cn)
+//#define INTERMEDIATE_TYPE CAT(float, cn)  // disabled: INTERMEDIATE_TYPE is now defined per cn value next to READ_IMAGE above
#define float1 float
#if depth == 0
for( int i = 0; i < size.height; i++ )
{
float* m1f = map1.ptr<float>(i);
- float* m2f = map2.ptr<float>(i);
+ float* m2f = map2.empty() ? 0 : map2.ptr<float>(i);
short* m1 = (short*)m1f;
ushort* m2 = (ushort*)m2f;
double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8];
#if defined(DEFINE_feed)
#define workType TYPE(weight_T1, src_CN)
+
+#if src_DEPTH == 3 && src_CN == 3
+#define convertSrcToWorkType convert_float3
+#else
#define convertSrcToWorkType CONVERT_TO(workType)
+#endif
+
+#if dst_DEPTH == 3 && dst_CN == 3
+#define convertToDstType convert_short3
+#else
#define convertToDstType CONVERT_TO(dst_T) // sat_rte provides incompatible results with CPU path
+#endif
__kernel void feed(
DECLARE_MAT_ARG(src), DECLARE_MAT_ARG(weight),
#if defined(DEFINE_normalizeUsingWeightMap)
+#if mat_DEPTH == 3 && mat_CN == 3
+#define workType float3
+#define convertSrcToWorkType convert_float3
+#define convertToDstType convert_short3
+#else
#define workType TYPE(weight_T1, mat_CN)
#define convertSrcToWorkType CONVERT_TO(workType)
#define convertToDstType CONVERT_TO(mat_T) // sat_rte provides incompatible results with CPU path
+#endif
#if weight_DEPTH >= CV_32F
#define WEIGHT_EPS 1e-5f
#include "test_precomp.hpp"
-CV_TEST_MAIN("videoio")
+CV_TEST_MAIN("highgui")