From c5552788c59a48ba57e81a4c2f81588dd8b5fcc0 Mon Sep 17 00:00:00 2001 From: Chuanbo Weng Date: Wed, 17 Sep 2014 19:28:07 +0800 Subject: [PATCH] Use vload to read unaligned data instead of the dereference operator. According to the OpenCL 1.2 specification, section 6.1.5: For arguments to a __kernel function declared to be a pointer to a data type, the OpenCL compiler can assume that the pointee is always appropriately aligned as required by the data type. The behavior of an unaligned load or store is undefined, except for the vloadn, vload_halfn, vstoren, and vstore_halfn functions defined in section 6.12.7. The original code read data of type T from an address that is not aligned to a multiple of sizeof(T), so the result was incorrect. With this patch, the test cases ./opencv_perf_imgproc --gtest_filter=OCL_ImgSize_TmplSize_Method_MatType_MatchTemplate.MatchTemplate/* work correctly with Beignet 0.9.3. Signed-off-by: Chuanbo Weng --- modules/imgproc/src/opencl/match_template.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgproc/src/opencl/match_template.cl b/modules/imgproc/src/opencl/match_template.cl index c6c9468..f1bce5e 100644 --- a/modules/imgproc/src/opencl/match_template.cl +++ b/modules/imgproc/src/opencl/match_template.cl @@ -161,7 +161,7 @@ __kernel void matchTemplate_Naive_CCORR(__global const uchar * srcptr, int src_s for (int j = 0; j < template_cols; ++j) { T temp = (T)(template[j]); - T src = *(__global const T*)(srcptr + ind + j*(int)sizeof(T1)); + T src = vload4(0, (__global const T1*)(srcptr + ind + j*(int)sizeof(T1))); sum = mad(convertToWT(src), convertToWT(temp), sum); -- 2.7.4