From 19544b3d54c575ab2f8893663b11d5ec032cc928 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 15 Nov 2010 07:57:30 +0000 Subject: [PATCH] fixed gpu linear filters --- modules/gpu/src/cuda/filters.cu | 182 +++++++++++++--------------------------- modules/gpu/src/filtering.cpp | 63 +++++--------- 2 files changed, 78 insertions(+), 167 deletions(-) diff --git a/modules/gpu/src/cuda/filters.cu b/modules/gpu/src/cuda/filters.cu index d6390e5..6749fe6 100644 --- a/modules/gpu/src/cuda/filters.cu +++ b/modules/gpu/src/cuda/filters.cu @@ -72,7 +72,7 @@ namespace cv { namespace gpu { namespace filters namespace filter_krnls { - template + template __global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height) { __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; @@ -102,7 +102,7 @@ namespace filter_krnls if (threadX < width) { - typedef typename TypeVec::vec_t sum_t; + typedef typename TypeVec::cn>::vec_t sum_t; sum_t sum = VecTraits::all(0); sDataRow += threadIdx.x + blockDim.x - anchor; @@ -119,7 +119,7 @@ namespace filter_krnls namespace cv { namespace gpu { namespace filters { - template + template void linearRowFilter_caller(const DevMem2D_& src, const DevMem2D_& dst, int anchor) { const int BLOCK_DIM_X = 16; @@ -128,85 +128,50 @@ namespace cv { namespace gpu { namespace filters dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); - filter_krnls::linearRowFilter<<>>(src.data, src.step/src.elemSize(), + filter_krnls::linearRowFilter<<>>(src.data, src.step/src.elemSize(), dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows); cudaSafeCall( cudaThreadSynchronize() ); } - template - inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) + template + void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) { typedef void (*caller_t)(const DevMem2D_& src, const DevMem2D_& dst, int anchor); static const caller_t callers[] = - {linearRowFilter_caller<0 , CN, T, D>, linearRowFilter_caller<1 , CN, T, D>, - linearRowFilter_caller<2 , CN, T, D>, linearRowFilter_caller<3 , CN, T, D>, - linearRowFilter_caller<4 , CN, T, D>, linearRowFilter_caller<5 , CN, T, D>, - linearRowFilter_caller<6 , CN, T, D>, linearRowFilter_caller<7 , CN, T, D>, - linearRowFilter_caller<8 , CN, T, D>, linearRowFilter_caller<9 , CN, T, D>, - linearRowFilter_caller<10, CN, T, D>, linearRowFilter_caller<11, CN, T, D>, - linearRowFilter_caller<12, CN, T, D>, linearRowFilter_caller<13, CN, T, D>, - linearRowFilter_caller<14, CN, T, D>, linearRowFilter_caller<15, CN, T, D>}; + {linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>, + linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>, + linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>, + linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>, + linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>, + linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>, + linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>, + linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>}; loadLinearKernel(kernel, ksize); + callers[ksize]((DevMem2D_)src, (DevMem2D_)dst, anchor); } - template void linearRowFilter_gpu<4, uchar4, uchar4>(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); - /* void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor); - }*/ - void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<4, char4, uchar4>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<4, char4, char4>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<2, ushort2, ushort2>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<2, ushort2, short2>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<2, short2, ushort2>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<2, short2, short2>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<1, int, int>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<1, int, float>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<1, float, int>(src, dst, kernel, ksize, anchor); - } - void linearRowFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearRowFilter_gpu<1 ,float, float>(src, dst, kernel, ksize, anchor); - } + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearRowFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); }}} namespace filter_krnls { - template + template __global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height) { __shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; @@ -238,7 +203,7 @@ namespace filter_krnls if (threadY < height) { - typedef typename TypeVec::vec_t sum_t; + typedef typename TypeVec::cn>::vec_t sum_t; sum_t sum = VecTraits::all(0); sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step; @@ -255,7 +220,7 @@ namespace filter_krnls namespace cv { namespace gpu { namespace filters { - template + template void linearColumnFilter_caller(const DevMem2D_& src, const DevMem2D_& dst, int anchor) { const int BLOCK_DIM_X = 16; @@ -264,78 +229,45 @@ namespace cv { namespace gpu { namespace filters dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); - filter_krnls::linearColumnFilter<<>>(src.data, src.step/src.elemSize(), + filter_krnls::linearColumnFilter<<>>(src.data, src.step/src.elemSize(), dst.data, dst.step/dst.elemSize(), anchor, src.cols, src.rows); cudaSafeCall( cudaThreadSynchronize() ); } - template - inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) + template + void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) { typedef void (*caller_t)(const DevMem2D_& src, const DevMem2D_& dst, int anchor); static const caller_t callers[] = - {linearColumnFilter_caller<0 , CN, T, D>, linearColumnFilter_caller<1 , CN, T, D>, - linearColumnFilter_caller<2 , CN, T, D>, linearColumnFilter_caller<3 , CN, T, D>, - linearColumnFilter_caller<4 , CN, T, D>, linearColumnFilter_caller<5 , CN, T, D>, - linearColumnFilter_caller<6 , CN, T, D>, linearColumnFilter_caller<7 , CN, T, D>, - linearColumnFilter_caller<8 , CN, T, D>, linearColumnFilter_caller<9 , CN, T, D>, - linearColumnFilter_caller<10, CN, T, D>, linearColumnFilter_caller<11, CN, T, D>, - linearColumnFilter_caller<12, CN, T, D>, linearColumnFilter_caller<13, CN, T, D>, - linearColumnFilter_caller<14, CN, T, D>, linearColumnFilter_caller<15, CN, T, D>}; + {linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>, + linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>, + linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>, + linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>, + linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>, + linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>, + linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>, + linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>}; loadLinearKernel(kernel, ksize); + callers[ksize]((DevMem2D_)src, (DevMem2D_)dst, anchor); } - void linearColumnFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<4, char4, uchar4>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<4, char4, char4>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<2, ushort2, ushort2>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<2, ushort2, short2>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<2, short2, ushort2>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<2, short2, short2>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<1, int, int>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<1, int, float>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<1, float, int>(src, dst, kernel, ksize, anchor); - } - void linearColumnFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor) - { - linearColumnFilter_gpu<1, float, float>(src, dst, kernel, ksize, anchor); - } + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); + template void linearColumnFilter_gpu(const DevMem2D&, const DevMem2D&, const float[], int , int); }}} ///////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/src/filtering.cpp b/modules/gpu/src/filtering.cpp index 961e547..b8dcaaf 100644 --- a/modules/gpu/src/filtering.cpp +++ b/modules/gpu/src/filtering.cpp @@ -577,34 +577,11 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke namespace cv { namespace gpu { namespace filters { - template + template void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - //void linearRowFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearRowFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - - void linearColumnFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); - void linearColumnFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); + template + void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor); }}} namespace @@ -656,13 +633,12 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R}; static const gpuFilter1D_t gpuFilter1D_callers[6][6] = { - {linearRowFilter_gpu<4, uchar4, uchar4>/*linearRowFilter_gpu_8u_8u_c4*/,linearRowFilter_gpu_8u_8s_c4,0,0,0,0}, - - {linearRowFilter_gpu_8s_8u_c4,linearRowFilter_gpu_8s_8s_c4,0,0,0,0}, - {0,0,linearRowFilter_gpu_16u_16u_c2,linearRowFilter_gpu_16u_16s_c2,0,0}, - {0,0,linearRowFilter_gpu_16s_16u_c2,linearRowFilter_gpu_16s_16s_c2,0,0}, - {0,0,0,0,linearRowFilter_gpu_32s_32s_c1, linearRowFilter_gpu_32s_32f_c1}, - {0,0,0,0,linearRowFilter_gpu_32f_32s_c1, linearRowFilter_gpu_32f_32f_c1} + {linearRowFilter_gpu,linearRowFilter_gpu,0,0,0,0}, + {linearRowFilter_gpu,linearRowFilter_gpu,0,0,0,0}, + {0,0,linearRowFilter_gpu,linearRowFilter_gpu,0,0}, + {0,0,linearRowFilter_gpu,linearRowFilter_gpu,0,0}, + {0,0,0,0,linearRowFilter_gpu, linearRowFilter_gpu}, + {0,0,0,0,linearRowFilter_gpu, linearRowFilter_gpu} }; if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4)) @@ -686,6 +662,9 @@ Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, Mat cont_krnl = temp.reshape(1, 1); int ksize = cont_krnl.cols; + + CV_Assert(ksize < 16); + normalizeAnchor(anchor, ksize); return Ptr(new GpuLinearRowFilter(ksize, anchor, cont_krnl, @@ -736,17 +715,14 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R}; static const gpuFilter1D_t gpuFilter1D_callers[6][6] = { - {linearColumnFilter_gpu_8u_8u_c4,linearColumnFilter_gpu_8u_8s_c4,0,0,0,0}, - {linearColumnFilter_gpu_8s_8u_c4,linearColumnFilter_gpu_8s_8s_c4,0,0,0,0}, - {0,0,linearColumnFilter_gpu_16u_16u_c2,linearColumnFilter_gpu_16u_16s_c2,0,0}, - {0,0,linearColumnFilter_gpu_16s_16u_c2,linearColumnFilter_gpu_16s_16s_c2,0,0}, - {0,0,0,0,linearColumnFilter_gpu_32s_32s_c1, linearColumnFilter_gpu_32s_32f_c1}, - {0,0,0,0,linearColumnFilter_gpu_32f_32s_c1, linearColumnFilter_gpu_32f_32f_c1} + {linearColumnFilter_gpu,linearColumnFilter_gpu,0,0,0,0}, + {linearColumnFilter_gpu,linearColumnFilter_gpu,0,0,0,0}, + {0,0,linearColumnFilter_gpu,linearColumnFilter_gpu,0,0}, + {0,0,linearColumnFilter_gpu,linearColumnFilter_gpu,0,0}, + {0,0,0,0,linearColumnFilter_gpu, linearColumnFilter_gpu}, + {0,0,0,0,linearColumnFilter_gpu, linearColumnFilter_gpu} }; - double kernelMin; - minMaxLoc(columnKernel, &kernelMin); - if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4)) { GpuMat gpu_col_krnl; @@ -768,6 +744,9 @@ Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds Mat cont_krnl = temp.reshape(1, 1); int ksize = cont_krnl.cols; + + CV_Assert(ksize < 16); + normalizeAnchor(anchor, ksize); return Ptr(new GpuLinearColumnFilter(ksize, anchor, cont_krnl, -- 2.7.4