From: Alexey Spizhevoy Date: Mon, 31 Jan 2011 13:31:59 +0000 (+0000) Subject: removed linear_filters_beta.cu as its functionality was moved into filters.cu X-Git-Tag: accepted/2.0/20130307.220821~3584 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fa446e7e3562adbadba62e88afeaf08c7281eb71;p=profile%2Fivi%2Fopencv.git removed linear_filters_beta.cu as its functionality was moved into filters.cu --- diff --git a/modules/gpu/src/cuda/linear_filters_beta.cu b/modules/gpu/src/cuda/linear_filters_beta.cu deleted file mode 100644 index 5a5184c..0000000 --- a/modules/gpu/src/cuda/linear_filters_beta.cu +++ /dev/null @@ -1,266 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "opencv2/gpu/devmem2d.hpp" -#include "opencv2/gpu/device/border_interpolate.hpp" -#include "safe_call.hpp" -#include "internal_shared.hpp" - -#define BLOCK_DIM_X 16 -#define BLOCK_DIM_Y 16 -#define MAX_KERNEL_SIZE 16 - -using namespace cv::gpu; -using namespace cv::gpu::device; - -namespace cv { namespace gpu { namespace linear_filters { - - -// Global linear kernel data storage -__constant__ float ckernel[MAX_KERNEL_SIZE]; - - -void loadKernel(const float* kernel, int ksize) -{ - cudaSafeCall(cudaMemcpyToSymbol(ckernel, kernel, ksize * sizeof(float))); -} - - -template -__global__ void rowFilterKernel(const DevMem2D_ src, PtrStepf dst, - int anchor, B border) -{ - __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3]; - - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - float* srow = smem + threadIdx.y * blockDim.x * 3; - - if (y < src.rows) - { - const T* src_row = src.ptr(y); - - srow[threadIdx.x + blockDim.x] = border.at_high(x, src_row); - - srow[threadIdx.x] = border.at_low(x - blockDim.x, src_row); - - srow[threadIdx.x + (blockDim.x << 1)] = border.at_high(x + blockDim.x, src_row); - - __syncthreads(); - - if (x < src.cols) - { - srow += threadIdx.x + blockDim.x - anchor; - - float sum = 0.f; - for (int i = 0; i < ksize; ++i) - sum += srow[i] * ckernel[i]; - - dst.ptr(y)[x] = sum; - } - } -} - - -template -void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor) -{ - dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); - dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y)); - - B border(src.cols); - - if (!border.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1)) - cv::gpu::error("rowFilterCaller: can't use specified border extrapolation, image is too small, " - "try bigger image or another border extrapolation mode", __FILE__, __LINE__); - - rowFilterKernel<<>>(src, dst, anchor, border); - cudaSafeCall(cudaThreadSynchronize()); -} - - -template -void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, - const float* kernel, int ksize) -{ - typedef void (*Caller)(const DevMem2D_, PtrStepf, int); - - static const Caller callers[] = - { - 0, rowFilterCaller, - rowFilterCaller, rowFilterCaller, - rowFilterCaller, rowFilterCaller, - rowFilterCaller, rowFilterCaller, - rowFilterCaller, rowFilterCaller, - rowFilterCaller, rowFilterCaller, - rowFilterCaller, rowFilterCaller, - rowFilterCaller, rowFilterCaller - }; - - loadKernel(kernel, ksize); - callers[ksize](src, dst, anchor); -} - - -template -void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, - const float* kernel, int ksize, int brd_interp) -{ - typedef void (*Caller)(const DevMem2D_, PtrStepf, int, const float*, int); - - static const Caller callers[] = - { - rowFilterCaller >, - rowFilterCaller > - }; - - callers[brd_interp](src, dst, anchor, kernel, ksize); -} - - -template void rowFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); -template void rowFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); - - -template -__global__ void colFilterKernel(const DevMem2D_ src, PtrStepf dst, int anchor, B border) -{ - __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3]; - - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - const int smem_step = blockDim.x; - - float* scol = smem + threadIdx.x; - - if (x < src.cols) - { - const T* src_col = src.data + x; - - scol[(threadIdx.y + blockDim.y) * smem_step] = border.at_high(y, src_col); - - scol[threadIdx.y * smem_step] = border.at_low(y - blockDim.y, src_col); - - scol[(threadIdx.y + (blockDim.y << 1)) * smem_step] = border.at_high(y + blockDim.y, src_col); - - __syncthreads(); - - if (y < src.rows) - { - scol += (threadIdx.y + blockDim.y - anchor)* smem_step; - - float sum = 0.f; - for(int i = 0; i < ksize; ++i) - sum += scol[i * smem_step] * ckernel[i]; - - dst.ptr(y)[x] = sum; - } - } -} - - -template -void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor) -{ - dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); - dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y)); - - B border(src.rows, src.step / src.elem_size); - - if (src.step - border.step * src.elem_size != 0) - cv::gpu::error("colFilterCaller: src step must be multiple of its element size", - __FILE__, __LINE__); - - if (!border.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1)) - cv::gpu::error("colFilterCaller: can't use specified border extrapolation, image is too small, " - "try bigger image or another border extrapolation mode", __FILE__, __LINE__); - - colFilterKernel<<>>(src, dst, anchor, border); - cudaSafeCall(cudaThreadSynchronize()); -} - - -template -void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, - const float* kernel, int ksize) -{ - typedef void (*Caller)(const DevMem2D_, PtrStepf, int); - - static const Caller callers[] = - { - 0, colFilterCaller, - colFilterCaller, colFilterCaller, - colFilterCaller, colFilterCaller, - colFilterCaller, colFilterCaller, - colFilterCaller, colFilterCaller, - colFilterCaller, colFilterCaller, - colFilterCaller, colFilterCaller, - colFilterCaller, colFilterCaller - }; - - loadKernel(kernel, ksize); - callers[ksize](src, dst, anchor); -} - - -template -void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, - const float* kernel, int ksize, int brd_interp) -{ - typedef void (*Caller)(const DevMem2D_, PtrStepf, int, const float*, int); - - static const Caller callers[] = - { - colFilterCaller >, - colFilterCaller > - }; - - callers[brd_interp](src, dst, anchor, kernel, ksize); -} - - -template void colFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); -template void colFilterCaller(const DevMem2D_, PtrStepf, int, const float*, int, int); - -}}} diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 16d8e6b..c2e0fcf 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -986,22 +986,10 @@ namespace cv { namespace gpu { namespace imgproc { }}} -namespace cv { namespace gpu { namespace linear_filters { - - template - void rowFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, const float* kernel, - int ksize, int brd_interp); - - template - void colFilterCaller(const DevMem2D_ src, PtrStepf dst, int anchor, const float* kernel, - int ksize, int brd_interp); - -}}} - namespace { template - void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType) + void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType) { double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize; if (ksize < 0) @@ -1013,42 +1001,28 @@ namespace GpuMat tmp_buf(src.size(), CV_32F); Dx.create(src.size(), CV_32F); Dy.create(src.size(), CV_32F); - Mat kx, ky; - - getDerivKernels(kx, ky, 1, 0, ksize, false, CV_32F); - kx = kx.reshape(1, 1) * scale; - ky = ky.reshape(1, 1); - - linear_filters::rowFilterCaller( - src, tmp_buf, kx.cols >> 1, kx.ptr(0), kx.cols, - gpuBorderType); - linear_filters::colFilterCaller( - tmp_buf, Dx, ky.cols >> 1, ky.ptr(0), ky.cols, - gpuBorderType); - - getDerivKernels(kx, ky, 0, 1, ksize, false, CV_32F); - kx = kx.reshape(1, 1); - ky = ky.reshape(1, 1) * scale; - - linear_filters::rowFilterCaller( - src, tmp_buf, kx.cols >> 1, kx.ptr(0), kx.cols, - gpuBorderType); - - linear_filters::colFilterCaller( - tmp_buf, Dy, ky.cols >> 1, ky.ptr(0), ky.cols, - gpuBorderType); + if (ksize > 0) + { + Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, borderType); + Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, borderType); + } + else + { + Scharr(src, Dx, CV_32F, 1, 0, scale, borderType); + Scharr(src, Dy, CV_32F, 0, 1, scale, borderType); + } } - void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType) + void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType) { switch (src.type()) { case CV_8U: - extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); + extractCovData(src, Dx, Dy, blockSize, ksize, borderType); break; case CV_32F: - extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); + extractCovData(src, Dx, Dy, blockSize, ksize, borderType); break; default: CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix"); @@ -1090,7 +1064,7 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ks CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType)); GpuMat Dx, Dy; - extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); + extractCovData(src, Dx, Dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType); } @@ -1104,7 +1078,7 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, i CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType)); GpuMat Dx, Dy; - extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType); + extractCovData(src, Dx, Dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType); }