From: Vladislav Vinogradov Date: Wed, 17 Apr 2013 14:14:35 +0000 (+0400) Subject: gpuimgproc module for image processing X-Git-Tag: submit/tizen/20180620.034203~3^2~3902^2~35 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e41aea0acf0d6d40b03a2f38499f135504101752;p=platform%2Fupstream%2Fopencv.git gpuimgproc module for image processing --- diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 55fc1007e7..ee66608a2d 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -4,7 +4,7 @@ endif() set(the_description "GPU-accelerated Computer Vision") -ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters OPTIONAL opencv_gpunvidia) +ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc OPTIONAL opencv_gpunvidia) ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda") diff --git a/modules/gpu/doc/gpu.rst b/modules/gpu/doc/gpu.rst index de52ceaba1..6c082ccd17 100644 --- a/modules/gpu/doc/gpu.rst +++ b/modules/gpu/doc/gpu.rst @@ -8,7 +8,6 @@ gpu. GPU-accelerated Computer Vision introduction initalization_and_information data_structures - image_processing object_detection feature_detection_and_description camera_calibration_and_3d_reconstruction diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpu/doc/image_processing.rst deleted file mode 100644 index 69e5003743..0000000000 --- a/modules/gpu/doc/image_processing.rst +++ /dev/null @@ -1,1065 +0,0 @@ -Image Processing -================ - -.. highlight:: cpp - - - -gpu::meanShiftFiltering ---------------------------- -Performs mean-shift filtering for each point of the source image. - -.. 
ocv:function:: void gpu::meanShiftFiltering( const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() ) - - :param src: Source image. Only ``CV_8UC4`` images are supported for now. - - :param dst: Destination image containing the color of mapped points. It has the same size and type as ``src`` . - - :param sp: Spatial window radius. - - :param sr: Color window radius. - - :param criteria: Termination criteria. See :ocv:class:`TermCriteria`. - -It maps each point of the source image into another point. As a result, you have a new color and new position of each point. - - - -gpu::meanShiftProc ----------------------- -Performs a mean-shift procedure and stores information about processed points (their colors and positions) in two images. - -.. ocv:function:: void gpu::meanShiftProc( const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria=TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream=Stream::Null() ) - - :param src: Source image. Only ``CV_8UC4`` images are supported for now. - - :param dstr: Destination image containing the color of mapped points. The size and type is the same as ``src`` . - - :param dstsp: Destination image containing the position of mapped points. The size is the same as ``src`` size. The type is ``CV_16SC2`` . - - :param sp: Spatial window radius. - - :param sr: Color window radius. - - :param criteria: Termination criteria. See :ocv:class:`TermCriteria`. - -.. seealso:: :ocv:func:`gpu::meanShiftFiltering` - - - -gpu::meanShiftSegmentation ------------------------------- -Performs a mean-shift segmentation of the source image and eliminates small segments. - -.. ocv:function:: void gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)) - - :param src: Source image. 
Only ``CV_8UC4`` images are supported for now. - - :param dst: Segmented image with the same size and type as ``src`` . - - :param sp: Spatial window radius. - - :param sr: Color window radius. - - :param minsize: Minimum segment size. Smaller segments are merged. - - :param criteria: Termination criteria. See :ocv:class:`TermCriteria`. - - - -gpu::integral ------------------ -Computes an integral image. - -.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null()) - - :param src: Source image. Only ``CV_8UC1`` images are supported for now. - - :param sum: Integral image containing 32-bit unsigned integer values packed into ``CV_32SC1`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`integral` - - - -gpu::sqrIntegral --------------------- -Computes a squared integral image. - -.. ocv:function:: void gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null()) - - :param src: Source image. Only ``CV_8UC1`` images are supported for now. - - :param sqsum: Squared integral image containing 64-bit unsigned integer values packed into ``CV_64FC1`` . - - :param stream: Stream for the asynchronous version. - - - -gpu::columnSum ------------------- -Computes a vertical (column) sum. - -.. ocv:function:: void gpu::columnSum(const GpuMat& src, GpuMat& sum) - - :param src: Source image. Only ``CV_32FC1`` images are supported for now. - - :param sum: Destination image of the ``CV_32FC1`` type. - - - -gpu::cornerHarris ---------------------- -Computes the Harris cornerness criteria at each image pixel. - -.. ocv:function:: void gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101) - - :param src: Source image. Only ``CV_8UC1`` and ``CV_32FC1`` images are supported for now. - - :param dst: Destination image containing cornerness values. It has the same size as ``src`` and ``CV_32FC1`` type. 
- - :param blockSize: Neighborhood size. - - :param ksize: Aperture parameter for the Sobel operator. - - :param k: Harris detector free parameter. - - :param borderType: Pixel extrapolation method. Only ``BORDER_REFLECT101`` and ``BORDER_REPLICATE`` are supported for now. - -.. seealso:: :ocv:func:`cornerHarris` - - - -gpu::cornerMinEigenVal --------------------------- -Computes the minimum eigen value of a 2x2 derivative covariation matrix at each pixel (the cornerness criteria). - -.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101) - -.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101) - -.. ocv:function:: void gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null()) - - :param src: Source image. Only ``CV_8UC1`` and ``CV_32FC1`` images are supported for now. - - :param dst: Destination image containing cornerness values. The size is the same. The type is ``CV_32FC1`` . - - :param blockSize: Neighborhood size. - - :param ksize: Aperture parameter for the Sobel operator. - - :param borderType: Pixel extrapolation method. Only ``BORDER_REFLECT101`` and ``BORDER_REPLICATE`` are supported for now. - -.. seealso:: :ocv:func:`cornerMinEigenVal` - - - -gpu::mulSpectrums ---------------------- -Performs a per-element multiplication of two Fourier spectrums. - -.. ocv:function:: void gpu::mulSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream=Stream::Null() ) - - :param a: First spectrum. - - :param b: Second spectrum with the same size and type as ``a`` . - - :param c: Destination spectrum. - - :param flags: Mock parameter used for CPU/GPU interfaces similarity. 
- - :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication. - - Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now. - -.. seealso:: :ocv:func:`mulSpectrums` - - - -gpu::mulAndScaleSpectrums ------------------------------ -Performs a per-element multiplication of two Fourier spectrums and scales the result. - -.. ocv:function:: void gpu::mulAndScaleSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream=Stream::Null() ) - - :param a: First spectrum. - - :param b: Second spectrum with the same size and type as ``a`` . - - :param c: Destination spectrum. - - :param flags: Mock parameter used for CPU/GPU interfaces similarity. - - :param scale: Scale constant. - - :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication. - - Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now. - -.. seealso:: :ocv:func:`mulSpectrums` - - - -gpu::dft ------------- -Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix. - -.. ocv:function:: void gpu::dft( const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream=Stream::Null() ) - - :param src: Source matrix (real or complex). - - :param dst: Destination matrix (real or complex). - - :param dft_size: Size of a discrete Fourier transform. - - :param flags: Optional flags: - - * **DFT_ROWS** transforms each individual row of the source matrix. - - * **DFT_SCALE** scales the result: divide it by the number of elements in the transform (obtained from ``dft_size`` ). - - * **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively). - - * **DFT_REAL_OUTPUT** specifies the output as real. 
The source matrix is the result of real-complex transform, so the destination matrix must be real. - -Use to handle real matrices ( ``CV_32FC1`` ) and complex matrices in the interleaved format ( ``CV_32FC2`` ). - -The source matrix should be continuous, otherwise reallocation and data copying is performed. The function chooses an operation mode depending on the flags, size, and channel count of the source matrix: - - * If the source matrix is complex and the output is not specified as real, the destination matrix is complex and has the ``dft_size`` size and ``CV_32FC2`` type. The destination matrix contains a full result of the DFT (forward or inverse). - - * If the source matrix is complex and the output is specified as real, the function assumes that its input is the result of the forward transform (see the next item). The destination matrix has the ``dft_size`` size and ``CV_32FC1`` type. It contains the result of the inverse DFT. - - * If the source matrix is real (its type is ``CV_32FC1`` ), forward DFT is performed. The result of the DFT is packed into complex ( ``CV_32FC2`` ) matrix. So, the width of the destination matrix is ``dft_size.width / 2 + 1`` . But if the source is a single column, the height is reduced instead of the width. - -.. seealso:: :ocv:func:`dft` - - -gpu::ConvolveBuf ----------------- -.. ocv:struct:: gpu::ConvolveBuf - -Class providing a memory buffer for :ocv:func:`gpu::convolve` function, plus it allows to adjust some specific parameters. :: - - struct CV_EXPORTS ConvolveBuf - { - Size result_size; - Size block_size; - Size user_block_size; - Size dft_size; - int spect_len; - - GpuMat image_spect, templ_spect, result_spect; - GpuMat image_block, templ_block, result_data; - - void create(Size image_size, Size templ_size); - static Size estimateBlockSize(Size result_size, Size templ_size); - }; - -You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::convolve` function. 
If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed. - -gpu::ConvolveBuf::create ------------------------- -.. ocv:function:: gpu::ConvolveBuf::create(Size image_size, Size templ_size) - -Constructs a buffer for :ocv:func:`gpu::convolve` function with respective arguments. - - -gpu::convolve ------------------ -Computes a convolution (or cross-correlation) of two images. - -.. ocv:function:: void gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr=false) - -.. ocv:function:: void gpu::convolve( const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream=Stream::Null() ) - - :param image: Source image. Only ``CV_32FC1`` images are supported for now. - - :param templ: Template image. The size is not greater than the ``image`` size. The type is the same as ``image`` . - - :param result: Result image. If ``image`` is *W x H* and ``templ`` is *w x h*, then ``result`` must be *W-w+1 x H-h+1*. - - :param ccorr: Flags to evaluate cross-correlation instead of convolution. - - :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::ConvolveBuf`. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::filter2D` - -gpu::MatchTemplateBuf ---------------------- -.. ocv:struct:: gpu::MatchTemplateBuf - -Class providing memory buffers for :ocv:func:`gpu::matchTemplate` function, plus it allows to adjust some specific parameters. :: - - struct CV_EXPORTS MatchTemplateBuf - { - Size user_block_size; - GpuMat imagef, templf; - std::vector images; - std::vector image_sums; - std::vector image_sqsums; - }; - -You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::matchTemplate` function. 
If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed. - -gpu::matchTemplate ----------------------- -Computes a proximity map for a raster template and an image where the template is searched for. - -.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null()) - -.. ocv:function:: void gpu::matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null()) - - :param image: Source image. ``CV_32F`` and ``CV_8U`` depth images (1..4 channels) are supported for now. - - :param templ: Template image with the size and type the same as ``image`` . - - :param result: Map containing comparison results ( ``CV_32FC1`` ). If ``image`` is *W x H* and ``templ`` is *w x h*, then ``result`` must be *W-w+1 x H-h+1*. - - :param method: Specifies the way to compare the template with the image. - - :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::MatchTemplateBuf`. - - :param stream: Stream for the asynchronous version. - - The following methods are supported for the ``CV_8U`` depth images for now: - - * ``CV_TM_SQDIFF`` - * ``CV_TM_SQDIFF_NORMED`` - * ``CV_TM_CCORR`` - * ``CV_TM_CCORR_NORMED`` - * ``CV_TM_CCOEFF`` - * ``CV_TM_CCOEFF_NORMED`` - - The following methods are supported for the ``CV_32F`` images for now: - - * ``CV_TM_SQDIFF`` - * ``CV_TM_CCORR`` - -.. seealso:: :ocv:func:`matchTemplate` - - -gpu::remap --------------- -Applies a generic geometrical transformation to an image. - -.. 
ocv:function:: void gpu::remap( const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() ) - - :param src: Source image. - - :param dst: Destination image with the size the same as ``xmap`` and the type the same as ``src`` . - - :param xmap: X values. Only ``CV_32FC1`` type is supported. - - :param ymap: Y values. Only ``CV_32FC1`` type is supported. - - :param interpolation: Interpolation method (see :ocv:func:`resize` ). ``INTER_NEAREST`` , ``INTER_LINEAR`` and ``INTER_CUBIC`` are supported for now. - - :param borderMode: Pixel extrapolation method (see :ocv:func:`borderInterpolate` ). ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now. - - :param borderValue: Value used in case of a constant border. By default, it is 0. - - :param stream: Stream for the asynchronous version. - -The function transforms the source image using the specified map: - -.. math:: - - \texttt{dst} (x,y) = \texttt{src} (xmap(x,y), ymap(x,y)) - -Values of pixels with non-integer coordinates are computed using the bilinear interpolation. - -.. seealso:: :ocv:func:`remap` - - - -gpu::cvtColor ------------------ -Converts an image from one color space to another. - -.. ocv:function:: void gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null()) - - :param src: Source image with ``CV_8U`` , ``CV_16U`` , or ``CV_32F`` depth and 1, 3, or 4 channels. - - :param dst: Destination image with the same size and depth as ``src`` . - - :param code: Color space conversion code. For details, see :ocv:func:`cvtColor` . Conversion to/from Luv and Bayer color spaces is not supported. - - :param dcn: Number of channels in the destination image. If the parameter is 0, the number of the channels is derived automatically from ``src`` and the ``code`` . 
- - :param stream: Stream for the asynchronous version. - -3-channel color spaces (like ``HSV``, ``XYZ``, and so on) can be stored in a 4-channel image for better performance. - -.. seealso:: :ocv:func:`cvtColor` - - - -gpu::swapChannels ------------------ -Exchanges the color channels of an image in-place. - -.. ocv:function:: void gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null()) - - :param image: Source image. Supports only ``CV_8UC4`` type. - - :param dstOrder: Integer array describing how channel values are permutated. The n-th entry of the array contains the number of the channel that is stored in the n-th channel of the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR channel order. - - :param stream: Stream for the asynchronous version. - -The methods support arbitrary permutations of the original channels, including replication. - - - -gpu::resize ---------------- -Resizes an image. - -.. ocv:function:: void gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()) - - :param src: Source image. - - :param dst: Destination image with the same type as ``src`` . The size is ``dsize`` (when it is non-zero) or the size is computed from ``src.size()`` , ``fx`` , and ``fy`` . - - :param dsize: Destination image size. If it is zero, it is computed as: - - .. math:: - \texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))} - - Either ``dsize`` or both ``fx`` and ``fy`` must be non-zero. - - :param fx: Scale factor along the horizontal axis. If it is zero, it is computed as: - - .. math:: - - \texttt{(double)dsize.width/src.cols} - - :param fy: Scale factor along the vertical axis. If it is zero, it is computed as: - - .. math:: - - \texttt{(double)dsize.height/src.rows} - - :param interpolation: Interpolation method. ``INTER_NEAREST`` , ``INTER_LINEAR`` and ``INTER_CUBIC`` are supported for now. 
- - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`resize` - - - -gpu::warpAffine -------------------- -Applies an affine transformation to an image. - -.. ocv:function:: void gpu::warpAffine( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or ``CV_32F`` depth and 1, 3, or 4 channels are supported. - - :param dst: Destination image with the same type as ``src`` . The size is ``dsize`` . - - :param M: *2x3* transformation matrix. - - :param dsize: Size of the destination image. - - :param flags: Combination of interpolation methods (see :ocv:func:`resize`) and the optional flag ``WARP_INVERSE_MAP`` specifying that ``M`` is an inverse transformation ( ``dst=>src`` ). Only ``INTER_NEAREST`` , ``INTER_LINEAR`` , and ``INTER_CUBIC`` interpolation methods are supported. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`warpAffine` - - - -gpu::buildWarpAffineMaps ------------------------- -Builds transformation maps for affine transformation. - -.. ocv:function:: void gpu::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null()) - - :param M: *2x3* transformation matrix. - - :param inverse: Flag specifying that ``M`` is an inverse transformation ( ``dst=>src`` ). - - :param dsize: Size of the destination image. - - :param xmap: X values with ``CV_32FC1`` type. - - :param ymap: Y values with ``CV_32FC1`` type. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::warpAffine` , :ocv:func:`gpu::remap` - - - -gpu::warpPerspective ------------------------- -Applies a perspective transformation to an image. - -.. 
ocv:function:: void gpu::warpPerspective( const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags=INTER_LINEAR, int borderMode=BORDER_CONSTANT, Scalar borderValue=Scalar(), Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8U`` , ``CV_16U`` , ``CV_32S`` , or ``CV_32F`` depth and 1, 3, or 4 channels are supported. - - :param dst: Destination image with the same type as ``src`` . The size is ``dsize`` . - - :param M: *3x3* transformation matrix. - - :param dsize: Size of the destination image. - - :param flags: Combination of interpolation methods (see :ocv:func:`resize` ) and the optional flag ``WARP_INVERSE_MAP`` specifying that ``M`` is the inverse transformation ( ``dst => src`` ). Only ``INTER_NEAREST`` , ``INTER_LINEAR`` , and ``INTER_CUBIC`` interpolation methods are supported. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`warpPerspective` - - - -gpu::buildWarpPerspectiveMaps ------------------------------ -Builds transformation maps for perspective transformation. - -.. ocv:function:: void gpu::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null()) - - :param M: *3x3* transformation matrix. - - :param inverse: Flag specifying that ``M`` is an inverse transformation ( ``dst=>src`` ). - - :param dsize: Size of the destination image. - - :param xmap: X values with ``CV_32FC1`` type. - - :param ymap: Y values with ``CV_32FC1`` type. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::warpPerspective` , :ocv:func:`gpu::remap` - - - -gpu::rotate ---------------- -Rotates an image around the origin (0,0) and then shifts it. - -.. ocv:function:: void gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()) - - :param src: Source image. 
Supports 1, 3 or 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32F`` depth. - - :param dst: Destination image with the same type as ``src`` . The size is ``dsize`` . - - :param dsize: Size of the destination image. - - :param angle: Angle of rotation in degrees. - - :param xShift: Shift along the horizontal axis. - - :param yShift: Shift along the vertical axis. - - :param interpolation: Interpolation method. Only ``INTER_NEAREST`` , ``INTER_LINEAR`` , and ``INTER_CUBIC`` are supported. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::warpAffine` - - - -gpu::copyMakeBorder ------------------------ -Forms a border around an image. - -.. ocv:function:: void gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null()) - - :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and ``CV_32FC1`` types are supported. - - :param dst: Destination image with the same type as ``src``. The size is ``Size(src.cols+left+right, src.rows+top+bottom)`` . - - :param top: - - :param bottom: - - :param left: - - :param right: Number of pixels in each direction from the source image rectangle to extrapolate. For example: ``top=1, bottom=1, left=1, right=1`` mean that 1 pixel-wide border needs to be built. - - :param borderType: Border type. See :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now. - - :param value: Border value. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`copyMakeBorder` - - - -gpu::rectStdDev -------------------- -Computes a standard deviation of integral images. - -.. ocv:function:: void gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null()) - - :param src: Source image. 
Only the ``CV_32SC1`` type is supported. - - :param sqr: Squared source image. Only the ``CV_32FC1`` type is supported. - - :param dst: Destination image with the same type and size as ``src`` . - - :param rect: Rectangular window. - - :param stream: Stream for the asynchronous version. - - - -gpu::evenLevels -------------------- -Computes levels with even distribution. - -.. ocv:function:: void gpu::evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel) - - :param levels: Destination array. ``levels`` has 1 row, ``nLevels`` columns, and the ``CV_32SC1`` type. - - :param nLevels: Number of computed levels. ``nLevels`` must be at least 2. - - :param lowerLevel: Lower boundary value of the lowest level. - - :param upperLevel: Upper boundary value of the greatest level. - - - -gpu::histEven ------------------ -Calculates a histogram with evenly distributed bins. - -.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()) - -.. ocv:function:: void gpu::histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()) - -.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() ) - -.. ocv:function:: void gpu::histEven( const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8U``, ``CV_16U``, or ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately. - - :param hist: Destination histogram with one row, ``histSize`` columns, and the ``CV_32S`` type. - - :param histSize: Size of the histogram. - - :param lowerLevel: Lower boundary of lowest-level bin. - - :param upperLevel: Upper boundary of highest-level bin. 
- - :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). - - :param stream: Stream for the asynchronous version. - - - -gpu::histRange ------------------- -Calculates a histogram with bins determined by the ``levels`` array. - -.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null()) - -.. ocv:function:: void gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null()) - - :param src: Source image. ``CV_8U`` , ``CV_16U`` , or ``CV_16S`` depth and 1 or 4 channels are supported. For a four-channel image, all channels are processed separately. - - :param hist: Destination histogram with one row, ``(levels.cols-1)`` columns, and the ``CV_32SC1`` type. - - :param levels: Number of levels in the histogram. - - :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). - - :param stream: Stream for the asynchronous version. - - - -gpu::calcHist ------------------- -Calculates histogram for one channel 8-bit image. - -.. ocv:function:: void gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null()) - - :param src: Source image. - - :param hist: Destination histogram with one row, 256 columns, and the ``CV_32SC1`` type. - - :param stream: Stream for the asynchronous version. - - - -gpu::equalizeHist ------------------- -Equalizes the histogram of a grayscale image. - -.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - -.. ocv:function:: void gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null()) - - :param src: Source image. - - :param dst: Destination image. - - :param hist: Destination histogram with one row, 256 columns, and the ``CV_32SC1`` type. 
- - :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`equalizeHist` - - - -gpu::buildWarpPlaneMaps ------------------------ -Builds plane warping maps. - -.. ocv:function:: void gpu::buildWarpPlaneMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, const Mat & T, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() ) - - :param stream: Stream for the asynchronous version. - - - -gpu::buildWarpCylindricalMaps ------------------------------ -Builds cylindrical warping maps. - -.. ocv:function:: void gpu::buildWarpCylindricalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() ) - - :param stream: Stream for the asynchronous version. - - - -gpu::buildWarpSphericalMaps ---------------------------- -Builds spherical warping maps. - -.. ocv:function:: void gpu::buildWarpSphericalMaps( Size src_size, Rect dst_roi, const Mat & K, const Mat& R, float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream=Stream::Null() ) - - :param stream: Stream for the asynchronous version. - - - -gpu::pyrDown -------------------- -Smoothes an image and downsamples it. - -.. ocv:function:: void gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src: Source image. - - :param dst: Destination image. Will have ``Size((src.cols+1)/2, (src.rows+1)/2)`` size and the same type as ``src`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`pyrDown` - - - -gpu::pyrUp -------------------- -Upsamples an image and then smoothes it. - -.. ocv:function:: void gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src: Source image. - - :param dst: Destination image. Will have ``Size(src.cols*2, src.rows*2)`` size and the same type as ``src`` . 
- - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`pyrUp` - - - -gpu::blendLinear -------------------- -Performs linear blending of two images. - -.. ocv:function:: void gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, GpuMat& result, Stream& stream = Stream::Null()) - - :param img1: First image. Supports only ``CV_8U`` and ``CV_32F`` depth. - - :param img2: Second image. Must have the same size and the same type as ``img1`` . - - :param weights1: Weights for first image. Must have the same size as ``img1`` . Supports only ``CV_32F`` type. - - :param weights2: Weights for second image. Must have the same size as ``img2`` . Supports only ``CV_32F`` type. - - :param result: Destination image. - - :param stream: Stream for the asynchronous version. - - -gpu::bilateralFilter --------------------- -Performs bilateral filtering of passed image - -.. ocv:function:: void gpu::bilateralFilter( const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode=BORDER_DEFAULT, Stream& stream=Stream::Null() ) - - :param src: Source image. Supports only (channels != 2 && depth() != CV_8S && depth() != CV_32S && depth() != CV_64F). - - :param dst: Destination image. - - :param kernel_size: Kernel window size. - - :param sigma_color: Filter sigma in the color space. - - :param sigma_spatial: Filter sigma in the coordinate space. - - :param borderMode: Border type. See :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now. - - :param stream: Stream for the asynchronous version. - -.. seealso:: - - :ocv:func:`bilateralFilter`, - - -gpu::nonLocalMeans -------------------- -Performs pure non local means denoising without any simplification, and thus it is not fast. - -.. 
ocv:function:: void gpu::nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null()) - - :param src: Source image. Supports only CV_8UC1, CV_8UC2 and CV_8UC3. - - :param dst: Destination image. - - :param h: Filter sigma regulating filter strength for color. - - :param search_window: Size of search window. - - :param block_size: Size of block used for computing weights. - - :param borderMode: Border type. See :ocv:func:`borderInterpolate` for details. ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , ``BORDER_CONSTANT`` , ``BORDER_REFLECT`` and ``BORDER_WRAP`` are supported for now. - - :param stream: Stream for the asynchronous version. - -.. seealso:: - - :ocv:func:`fastNlMeansDenoising` - -gpu::FastNonLocalMeansDenoising -------------------------------- -.. ocv:class:: gpu::FastNonLocalMeansDenoising - - :: - - class FastNonLocalMeansDenoising - { - public: - //! Simple method, recommended for grayscale images (though it supports multichannel images) - void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()) - //! Processes luminance and color components separatelly - void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()) - }; - -The class implements fast approximate Non Local Means Denoising algorithm. - -gpu::FastNonLocalMeansDenoising::simpleMethod() ------------------------------------------------ -Perform image denoising using Non-local Means Denoising algorithm http://www.ipol.im/pub/algo/bcm_non_local_means_denoising with several computational optimizations. Noise expected to be a gaussian white noise - -.. 
ocv:function:: void gpu::FastNonLocalMeansDenoising::simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()) - - :param src: Input 8-bit 1-channel, 2-channel or 3-channel image. - - :param dst: Output image with the same size and type as ``src`` . - - :param h: Parameter regulating filter strength. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise - - :param search_window: Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater search_window - greater denoising time. Recommended value 21 pixels - - :param block_size: Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels - - :param stream: Stream for the asynchronous invocations. - -This function expected to be applied to grayscale images. For colored images look at ``FastNonLocalMeansDenoising::labMethod``. - -.. seealso:: - - :ocv:func:`fastNlMeansDenoising` - -gpu::FastNonLocalMeansDenoising::labMethod() --------------------------------------------- -Modification of ``FastNonLocalMeansDenoising::simpleMethod`` for color images - -.. ocv:function:: void gpu::FastNonLocalMeansDenoising::labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()) - - :param src: Input 8-bit 3-channel image. - - :param dst: Output image with the same size and type as ``src`` . - - :param h_luminance: Parameter regulating filter strength. Big h value perfectly removes noise but also removes image details, smaller h value preserves details but also preserves some noise - - :param float: The same as h but for color components. 
For most images value equals 10 will be enought to remove colored noise and do not distort colors - - :param search_window: Size in pixels of the window that is used to compute weighted average for given pixel. Should be odd. Affect performance linearly: greater search_window - greater denoising time. Recommended value 21 pixels - - :param block_size: Size in pixels of the template patch that is used to compute weights. Should be odd. Recommended value 7 pixels - - :param stream: Stream for the asynchronous invocations. - -The function converts image to CIELAB colorspace and then separately denoise L and AB components with given h parameters using ``FastNonLocalMeansDenoising::simpleMethod`` function. - -.. seealso:: - - :ocv:func:`fastNlMeansDenoisingColored` - -gpu::alphaComp -------------------- -Composites two images using alpha opacity values contained in each image. - -.. ocv:function:: void gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null()) - - :param img1: First image. Supports ``CV_8UC4`` , ``CV_16UC4`` , ``CV_32SC4`` and ``CV_32FC4`` types. - - :param img2: Second image. Must have the same size and the same type as ``img1`` . - - :param dst: Destination image. - - :param alpha_op: Flag specifying the alpha-blending operation: - - * **ALPHA_OVER** - * **ALPHA_IN** - * **ALPHA_OUT** - * **ALPHA_ATOP** - * **ALPHA_XOR** - * **ALPHA_PLUS** - * **ALPHA_OVER_PREMUL** - * **ALPHA_IN_PREMUL** - * **ALPHA_OUT_PREMUL** - * **ALPHA_ATOP_PREMUL** - * **ALPHA_XOR_PREMUL** - * **ALPHA_PLUS_PREMUL** - * **ALPHA_PREMUL** - - :param stream: Stream for the asynchronous version. - - - -gpu::Canny -------------------- -Finds edges in an image using the [Canny86]_ algorithm. - -.. ocv:function:: void gpu::Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) - -.. 
ocv:function:: void gpu::Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) - -.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false) - -.. ocv:function:: void gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false) - - :param image: Single-channel 8-bit input image. - - :param dx: First derivative of image in the vertical direction. Support only ``CV_32S`` type. - - :param dy: First derivative of image in the horizontal direction. Support only ``CV_32S`` type. - - :param edges: Output edge map. It has the same size and type as ``image`` . - - :param low_thresh: First threshold for the hysteresis procedure. - - :param high_thresh: Second threshold for the hysteresis procedure. - - :param apperture_size: Aperture size for the :ocv:func:`Sobel` operator. - - :param L2gradient: Flag indicating whether a more accurate :math:`L_2` norm :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}` should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default :math:`L_1` norm :math:`=|dI/dx|+|dI/dy|` is enough ( ``L2gradient=false`` ). - - :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). - -.. seealso:: :ocv:func:`Canny` - - - -gpu::HoughLines ---------------- -Finds lines in a binary image using the classical Hough transform. - -.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) - -.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) - - :param src: 8-bit, single-channel binary source image. 
- - :param lines: Output vector of lines. Each line is represented by a two-element vector :math:`(\rho, \theta)` . :math:`\rho` is the distance from the coordinate origin :math:`(0,0)` (top-left corner of the image). :math:`\theta` is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` ). - - :param rho: Distance resolution of the accumulator in pixels. - - :param theta: Angle resolution of the accumulator in radians. - - :param threshold: Accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ). - - :param doSort: Performs lines sort by votes. - - :param maxLines: Maximum number of output lines. - - :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). - -.. seealso:: :ocv:func:`HoughLines` - - - -gpu::HoughLinesDownload ------------------------ -Downloads results from :ocv:func:`gpu::HoughLines` to host memory. - -.. ocv:function:: void gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) - - :param d_lines: Result of :ocv:func:`gpu::HoughLines` . - - :param h_lines: Output host array. - - :param h_votes: Optional output array for line's votes. - -.. seealso:: :ocv:func:`gpu::HoughLines` - - - -gpu::HoughCircles ------------------ -Finds circles in a grayscale image using the Hough transform. - -.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096) - -.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096) - - :param src: 8-bit, single-channel grayscale input image. - - :param circles: Output vector of found circles. 
Each vector is encoded as a 3-element floating-point vector :math:`(x, y, radius)` . - - :param method: Detection method to use. Currently, the only implemented method is ``CV_HOUGH_GRADIENT`` , which is basically *21HT* , described in [Yuen90]_. - - :param dp: Inverse ratio of the accumulator resolution to the image resolution. For example, if ``dp=1`` , the accumulator has the same resolution as the input image. If ``dp=2`` , the accumulator has half as big width and height. - - :param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed. - - :param cannyThreshold: The higher threshold of the two passed to the :ocv:func:`gpu::Canny` edge detector (the lower one is twice smaller). - - :param votesThreshold: The accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected. - - :param minRadius: Minimum circle radius. - - :param maxRadius: Maximum circle radius. - - :param maxCircles: Maximum number of output circles. - - :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). - -.. seealso:: :ocv:func:`HoughCircles` - - - -gpu::HoughCirclesDownload -------------------------- -Downloads results from :ocv:func:`gpu::HoughCircles` to host memory. - -.. ocv:function:: void gpu::HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles) - - :param d_circles: Result of :ocv:func:`gpu::HoughCircles` . - - :param h_circles: Output host array. - -.. 
seealso:: :ocv:func:`gpu::HoughCircles` diff --git a/modules/gpu/include/opencv2/gpu.hpp b/modules/gpu/include/opencv2/gpu.hpp index 19fd7c93e9..7397321232 100644 --- a/modules/gpu/include/opencv2/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu.hpp @@ -52,6 +52,8 @@ #include "opencv2/core/gpumat.hpp" #include "opencv2/gpuarithm.hpp" #include "opencv2/gpufilters.hpp" +#include "opencv2/gpuimgproc.hpp" + #include "opencv2/imgproc.hpp" #include "opencv2/objdetect.hpp" #include "opencv2/features2d.hpp" @@ -60,280 +62,7 @@ namespace cv { namespace gpu { ////////////////////////////// Image processing ////////////////////////////// -enum { ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL, - ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL}; - -//! Composite two images using alpha opacity values contained in each image -//! Supports CV_8UC4, CV_16UC4, CV_32SC4 and CV_32FC4 types -CV_EXPORTS void alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream = Stream::Null()); - -//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] -//! supports only CV_32FC1 map type -CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, - int interpolation, int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), - Stream& stream = Stream::Null()); - -//! Does mean shift filtering on GPU. -CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, - TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), - Stream& stream = Stream::Null()); - -//! Does mean shift procedure on GPU. -CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, - TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), - Stream& stream = Stream::Null()); - -//! Does mean shift segmentation with elimination of small regions. 
-CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize, - TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); - -//! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV. -//! Supported types of input disparity: CV_8U, CV_16S. -//! Output disparity has CV_8UC4 type in BGRA format (alpha = 255). -CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null()); - -//! Reprojects disparity image to 3D space. -//! Supports CV_8U and CV_16S types of input disparity. -//! The output is a 3- or 4-channel floating-point matrix. -//! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map. -//! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify. -CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null()); - -//! converts image from one color space to another -CV_EXPORTS void cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn = 0, Stream& stream = Stream::Null()); - -enum -{ - // Bayer Demosaicing (Malvar, He, and Cutler) - COLOR_BayerBG2BGR_MHT = 256, - COLOR_BayerGB2BGR_MHT = 257, - COLOR_BayerRG2BGR_MHT = 258, - COLOR_BayerGR2BGR_MHT = 259, - - COLOR_BayerBG2RGB_MHT = COLOR_BayerRG2BGR_MHT, - COLOR_BayerGB2RGB_MHT = COLOR_BayerGR2BGR_MHT, - COLOR_BayerRG2RGB_MHT = COLOR_BayerBG2BGR_MHT, - COLOR_BayerGR2RGB_MHT = COLOR_BayerGB2BGR_MHT, - - COLOR_BayerBG2GRAY_MHT = 260, - COLOR_BayerGB2GRAY_MHT = 261, - COLOR_BayerRG2GRAY_MHT = 262, - COLOR_BayerGR2GRAY_MHT = 263 -}; -CV_EXPORTS void demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn = -1, Stream& stream = Stream::Null()); - -//! swap channels -//! dstOrder - Integer array describing how channel values are permutated. The n-th entry -//! 
of the array contains the number of the channel that is stored in the n-th channel of -//! the output image. E.g. Given an RGBA image, aDstOrder = [3,2,1,0] converts this to ABGR -//! channel order. -CV_EXPORTS void swapChannels(GpuMat& image, const int dstOrder[4], Stream& stream = Stream::Null()); - -//! Routines for correcting image color gamma -CV_EXPORTS void gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward = true, Stream& stream = Stream::Null()); - -//! resizes the image -//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA -CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); - -//! warps the image using affine transformation -//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC -CV_EXPORTS void warpAffine(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR, - int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null()); - -CV_EXPORTS void buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null()); - -//! warps the image using perspective transformation -//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC -CV_EXPORTS void warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size dsize, int flags = INTER_LINEAR, - int borderMode = BORDER_CONSTANT, Scalar borderValue = Scalar(), Stream& stream = Stream::Null()); - -CV_EXPORTS void buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, GpuMat& xmap, GpuMat& ymap, Stream& stream = Stream::Null()); - -//! builds plane warping maps -CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, float scale, - GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null()); - -//! 
builds cylindrical warping maps -CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale, - GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null()); - -//! builds spherical warping maps -CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale, - GpuMat& map_x, GpuMat& map_y, Stream& stream = Stream::Null()); - -//! rotates an image around the origin (0,0) and then shifts it -//! supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC -//! supports 1, 3 or 4 channels images with CV_8U, CV_16U or CV_32F depth -CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, - int interpolation = INTER_LINEAR, Stream& stream = Stream::Null()); - -//! computes Harris cornerness criteria at each image pixel -CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101); -CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101); -CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, - int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null()); - -//! 
computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria -CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101); -CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType=BORDER_REFLECT101); -CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, - int borderType=BORDER_REFLECT101, Stream& stream = Stream::Null()); - -struct CV_EXPORTS MatchTemplateBuf -{ - Size user_block_size; - GpuMat imagef, templf; - std::vector images; - std::vector image_sums; - std::vector image_sqsums; -}; - -//! computes the proximity map for the raster template and the image where the template is searched for -CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, Stream &stream = Stream::Null()); - -//! computes the proximity map for the raster template and the image where the template is searched for -CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method, MatchTemplateBuf &buf, Stream& stream = Stream::Null()); - -//! smoothes the source image and downsamples it -CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! upsamples the source image and then smoothes it -CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! performs linear blending of two images -//! to avoid accuracy errors sum of weigths shouldn't be very close to zero -CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, - GpuMat& result, Stream& stream = Stream::Null()); - -//! 
Performa bilateral filtering of passsed image -CV_EXPORTS void bilateralFilter(const GpuMat& src, GpuMat& dst, int kernel_size, float sigma_color, float sigma_spatial, - int borderMode = BORDER_DEFAULT, Stream& stream = Stream::Null()); - -//! Brute force non-local means algorith (slow but universal) -CV_EXPORTS void nonLocalMeans(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, int borderMode = BORDER_DEFAULT, Stream& s = Stream::Null()); - -//! Fast (but approximate)version of non-local means algorith similar to CPU function (running sums technique) -class CV_EXPORTS FastNonLocalMeansDenoising -{ -public: - //! Simple method, recommended for grayscale images (though it supports multichannel images) - void simpleMethod(const GpuMat& src, GpuMat& dst, float h, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); - - //! Processes luminance and color components separatelly - void labMethod(const GpuMat& src, GpuMat& dst, float h_luminance, float h_color, int search_window = 21, int block_size = 7, Stream& s = Stream::Null()); - -private: - - GpuMat buffer, extended_src_buffer; - GpuMat lab, l, ab; -}; - -struct CV_EXPORTS CannyBuf -{ - void create(const Size& image_size, int apperture_size = 3); - void release(); - - GpuMat dx, dy; - GpuMat mag; - GpuMat map; - GpuMat st1, st2; - Ptr filterDX, filterDY; -}; - -CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); -CV_EXPORTS void Canny(const GpuMat& image, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); -CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false); -CV_EXPORTS void Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& edges, double low_thresh, double high_thresh, bool L2gradient = false); - -class 
CV_EXPORTS ImagePyramid -{ -public: - inline ImagePyramid() : nLayers_(0) {} - inline ImagePyramid(const GpuMat& img, int nLayers, Stream& stream = Stream::Null()) - { - build(img, nLayers, stream); - } - - void build(const GpuMat& img, int nLayers, Stream& stream = Stream::Null()); - - void getLayer(GpuMat& outImg, Size outRoi, Stream& stream = Stream::Null()) const; - - inline void release() - { - layer0_.release(); - pyramid_.clear(); - nLayers_ = 0; - } - -private: - GpuMat layer0_; - std::vector pyramid_; - int nLayers_; -}; - -//! HoughLines - -struct HoughLinesBuf -{ - GpuMat accum; - GpuMat list; -}; - -CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); -CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); -CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()); - -//! HoughLinesP - -//! finds line segments in the black-n-white image using probabalistic Hough transform -CV_EXPORTS void HoughLinesP(const GpuMat& image, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines = 4096); - -//! HoughCircles - -struct HoughCirclesBuf -{ - GpuMat edges; - GpuMat accum; - GpuMat list; - CannyBuf cannyBuf; -}; - -CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); -CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); -CV_EXPORTS void HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles); - -//! 
finds arbitrary template in the grayscale image using Generalized Hough Transform -//! Ballard, D.H. (1981). Generalizing the Hough transform to detect arbitrary shapes. Pattern Recognition 13 (2): 111-122. -//! Guil, N., González-Linares, J.M. and Zapata, E.L. (1999). Bidimensional shape detection using an invariant approach. Pattern Recognition 32 (6): 1025-1038. -class CV_EXPORTS GeneralizedHough_GPU : public cv::Algorithm -{ -public: - static Ptr create(int method); - - virtual ~GeneralizedHough_GPU(); - //! set template to search - void setTemplate(const GpuMat& templ, int cannyThreshold = 100, Point templCenter = Point(-1, -1)); - void setTemplate(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter = Point(-1, -1)); - - //! find template on image - void detect(const GpuMat& image, GpuMat& positions, int cannyThreshold = 100); - void detect(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions); - - void download(const GpuMat& d_positions, OutputArray h_positions, OutputArray h_votes = noArray()); - - void release(); - -protected: - virtual void setTemplateImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Point templCenter) = 0; - virtual void detectImpl(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, GpuMat& positions) = 0; - virtual void releaseImpl() = 0; - -private: - GpuMat edges_; - CannyBuf cannyBuf_; -}; ///////////////////////////// Calibration 3D ////////////////////////////////// @@ -351,68 +80,11 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c //////////////////////////////// Image Labeling //////////////////////////////// -//!performs labeling via graph cuts of a 2D regular 4-connected graph. -CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, - GpuMat& buf, Stream& stream = Stream::Null()); -//!performs labeling via graph cuts of a 2D regular 8-connected graph. 
-CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& topLeft, GpuMat& topRight, - GpuMat& bottom, GpuMat& bottomLeft, GpuMat& bottomRight, - GpuMat& labels, - GpuMat& buf, Stream& stream = Stream::Null()); - -//! compute mask for Generalized Flood fill componetns labeling. -CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null()); - -//! performs connected componnents labeling. -CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, int flags = 0, Stream& stream = Stream::Null()); ////////////////////////////////// Histograms ////////////////////////////////// -//! Compute levels with even distribution. levels will have 1 row and nLevels cols and CV_32SC1 type. -CV_EXPORTS void evenLevels(GpuMat& levels, int nLevels, int lowerLevel, int upperLevel); -//! Calculates histogram with evenly distributed bins for signle channel source. -//! Supports CV_8UC1, CV_16UC1 and CV_16SC1 source types. -//! Output hist will have one row and histSize cols and CV_32SC1 type. -CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); -CV_EXPORTS void histEven(const GpuMat& src, GpuMat& hist, GpuMat& buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); -//! Calculates histogram with evenly distributed bins for four-channel source. -//! All channels of source are processed separately. -//! Supports CV_8UC4, CV_16UC4 and CV_16SC4 source types. -//! Output hist[i] will have one row and histSize[i] cols and CV_32SC1 type. 
-CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); -CV_EXPORTS void histEven(const GpuMat& src, GpuMat hist[4], GpuMat& buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); -//! Calculates histogram with bins determined by levels array. -//! levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. -//! Supports CV_8UC1, CV_16UC1, CV_16SC1 and CV_32FC1 source types. -//! Output hist will have one row and (levels.cols-1) cols and CV_32SC1 type. -CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, Stream& stream = Stream::Null()); -CV_EXPORTS void histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream = Stream::Null()); -//! Calculates histogram with bins determined by levels array. -//! All levels must have one row and CV_32SC1 type if source has integer type or CV_32FC1 otherwise. -//! All channels of source are processed separately. -//! Supports CV_8UC4, CV_16UC4, CV_16SC4 and CV_32FC4 source types. -//! Output hist[i] will have one row and (levels[i].cols-1) cols and CV_32SC1 type. -CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null()); -CV_EXPORTS void histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], GpuMat& buf, Stream& stream = Stream::Null()); - -//! Calculates histogram for 8u one channel image -//! Output hist will have one row, 256 cols and CV32SC1 type. -CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, Stream& stream = Stream::Null()); -CV_EXPORTS void calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null()); - -//! 
normalizes the grayscale image brightness and contrast by normalizing its histogram -CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); -CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream& stream = Stream::Null()); -CV_EXPORTS void equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& stream = Stream::Null()); - -class CV_EXPORTS CLAHE : public cv::CLAHE -{ -public: - using cv::CLAHE::apply; - virtual void apply(InputArray src, OutputArray dst, Stream& stream) = 0; -}; -CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); + //////////////////////////////// StereoBM_GPU //////////////////////////////// @@ -1097,52 +769,7 @@ public: GpuMat buf; }; -class CV_EXPORTS GoodFeaturesToTrackDetector_GPU -{ -public: - explicit GoodFeaturesToTrackDetector_GPU(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, - int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04); - - //! 
return 1 rows matrix with CV_32FC2 type - void operator ()(const GpuMat& image, GpuMat& corners, const GpuMat& mask = GpuMat()); - int maxCorners; - double qualityLevel; - double minDistance; - - int blockSize; - bool useHarrisDetector; - double harrisK; - - void releaseMemory() - { - Dx_.release(); - Dy_.release(); - buf_.release(); - eig_.release(); - minMaxbuf_.release(); - tmpCorners_.release(); - } - -private: - GpuMat Dx_; - GpuMat Dy_; - GpuMat buf_; - GpuMat eig_; - GpuMat minMaxbuf_; - GpuMat tmpCorners_; -}; - -inline GoodFeaturesToTrackDetector_GPU::GoodFeaturesToTrackDetector_GPU(int maxCorners_, double qualityLevel_, double minDistance_, - int blockSize_, bool useHarrisDetector_, double harrisK_) -{ - maxCorners = maxCorners_; - qualityLevel = qualityLevel_; - minDistance = minDistance_; - blockSize = blockSize_; - useHarrisDetector = useHarrisDetector_; - harrisK = harrisK_; -} class CV_EXPORTS PyrLKOpticalFlow diff --git a/modules/gpu/perf/perf_denoising.cpp b/modules/gpu/perf/perf_denoising.cpp deleted file mode 100644 index 1e33601d60..0000000000 --- a/modules/gpu/perf/perf_denoising.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "perf_precomp.hpp" - -using namespace std; -using namespace testing; -using namespace perf; - -#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p) - -////////////////////////////////////////////////////////////////////// -// BilateralFilter - -DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int); - -PERF_TEST_P(Sz_Depth_Cn_KernelSz, Denoising_BilateralFilter, - Combine(GPU_DENOISING_IMAGE_SIZES, - Values(CV_8U, CV_32F), - GPU_CHANNELS_1_3, - Values(3, 5, 9))) -{ - declare.time(60.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - const int kernel_size = GET_PARAM(3); - - const float sigma_color = 7; - const float sigma_spatial = 5; - const int borderMode = cv::BORDER_REFLECT101; - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// nonLocalMeans - -DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int); - -PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans, - Combine(GPU_DENOISING_IMAGE_SIZES, - Values(CV_8U), - GPU_CHANNELS_1_3, - Values(21), - Values(5))) -{ - declare.time(600.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - const int search_widow_size = GET_PARAM(3); - const int block_size = GET_PARAM(4); - - const float h = 10; - const int borderMode = cv::BORDER_REFLECT101; - - const int type = CV_MAKE_TYPE(depth, 
channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::nonLocalMeans(d_src, dst, h, search_widow_size, block_size, borderMode); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - - -////////////////////////////////////////////////////////////////////// -// fastNonLocalMeans - -DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int); - -PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans, - Combine(GPU_DENOISING_IMAGE_SIZES, - Values(CV_8U), - GPU_CHANNELS_1_3, - Values(21), - Values(7))) -{ - declare.time(60.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int search_widow_size = GET_PARAM(2); - const int block_size = GET_PARAM(3); - - const float h = 10; - const int type = CV_MAKE_TYPE(depth, 1); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - cv::gpu::FastNonLocalMeansDenoising fnlmd; - - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() fnlmd.simpleMethod(d_src, dst, h, search_widow_size, block_size); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::fastNlMeansDenoising(src, dst, h, block_size, search_widow_size); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// fastNonLocalMeans (colored) - -DEF_PARAM_TEST(Sz_Depth_WinSz_BlockSz, cv::Size, MatDepth, int, int); - -PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored, - Combine(GPU_DENOISING_IMAGE_SIZES, - Values(CV_8U), - Values(21), - Values(7))) -{ - declare.time(60.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int search_widow_size = GET_PARAM(2); - const int block_size = GET_PARAM(3); - - const float h = 10; - const int type = CV_MAKE_TYPE(depth, 3); - - cv::Mat src(size, type); 
- declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - cv::gpu::FastNonLocalMeansDenoising fnlmd; - - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() fnlmd.labMethod(d_src, dst, h, h, search_widow_size, block_size); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::fastNlMeansDenoisingColored(src, dst, h, h, block_size, search_widow_size); - - CPU_SANITY_CHECK(dst); - } -} diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp deleted file mode 100644 index 5f8e9b297f..0000000000 --- a/modules/gpu/perf/perf_imgproc.cpp +++ /dev/null @@ -1,1631 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "perf_precomp.hpp" - -using namespace std; -using namespace testing; -using namespace perf; - -////////////////////////////////////////////////////////////////////// -// Remap - -enum { HALF_SIZE=0, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH }; -CV_ENUM(RemapMode, HALF_SIZE, UPSIDE_DOWN, REFLECTION_X, REFLECTION_BOTH); - -void generateMap(cv::Mat& map_x, cv::Mat& map_y, int remapMode) -{ - for (int j = 0; j < map_x.rows; ++j) - { - for (int i = 0; i < map_x.cols; ++i) - { - switch (remapMode) - { - case HALF_SIZE: - if (i > map_x.cols*0.25 && i < map_x.cols*0.75 && j > map_x.rows*0.25 && j < map_x.rows*0.75) - { - map_x.at(j,i) = 2.f * (i - map_x.cols * 0.25f) + 0.5f; - map_y.at(j,i) = 2.f * (j - map_x.rows * 0.25f) + 0.5f; - } - else - { - map_x.at(j,i) = 0.f; - map_y.at(j,i) = 0.f; - } - break; - case UPSIDE_DOWN: - map_x.at(j,i) = static_cast(i); - map_y.at(j,i) = static_cast(map_x.rows - j); - break; - case REFLECTION_X: - map_x.at(j,i) = static_cast(map_x.cols - i); - map_y.at(j,i) = static_cast(j); - break; - case REFLECTION_BOTH: - map_x.at(j,i) = static_cast(map_x.cols - i); - map_y.at(j,i) = static_cast(map_x.rows - j); - break; - } // end of 
switch - } - } -} - -DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border_Mode, cv::Size, MatDepth, MatCn, Interpolation, BorderMode, RemapMode); - -PERF_TEST_P(Sz_Depth_Cn_Inter_Border_Mode, ImgProc_Remap, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - ALL_BORDER_MODES, - RemapMode::all())) -{ - declare.time(20.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - const int interpolation = GET_PARAM(3); - const int borderMode = GET_PARAM(4); - const int remapMode = GET_PARAM(5); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - cv::Mat xmap(size, CV_32FC1); - cv::Mat ymap(size, CV_32FC1); - generateMap(xmap, ymap, remapMode); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - const cv::gpu::GpuMat d_xmap(xmap); - const cv::gpu::GpuMat d_ymap(ymap); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::remap(d_src, dst, d_xmap, d_ymap, interpolation, borderMode); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// Resize - -DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Scale, cv::Size, MatDepth, MatCn, Interpolation, double); - -PERF_TEST_P(Sz_Depth_Cn_Inter_Scale, ImgProc_Resize, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - Values(0.5, 0.3, 2.0))) -{ - declare.time(20.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - const int interpolation = GET_PARAM(3); - const double f = GET_PARAM(4); - - 
const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation); - - GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// ResizeArea - -DEF_PARAM_TEST(Sz_Depth_Cn_Scale, cv::Size, MatDepth, MatCn, double); - -PERF_TEST_P(Sz_Depth_Cn_Scale, ImgProc_ResizeArea, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(0.2, 0.1, 0.05))) -{ - declare.time(1.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - const int interpolation = cv::INTER_AREA; - const double f = GET_PARAM(3); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::resize(d_src, dst, cv::Size(), f, f, interpolation); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::resize(src, dst, cv::Size(), f, f, interpolation); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// WarpAffine - -DEF_PARAM_TEST(Sz_Depth_Cn_Inter_Border, cv::Size, MatDepth, MatCn, Interpolation, BorderMode); - -PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpAffine, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - ALL_BORDER_MODES)) -{ - declare.time(20.0); - - const cv::Size size = GET_PARAM(0); - const int depth = 
GET_PARAM(1); - const int channels = GET_PARAM(2); - const int interpolation = GET_PARAM(3); - const int borderMode = GET_PARAM(4); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - const double aplha = CV_PI / 4; - const double mat[2 * 3] = - { - std::cos(aplha), -std::sin(aplha), src.cols / 2, - std::sin(aplha), std::cos(aplha), 0 - }; - const cv::Mat M(2, 3, CV_64F, (void*) mat); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::warpAffine(d_src, dst, M, size, interpolation, borderMode); - - GPU_SANITY_CHECK(dst, 1); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::warpAffine(src, dst, M, size, interpolation, borderMode); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// WarpPerspective - -PERF_TEST_P(Sz_Depth_Cn_Inter_Border, ImgProc_WarpPerspective, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), - ALL_BORDER_MODES)) -{ - declare.time(20.0); - - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - const int interpolation = GET_PARAM(3); - const int borderMode = GET_PARAM(4); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - const double aplha = CV_PI / 4; - double mat[3][3] = { {std::cos(aplha), -std::sin(aplha), src.cols / 2}, - {std::sin(aplha), std::cos(aplha), 0}, - {0.0, 0.0, 1.0}}; - const cv::Mat M(3, 3, CV_64F, (void*) mat); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::warpPerspective(d_src, dst, M, size, interpolation, borderMode); - - GPU_SANITY_CHECK(dst, 1); - } - else - { - cv::Mat dst; - - TEST_CYCLE() 
cv::warpPerspective(src, dst, M, size, interpolation, borderMode); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// Threshold - -CV_ENUM(ThreshOp, THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV) - -DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp); - -PERF_TEST_P(Sz_Depth_Op, ImgProc_Threshold, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F, CV_64F), - ThreshOp::all())) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int threshOp = GET_PARAM(2); - - cv::Mat src(size, depth); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::threshold(d_src, dst, 100.0, 255.0, threshOp); - - GPU_SANITY_CHECK(dst, 1e-10); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::threshold(src, dst, 100.0, 255.0, threshOp); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// HistEvenC1 - -PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC1, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_16S))) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - - cv::Mat src(size, depth); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf; - - TEST_CYCLE() cv::gpu::histEven(d_src, dst, d_buf, 30, 0, 180); - - GPU_SANITY_CHECK(dst); - } - else - { - const int hbins = 30; - const float hranges[] = {0.0f, 180.0f}; - const int histSize[] = {hbins}; - const float* ranges[] = {hranges}; - const int channels[] = {0}; - - cv::Mat dst; - - TEST_CYCLE() cv::calcHist(&src, 1, channels, cv::Mat(), dst, 1, histSize, ranges); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// HistEvenC4 - -PERF_TEST_P(Sz_Depth, ImgProc_HistEvenC4, - 
Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_16S))) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - - cv::Mat src(size, CV_MAKE_TYPE(depth, 4)); - declare.in(src, WARMUP_RNG); - - int histSize[] = {30, 30, 30, 30}; - int lowerLevel[] = {0, 0, 0, 0}; - int upperLevel[] = {180, 180, 180, 180}; - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat d_hist[4]; - cv::gpu::GpuMat d_buf; - - TEST_CYCLE() cv::gpu::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel); - - cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3; - d_hist[0].download(cpu_hist0); - d_hist[1].download(cpu_hist1); - d_hist[2].download(cpu_hist2); - d_hist[3].download(cpu_hist3); - SANITY_CHECK(cpu_hist0); - SANITY_CHECK(cpu_hist1); - SANITY_CHECK(cpu_hist2); - SANITY_CHECK(cpu_hist3); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// CalcHist - -PERF_TEST_P(Sz, ImgProc_CalcHist, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - cv::Mat src(size, CV_8UC1); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::calcHist(d_src, dst); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// EqualizeHist - -PERF_TEST_P(Sz, ImgProc_EqualizeHist, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - cv::Mat src(size, CV_8UC1); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_hist; - cv::gpu::GpuMat d_buf; - - TEST_CYCLE() cv::gpu::equalizeHist(d_src, dst, d_hist, d_buf); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::equalizeHist(src, dst); - - CPU_SANITY_CHECK(dst); - } -} - -DEF_PARAM_TEST(Sz_ClipLimit, cv::Size, double); - 
-PERF_TEST_P(Sz_ClipLimit, ImgProc_CLAHE, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(0.0, 40.0))) -{ - const cv::Size size = GET_PARAM(0); - const double clipLimit = GET_PARAM(1); - - cv::Mat src(size, CV_8UC1); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - cv::Ptr clahe = cv::gpu::createCLAHE(clipLimit); - cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() clahe->apply(d_src, dst); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Ptr clahe = cv::createCLAHE(clipLimit); - cv::Mat dst; - - TEST_CYCLE() clahe->apply(src, dst); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// Canny - -DEF_PARAM_TEST(Image_AppertureSz_L2gradient, string, int, bool); - -PERF_TEST_P(Image_AppertureSz_L2gradient, ImgProc_Canny, - Combine(Values("perf/800x600.png", "perf/1280x1024.png", "perf/1680x1050.png"), - Values(3, 5), - Bool())) -{ - const string fileName = GET_PARAM(0); - const int apperture_size = GET_PARAM(1); - const bool useL2gradient = GET_PARAM(2); - - const cv::Mat image = readImage(fileName, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(image.empty()); - - const double low_thresh = 50.0; - const double high_thresh = 100.0; - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_image(image); - cv::gpu::GpuMat dst; - cv::gpu::CannyBuf d_buf; - - TEST_CYCLE() cv::gpu::Canny(d_image, d_buf, dst, low_thresh, high_thresh, apperture_size, useL2gradient); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::Canny(image, dst, low_thresh, high_thresh, apperture_size, useL2gradient); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// MeanShiftFiltering - -DEF_PARAM_TEST_1(Image, string); - -PERF_TEST_P(Image, ImgProc_MeanShiftFiltering, - Values("gpu/meanshift/cones.png")) -{ - declare.time(300.0); - - const cv::Mat img = readImage(GetParam()); - ASSERT_FALSE(img.empty()); - - cv::Mat rgba; - cv::cvtColor(img, rgba, 
cv::COLOR_BGR2BGRA); - - const int sp = 50; - const int sr = 50; - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(rgba); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::meanShiftFiltering(d_src, dst, sp, sr); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::pyrMeanShiftFiltering(img, dst, sp, sr); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// MeanShiftProc - -PERF_TEST_P(Image, ImgProc_MeanShiftProc, - Values("gpu/meanshift/cones.png")) -{ - declare.time(300.0); - - const cv::Mat img = readImage(GetParam()); - ASSERT_FALSE(img.empty()); - - cv::Mat rgba; - cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA); - - const int sp = 50; - const int sr = 50; - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(rgba); - cv::gpu::GpuMat dstr; - cv::gpu::GpuMat dstsp; - - TEST_CYCLE() cv::gpu::meanShiftProc(d_src, dstr, dstsp, sp, sr); - - GPU_SANITY_CHECK(dstr); - GPU_SANITY_CHECK(dstsp); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// MeanShiftSegmentation - -PERF_TEST_P(Image, ImgProc_MeanShiftSegmentation, - Values("gpu/meanshift/cones.png")) -{ - declare.time(300.0); - - const cv::Mat img = readImage(GetParam()); - ASSERT_FALSE(img.empty()); - - cv::Mat rgba; - cv::cvtColor(img, rgba, cv::COLOR_BGR2BGRA); - - const int sp = 10; - const int sr = 10; - const int minsize = 20; - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(rgba); - cv::Mat dst; - - TEST_CYCLE() cv::gpu::meanShiftSegmentation(d_src, dst, sp, sr, minsize); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// BlendLinear - -PERF_TEST_P(Sz_Depth_Cn, ImgProc_BlendLinear, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_32F), - GPU_CHANNELS_1_3_4)) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int 
channels = GET_PARAM(2); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat img1(size, type); - cv::Mat img2(size, type); - declare.in(img1, img2, WARMUP_RNG); - - const cv::Mat weights1(size, CV_32FC1, cv::Scalar::all(0.5)); - const cv::Mat weights2(size, CV_32FC1, cv::Scalar::all(0.5)); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_img1(img1); - const cv::gpu::GpuMat d_img2(img2); - const cv::gpu::GpuMat d_weights1(weights1); - const cv::gpu::GpuMat d_weights2(weights2); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::blendLinear(d_img1, d_img2, d_weights1, d_weights2, dst); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// MatchTemplate8U - -CV_ENUM(TemplateMethod, TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED) - -DEF_PARAM_TEST(Sz_TemplateSz_Cn_Method, cv::Size, cv::Size, MatCn, TemplateMethod); - -PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate8U, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)), - GPU_CHANNELS_1_3_4, - TemplateMethod::all())) -{ - declare.time(300.0); - - const cv::Size size = GET_PARAM(0); - const cv::Size templ_size = GET_PARAM(1); - const int cn = GET_PARAM(2); - const int method = GET_PARAM(3); - - cv::Mat image(size, CV_MAKE_TYPE(CV_8U, cn)); - cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_8U, cn)); - declare.in(image, templ, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_image(image); - const cv::gpu::GpuMat d_templ(templ); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method); - - GPU_SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::matchTemplate(image, templ, dst, method); - - CPU_SANITY_CHECK(dst); - } -}; - -//////////////////////////////////////////////////////////////////////////////// -// MatchTemplate32F - 
-PERF_TEST_P(Sz_TemplateSz_Cn_Method, ImgProc_MatchTemplate32F, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(cv::Size(5, 5), cv::Size(16, 16), cv::Size(30, 30)), - GPU_CHANNELS_1_3_4, - Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))) -{ - declare.time(300.0); - - const cv::Size size = GET_PARAM(0); - const cv::Size templ_size = GET_PARAM(1); - const int cn = GET_PARAM(2); - int method = GET_PARAM(3); - - cv::Mat image(size, CV_MAKE_TYPE(CV_32F, cn)); - cv::Mat templ(templ_size, CV_MAKE_TYPE(CV_32F, cn)); - declare.in(image, templ, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_image(image); - const cv::gpu::GpuMat d_templ(templ); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::matchTemplate(d_image, d_templ, dst, method); - - GPU_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::matchTemplate(image, templ, dst, method); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// CornerHarris - -DEF_PARAM_TEST(Image_Type_Border_BlockSz_ApertureSz, string, MatType, BorderMode, int, int); - -PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerHarris, - Combine(Values("gpu/stereobm/aloe-L.png"), - Values(CV_8UC1, CV_32FC1), - Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)), - Values(3, 5, 7), - Values(0, 3, 5, 7))) -{ - const string fileName = GET_PARAM(0); - const int type = GET_PARAM(1); - const int borderMode = GET_PARAM(2); - const int blockSize = GET_PARAM(3); - const int apertureSize = GET_PARAM(4); - - cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - img.convertTo(img, type, type == CV_32F ? 
1.0 / 255.0 : 1.0); - - const double k = 0.5; - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_img(img); - cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_Dx; - cv::gpu::GpuMat d_Dy; - cv::gpu::GpuMat d_buf; - - TEST_CYCLE() cv::gpu::cornerHarris(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, k, borderMode); - - GPU_SANITY_CHECK(dst, 1e-4); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::cornerHarris(img, dst, blockSize, apertureSize, k, borderMode); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// CornerMinEigenVal - -PERF_TEST_P(Image_Type_Border_BlockSz_ApertureSz, ImgProc_CornerMinEigenVal, - Combine(Values("gpu/stereobm/aloe-L.png"), - Values(CV_8UC1, CV_32FC1), - Values(BorderMode(cv::BORDER_REFLECT101), BorderMode(cv::BORDER_REPLICATE), BorderMode(cv::BORDER_REFLECT)), - Values(3, 5, 7), - Values(0, 3, 5, 7))) -{ - const string fileName = GET_PARAM(0); - const int type = GET_PARAM(1); - const int borderMode = GET_PARAM(2); - const int blockSize = GET_PARAM(3); - const int apertureSize = GET_PARAM(4); - - cv::Mat img = readImage(fileName, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - img.convertTo(img, type, type == CV_32F ? 
1.0 / 255.0 : 1.0); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_img(img); - cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_Dx; - cv::gpu::GpuMat d_Dy; - cv::gpu::GpuMat d_buf; - - TEST_CYCLE() cv::gpu::cornerMinEigenVal(d_img, dst, d_Dx, d_Dy, d_buf, blockSize, apertureSize, borderMode); - - GPU_SANITY_CHECK(dst, 1e-4); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::cornerMinEigenVal(img, dst, blockSize, apertureSize, borderMode); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// BuildWarpPlaneMaps - -PERF_TEST_P(Sz, ImgProc_BuildWarpPlaneMaps, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1); - const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); - const cv::Mat T = cv::Mat::zeros(1, 3, CV_32F); - - if (PERF_RUN_GPU()) - { - cv::gpu::GpuMat map_x; - cv::gpu::GpuMat map_y; - - TEST_CYCLE() cv::gpu::buildWarpPlaneMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, T, 1.0, map_x, map_y); - - GPU_SANITY_CHECK(map_x); - GPU_SANITY_CHECK(map_y); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// BuildWarpCylindricalMaps - -PERF_TEST_P(Sz, ImgProc_BuildWarpCylindricalMaps, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - const cv::Mat K = cv::Mat::eye(3, 3, CV_32FC1); - const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); - - if (PERF_RUN_GPU()) - { - cv::gpu::GpuMat map_x; - cv::gpu::GpuMat map_y; - - TEST_CYCLE() cv::gpu::buildWarpCylindricalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); - - GPU_SANITY_CHECK(map_x); - GPU_SANITY_CHECK(map_y); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// BuildWarpSphericalMaps - -PERF_TEST_P(Sz, ImgProc_BuildWarpSphericalMaps, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - const cv::Mat K = 
cv::Mat::eye(3, 3, CV_32FC1); - const cv::Mat R = cv::Mat::ones(3, 3, CV_32FC1); - - if (PERF_RUN_GPU()) - { - cv::gpu::GpuMat map_x; - cv::gpu::GpuMat map_y; - - TEST_CYCLE() cv::gpu::buildWarpSphericalMaps(size, cv::Rect(0, 0, size.width, size.height), K, R, 1.0, map_x, map_y); - - GPU_SANITY_CHECK(map_x); - GPU_SANITY_CHECK(map_y); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// Rotate - -DEF_PARAM_TEST(Sz_Depth_Cn_Inter, cv::Size, MatDepth, MatCn, Interpolation); - -PERF_TEST_P(Sz_Depth_Cn_Inter, ImgProc_Rotate, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4, - Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)))) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - const int interpolation = GET_PARAM(3); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::rotate(d_src, dst, size, 30.0, 0, 0, interpolation); - - GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// PyrDown - -PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrDown, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::pyrDown(d_src, dst); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::pyrDown(src, dst); - - 
CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// PyrUp - -PERF_TEST_P(Sz_Depth_Cn, ImgProc_PyrUp, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::pyrUp(d_src, dst); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::pyrUp(src, dst); - - CPU_SANITY_CHECK(dst); - } -} - -////////////////////////////////////////////////////////////////////// -// CvtColor - -DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CvtColorInfo); - -PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColor, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_32F), - Values(CvtColorInfo(4, 4, cv::COLOR_RGBA2BGRA), - CvtColorInfo(4, 1, cv::COLOR_BGRA2GRAY), - CvtColorInfo(1, 4, cv::COLOR_GRAY2BGRA), - CvtColorInfo(3, 3, cv::COLOR_BGR2XYZ), - CvtColorInfo(3, 3, cv::COLOR_XYZ2BGR), - CvtColorInfo(3, 3, cv::COLOR_BGR2YCrCb), - CvtColorInfo(3, 3, cv::COLOR_YCrCb2BGR), - CvtColorInfo(3, 3, cv::COLOR_BGR2YUV), - CvtColorInfo(3, 3, cv::COLOR_YUV2BGR), - CvtColorInfo(3, 3, cv::COLOR_BGR2HSV), - CvtColorInfo(3, 3, cv::COLOR_HSV2BGR), - CvtColorInfo(3, 3, cv::COLOR_BGR2HLS), - CvtColorInfo(3, 3, cv::COLOR_HLS2BGR), - CvtColorInfo(3, 3, cv::COLOR_BGR2Lab), - CvtColorInfo(3, 3, cv::COLOR_LBGR2Lab), - CvtColorInfo(3, 3, cv::COLOR_BGR2Luv), - CvtColorInfo(3, 3, cv::COLOR_LBGR2Luv), - CvtColorInfo(3, 3, cv::COLOR_Lab2BGR), - CvtColorInfo(3, 3, cv::COLOR_Lab2LBGR), - CvtColorInfo(3, 3, cv::COLOR_Luv2RGB), - CvtColorInfo(3, 3, cv::COLOR_Luv2LRGB)))) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const CvtColorInfo info = GET_PARAM(2); - - 
cv::Mat src(size, CV_MAKETYPE(depth, info.scn)); - cv::randu(src, 0, depth == CV_8U ? 255.0 : 1.0); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn); - - GPU_SANITY_CHECK(dst, 1e-4); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn); - - CPU_SANITY_CHECK(dst); - } -} - -PERF_TEST_P(Sz_Depth_Code, ImgProc_CvtColorBayer, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U), - Values(CvtColorInfo(1, 3, cv::COLOR_BayerBG2BGR), - CvtColorInfo(1, 3, cv::COLOR_BayerGB2BGR), - CvtColorInfo(1, 3, cv::COLOR_BayerRG2BGR), - CvtColorInfo(1, 3, cv::COLOR_BayerGR2BGR), - - CvtColorInfo(1, 1, cv::COLOR_BayerBG2GRAY), - CvtColorInfo(1, 1, cv::COLOR_BayerGB2GRAY), - CvtColorInfo(1, 1, cv::COLOR_BayerRG2GRAY), - CvtColorInfo(1, 1, cv::COLOR_BayerGR2GRAY)))) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const CvtColorInfo info = GET_PARAM(2); - - cv::Mat src(size, CV_MAKETYPE(depth, info.scn)); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::cvtColor(d_src, dst, info.code, info.dcn); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::cvtColor(src, dst, info.code, info.dcn); - - CPU_SANITY_CHECK(dst); - } -} - -CV_ENUM(DemosaicingCode, - COLOR_BayerBG2BGR, COLOR_BayerGB2BGR, COLOR_BayerRG2BGR, COLOR_BayerGR2BGR, - COLOR_BayerBG2GRAY, COLOR_BayerGB2GRAY, COLOR_BayerRG2GRAY, COLOR_BayerGR2GRAY, - COLOR_BayerBG2BGR_MHT, COLOR_BayerGB2BGR_MHT, COLOR_BayerRG2BGR_MHT, COLOR_BayerGR2BGR_MHT, - COLOR_BayerBG2GRAY_MHT, COLOR_BayerGB2GRAY_MHT, COLOR_BayerRG2GRAY_MHT, COLOR_BayerGR2GRAY_MHT) - -DEF_PARAM_TEST(Sz_Code, cv::Size, DemosaicingCode); - -PERF_TEST_P(Sz_Code, ImgProc_Demosaicing, - Combine(GPU_TYPICAL_MAT_SIZES, - DemosaicingCode::all())) -{ - const cv::Size size = GET_PARAM(0); - const 
int code = GET_PARAM(1); - - cv::Mat src(size, CV_8UC1); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::demosaicing(d_src, dst, code); - - GPU_SANITY_CHECK(dst); - } - else - { - if (code >= cv::COLOR_COLORCVT_MAX) - { - FAIL_NO_CPU(); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::cvtColor(src, dst, code); - - CPU_SANITY_CHECK(dst); - } - } -} - -////////////////////////////////////////////////////////////////////// -// SwapChannels - -PERF_TEST_P(Sz, ImgProc_SwapChannels, - GPU_TYPICAL_MAT_SIZES) -{ - const cv::Size size = GetParam(); - - cv::Mat src(size, CV_8UC4); - declare.in(src, WARMUP_RNG); - - const int dstOrder[] = {2, 1, 0, 3}; - - if (PERF_RUN_GPU()) - { - cv::gpu::GpuMat dst(src); - - TEST_CYCLE() cv::gpu::swapChannels(dst, dstOrder); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// AlphaComp - -CV_ENUM(AlphaOp, ALPHA_OVER, ALPHA_IN, ALPHA_OUT, ALPHA_ATOP, ALPHA_XOR, ALPHA_PLUS, ALPHA_OVER_PREMUL, ALPHA_IN_PREMUL, ALPHA_OUT_PREMUL, ALPHA_ATOP_PREMUL, ALPHA_XOR_PREMUL, ALPHA_PLUS_PREMUL, ALPHA_PREMUL) - -DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, AlphaOp); - -PERF_TEST_P(Sz_Type_Op, ImgProc_AlphaComp, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8UC4, CV_16UC4, CV_32SC4, CV_32FC4), - AlphaOp::all())) -{ - const cv::Size size = GET_PARAM(0); - const int type = GET_PARAM(1); - const int alpha_op = GET_PARAM(2); - - cv::Mat img1(size, type); - cv::Mat img2(size, type); - declare.in(img1, img2, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_img1(img1); - const cv::gpu::GpuMat d_img2(img2); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::alphaComp(d_img1, d_img2, dst, alpha_op); - - GPU_SANITY_CHECK(dst, 1e-3, ERROR_RELATIVE); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// 
ImagePyramidBuild - -PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidBuild, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - const int nLayers = 5; - const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - - cv::gpu::ImagePyramid d_pyr; - - TEST_CYCLE() d_pyr.build(d_src, nLayers); - - cv::gpu::GpuMat dst; - d_pyr.getLayer(dst, dstSize); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// ImagePyramidGetLayer - -PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidGetLayer, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(CV_8U, CV_16U, CV_32F), - GPU_CHANNELS_1_3_4)) -{ - const cv::Size size = GET_PARAM(0); - const int depth = GET_PARAM(1); - const int channels = GET_PARAM(2); - - const int type = CV_MAKE_TYPE(depth, channels); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - const int nLayers = 3; - const cv::Size dstSize(size.width / 2 + 10, size.height / 2 + 10); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - cv::gpu::ImagePyramid d_pyr(d_src, nLayers); - - TEST_CYCLE() d_pyr.getLayer(dst, dstSize); - - GPU_SANITY_CHECK(dst); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////////////////////// -// HoughLines - -namespace -{ - struct Vec4iComparator - { - bool operator()(const cv::Vec4i& a, const cv::Vec4i b) const - { - if (a[0] != b[0]) return a[0] < b[0]; - else if(a[1] != b[1]) return a[1] < b[1]; - else if(a[2] != b[2]) return a[2] < b[2]; - else return a[3] < b[3]; - } - }; - struct Vec3fComparator - { - bool operator()(const cv::Vec3f& a, 
const cv::Vec3f b) const - { - if(a[0] != b[0]) return a[0] < b[0]; - else if(a[1] != b[1]) return a[1] < b[1]; - else return a[2] < b[2]; - } - }; - struct Vec2fComparator - { - bool operator()(const cv::Vec2f& a, const cv::Vec2f b) const - { - if(a[0] != b[0]) return a[0] < b[0]; - else return a[1] < b[1]; - } - }; -} - -PERF_TEST_P(Sz, ImgProc_HoughLines, - GPU_TYPICAL_MAT_SIZES) -{ - declare.time(30.0); - - const cv::Size size = GetParam(); - - const float rho = 1.0f; - const float theta = static_cast(CV_PI / 180.0); - const int threshold = 300; - - cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)); - cv::line(src, cv::Point(0, 100), cv::Point(src.cols, 100), cv::Scalar::all(255), 1); - cv::line(src, cv::Point(0, 200), cv::Point(src.cols, 200), cv::Scalar::all(255), 1); - cv::line(src, cv::Point(0, 400), cv::Point(src.cols, 400), cv::Scalar::all(255), 1); - cv::line(src, cv::Point(100, 0), cv::Point(100, src.rows), cv::Scalar::all(255), 1); - cv::line(src, cv::Point(200, 0), cv::Point(200, src.rows), cv::Scalar::all(255), 1); - cv::line(src, cv::Point(400, 0), cv::Point(400, src.rows), cv::Scalar::all(255), 1); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat d_lines; - cv::gpu::HoughLinesBuf d_buf; - - TEST_CYCLE() cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold); - - cv::Mat gpu_lines(d_lines.row(0)); - cv::Vec2f* begin = gpu_lines.ptr(0); - cv::Vec2f* end = begin + gpu_lines.cols; - std::sort(begin, end, Vec2fComparator()); - SANITY_CHECK(gpu_lines); - } - else - { - std::vector cpu_lines; - - TEST_CYCLE() cv::HoughLines(src, cpu_lines, rho, theta, threshold); - - SANITY_CHECK(cpu_lines); - } -} - -////////////////////////////////////////////////////////////////////// -// HoughLinesP - -DEF_PARAM_TEST_1(Image, std::string); - -PERF_TEST_P(Image, ImgProc_HoughLinesP, - testing::Values("cv/shared/pic5.png", "stitching/a1.png")) -{ - declare.time(30.0); - - const std::string fileName = getDataPath(GetParam()); - - 
const float rho = 1.0f; - const float theta = static_cast(CV_PI / 180.0); - const int threshold = 100; - const int minLineLenght = 50; - const int maxLineGap = 5; - - const cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(image.empty()); - - cv::Mat mask; - cv::Canny(image, mask, 50, 100); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_mask(mask); - cv::gpu::GpuMat d_lines; - cv::gpu::HoughLinesBuf d_buf; - - TEST_CYCLE() cv::gpu::HoughLinesP(d_mask, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); - - cv::Mat gpu_lines(d_lines); - cv::Vec4i* begin = gpu_lines.ptr(); - cv::Vec4i* end = begin + gpu_lines.cols; - std::sort(begin, end, Vec4iComparator()); - SANITY_CHECK(gpu_lines); - } - else - { - std::vector cpu_lines; - - TEST_CYCLE() cv::HoughLinesP(mask, cpu_lines, rho, theta, threshold, minLineLenght, maxLineGap); - - SANITY_CHECK(cpu_lines); - } -} - -////////////////////////////////////////////////////////////////////// -// HoughCircles - -DEF_PARAM_TEST(Sz_Dp_MinDist, cv::Size, float, float); - -PERF_TEST_P(Sz_Dp_MinDist, ImgProc_HoughCircles, - Combine(GPU_TYPICAL_MAT_SIZES, - Values(1.0f, 2.0f, 4.0f), - Values(1.0f))) -{ - declare.time(30.0); - - const cv::Size size = GET_PARAM(0); - const float dp = GET_PARAM(1); - const float minDist = GET_PARAM(2); - - const int minRadius = 10; - const int maxRadius = 30; - const int cannyThreshold = 100; - const int votesThreshold = 15; - - cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)); - cv::circle(src, cv::Point(100, 100), 20, cv::Scalar::all(255), -1); - cv::circle(src, cv::Point(200, 200), 25, cv::Scalar::all(255), -1); - cv::circle(src, cv::Point(200, 100), 25, cv::Scalar::all(255), -1); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat d_circles; - cv::gpu::HoughCirclesBuf d_buf; - - TEST_CYCLE() cv::gpu::HoughCircles(d_src, d_circles, d_buf, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius); - - cv::Mat 
gpu_circles(d_circles); - cv::Vec3f* begin = gpu_circles.ptr(0); - cv::Vec3f* end = begin + gpu_circles.cols; - std::sort(begin, end, Vec3fComparator()); - SANITY_CHECK(gpu_circles); - } - else - { - std::vector cpu_circles; - - TEST_CYCLE() cv::HoughCircles(src, cpu_circles, cv::HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius); - - SANITY_CHECK(cpu_circles); - } -} - -////////////////////////////////////////////////////////////////////// -// GeneralizedHough - -enum { GHT_POSITION = cv::GeneralizedHough::GHT_POSITION, - GHT_SCALE = cv::GeneralizedHough::GHT_SCALE, - GHT_ROTATION = cv::GeneralizedHough::GHT_ROTATION - }; - -CV_FLAGS(GHMethod, GHT_POSITION, GHT_SCALE, GHT_ROTATION); - -DEF_PARAM_TEST(Method_Sz, GHMethod, cv::Size); - -PERF_TEST_P(Method_Sz, ImgProc_GeneralizedHough, - Combine(Values(GHMethod(GHT_POSITION), GHMethod(GHT_POSITION | GHT_SCALE), GHMethod(GHT_POSITION | GHT_ROTATION), GHMethod(GHT_POSITION | GHT_SCALE | GHT_ROTATION)), - GPU_TYPICAL_MAT_SIZES)) -{ - declare.time(10); - - const int method = GET_PARAM(0); - const cv::Size imageSize = GET_PARAM(1); - - const cv::Mat templ = readImage("cv/shared/templ.png", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(templ.empty()); - - cv::Mat image(imageSize, CV_8UC1, cv::Scalar::all(0)); - templ.copyTo(image(cv::Rect(50, 50, templ.cols, templ.rows))); - - cv::RNG rng(123456789); - const int objCount = rng.uniform(5, 15); - for (int i = 0; i < objCount; ++i) - { - double scale = rng.uniform(0.7, 1.3); - bool rotate = 1 == rng.uniform(0, 2); - - cv::Mat obj; - cv::resize(templ, obj, cv::Size(), scale, scale); - if (rotate) - obj = obj.t(); - - cv::Point pos; - - pos.x = rng.uniform(0, image.cols - obj.cols); - pos.y = rng.uniform(0, image.rows - obj.rows); - - cv::Mat roi = image(cv::Rect(pos, obj.size())); - cv::add(roi, obj, roi); - } - - cv::Mat edges; - cv::Canny(image, edges, 50, 100); - - cv::Mat dx, dy; - cv::Sobel(image, dx, CV_32F, 1, 0); - cv::Sobel(image, dy, CV_32F, 
0, 1); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_edges(edges); - const cv::gpu::GpuMat d_dx(dx); - const cv::gpu::GpuMat d_dy(dy); - cv::gpu::GpuMat posAndVotes; - - cv::Ptr d_hough = cv::gpu::GeneralizedHough_GPU::create(method); - if (method & GHT_ROTATION) - { - d_hough->set("maxAngle", 90.0); - d_hough->set("angleStep", 2.0); - } - - d_hough->setTemplate(cv::gpu::GpuMat(templ)); - - TEST_CYCLE() d_hough->detect(d_edges, d_dx, d_dy, posAndVotes); - - const cv::gpu::GpuMat positions(1, posAndVotes.cols, CV_32FC4, posAndVotes.data); - GPU_SANITY_CHECK(positions); - } - else - { - cv::Mat positions; - - cv::Ptr hough = cv::GeneralizedHough::create(method); - if (method & GHT_ROTATION) - { - hough->set("maxAngle", 90.0); - hough->set("angleStep", 2.0); - } - - hough->setTemplate(templ); - - TEST_CYCLE() hough->detect(edges, dx, dy, positions); - - CPU_SANITY_CHECK(positions); - } -} diff --git a/modules/gpu/perf/perf_labeling.cpp b/modules/gpu/perf/perf_labeling.cpp deleted file mode 100644 index 0484da9d59..0000000000 --- a/modules/gpu/perf/perf_labeling.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "perf_precomp.hpp" - -using namespace std; -using namespace testing; -using namespace perf; - -DEF_PARAM_TEST_1(Image, string); - -struct GreedyLabeling -{ - struct dot - { - int x; - int y; - - static dot make(int i, int j) - { - dot d; d.x = i; d.y = j; - return d; - } - }; - - struct InInterval - { - InInterval(const int& _lo, const int& _hi) : lo(-_lo), hi(_hi) {} - const int lo, hi; - - bool operator() (const unsigned char a, const unsigned char b) const - { - int d = a - b; - return lo <= d && d <= hi; - } - - private: - InInterval& operator=(const InInterval&); - - - }; - - GreedyLabeling(cv::Mat img) - : image(img), _labels(image.size(), CV_32SC1, cv::Scalar::all(-1)) {stack = new dot[image.cols * image.rows];} - - ~GreedyLabeling(){delete[] stack;} - - void operator() (cv::Mat labels) const - { - labels.setTo(cv::Scalar::all(-1)); - InInterval inInt(0, 2); - int cc = -1; - - int* dist_labels = (int*)labels.data; - int pitch = static_cast(labels.step1()); - - unsigned char* source = (unsigned char*)image.data; - int width = image.cols; - int height = image.rows; - - for (int j = 0; j < image.rows; ++j) - for (int i = 0; i < image.cols; ++i) - { - if (dist_labels[j * pitch + i] != -1) continue; - - dot* top = stack; - dot p = dot::make(i, j); - cc++; - - dist_labels[j * pitch + i] = cc; - - while (top >= stack) - { - int* dl = &dist_labels[p.y * pitch + p.x]; - unsigned char* sp = &source[p.y * image.step1() + p.x]; - - dl[0] = cc; - - //right - if( p.x < (width - 1) && dl[ +1] == -1 && inInt(sp[0], sp[+1])) - *top++ = dot::make(p.x + 1, p.y); - - //left - if( p.x > 0 && dl[-1] == -1 && inInt(sp[0], sp[-1])) - *top++ = dot::make(p.x - 1, p.y); - - //bottom - if( p.y < (height - 1) && dl[+pitch] == -1 && inInt(sp[0], sp[+image.step1()])) - *top++ = dot::make(p.x, p.y + 1); - - //top - if( p.y > 0 && dl[-pitch] == -1 && inInt(sp[0], sp[-static_cast(image.step1())])) - *top++ = dot::make(p.x, p.y - 1); - - p = *--top; - } - } - } - - 
cv::Mat image; - cv::Mat _labels; - dot* stack; -}; - -PERF_TEST_P(Image, DISABLED_Labeling_ConnectivityMask, - Values("gpu/labeling/aloe-disp.png")) -{ - declare.time(1.0); - - const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(image.empty()); - - if (PERF_RUN_GPU()) - { - cv::gpu::GpuMat d_image(image); - cv::gpu::GpuMat mask; - - TEST_CYCLE() cv::gpu::connectivityMask(d_image, mask, cv::Scalar::all(0), cv::Scalar::all(2)); - - GPU_SANITY_CHECK(mask); - } - else - { - FAIL_NO_CPU(); - } -} - -PERF_TEST_P(Image, DISABLED_Labeling_ConnectedComponents, - Values("gpu/labeling/aloe-disp.png")) -{ - declare.time(1.0); - - const cv::Mat image = readImage(GetParam(), cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(image.empty()); - - if (PERF_RUN_GPU()) - { - cv::gpu::GpuMat d_mask; - cv::gpu::connectivityMask(cv::gpu::GpuMat(image), d_mask, cv::Scalar::all(0), cv::Scalar::all(2)); - - cv::gpu::GpuMat components; - - TEST_CYCLE() cv::gpu::labelComponents(d_mask, components); - - GPU_SANITY_CHECK(components); - } - else - { - GreedyLabeling host(image); - - TEST_CYCLE() host(host._labels); - - cv::Mat components = host._labels; - CPU_SANITY_CHECK(components); - } -} diff --git a/modules/gpu/src/bilateral_filter.cpp b/modules/gpu/src/bilateral_filter.cpp deleted file mode 100644 index ef5be018da..0000000000 --- a/modules/gpu/src/bilateral_filter.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
-// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" - -using namespace cv; -using namespace cv::gpu; - -#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) - -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_no_cuda(); } -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_no_cuda(); } - -void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - -#else /* !defined (HAVE_CUDA) */ - -namespace cv { namespace gpu { namespace cudev -{ - namespace disp_bilateral_filter - { - void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc); - - template - void disp_bilateral_filter(PtrStepSz disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream); - } -}}} - -using namespace ::cv::gpu::cudev::disp_bilateral_filter; - -namespace -{ - const float DEFAULT_EDGE_THRESHOLD = 0.1f; - const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f; - const float DEFAULT_SIGMA_RANGE = 10.0f; - - inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len) - { - Mat cpu_table_color(1, len, CV_32F); - - float* line = cpu_table_color.ptr(); - - for(int i = 0; i < len; i++) - line[i] = static_cast(std::exp(-double(i * i) / (2 * sigma_range * sigma_range))); - - table_color.upload(cpu_table_color); - } - - inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space) - { - int half = (win_size >> 1); - - Mat cpu_table_space(half + 1, half + 1, CV_32F); - - for (int y = 0; y <= half; ++y) - { - float* row = cpu_table_space.ptr(y); - for (int x = 0; x <= half; ++x) - row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space); - } - - table_space.upload(cpu_table_space); - } - - template - void disp_bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold, - GpuMat& table_color, GpuMat& 
table_space, - const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream) - { - short edge_disc = std::max(short(1), short(ndisp * edge_threshold + 0.5)); - short max_disc = short(ndisp * max_disc_threshold + 0.5); - - disp_load_constants(table_color.ptr(), table_space, ndisp, radius, edge_disc, max_disc); - - if (&dst != &disp) - { - if (stream) - stream.enqueueCopy(disp, dst); - else - disp.copyTo(dst); - } - - disp_bilateral_filter(dst, img, img.channels(), iters, StreamAccessor::getStream(stream)); - } - - typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, - GpuMat& table_color, GpuMat& table_space, - const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream); - - const bilateral_filter_operator_t operators[] = - {disp_bilateral_filter_operator, 0, 0, disp_bilateral_filter_operator, 0, 0, 0, 0}; -} - -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_) - : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD), - sigma_range(DEFAULT_SIGMA_RANGE) -{ - calc_color_weighted_table(table_color, sigma_range, 255); - calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f); -} - -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_, - float max_disc_threshold_, float sigma_range_) - : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_), - sigma_range(sigma_range_) -{ - calc_color_weighted_table(table_color, sigma_range, 255); - calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f); -} - -void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream) -{ - CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters); - CV_Assert(disp.rows == img.rows 
&& disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3)); - operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream); -} - -#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/blend.cpp b/modules/gpu/src/blend.cpp deleted file mode 100644 index 3fd6507810..0000000000 --- a/modules/gpu/src/blend.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -using namespace cv; -using namespace cv::gpu; - -#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) - -void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - -#else - -namespace cv { namespace gpu { namespace cudev -{ - namespace blend - { - template - void blendLinearCaller(int rows, int cols, int cn, PtrStep img1, PtrStep img2, PtrStepf weights1, PtrStepf weights2, PtrStep result, cudaStream_t stream); - - void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream); - } -}}} - -using namespace ::cv::gpu::cudev::blend; - -void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, - GpuMat& result, Stream& stream) -{ - CV_Assert(img1.size() == img2.size()); - CV_Assert(img1.type() == img2.type()); - CV_Assert(weights1.size() == img1.size()); - CV_Assert(weights2.size() == img2.size()); - CV_Assert(weights1.type() == CV_32F); - CV_Assert(weights2.type() == CV_32F); - - const Size size = img1.size(); - const int depth = 
img1.depth(); - const int cn = img1.channels(); - - result.create(size, CV_MAKE_TYPE(depth, cn)); - - switch (depth) - { - case CV_8U: - if (cn != 4) - blendLinearCaller(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream)); - else - blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream)); - break; - case CV_32F: - blendLinearCaller(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream)); - break; - default: - CV_Error(cv::Error::StsUnsupportedFormat, "bad image depth in linear blending function"); - } -} - -#endif diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp deleted file mode 100644 index dc35823486..0000000000 --- a/modules/gpu/src/color.cpp +++ /dev/null @@ -1,1989 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" - -using namespace cv; -using namespace cv::gpu; - -#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) - -void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::demosaicing(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_no_cuda(); } -void cv::gpu::gammaCorrection(const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); } - -#else /* !defined (HAVE_CUDA) */ - -#include "cvt_color_internal.h" - -namespace cv { namespace gpu { - namespace cudev - { - template - void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template - void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - - template - void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); - } -}} - -using namespace ::cv::gpu::cudev; - -namespace -{ - typedef void (*gpu_func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - void bgr_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgr_to_rgb_8u, 0, bgr_to_rgb_16u, 0, 0, bgr_to_rgb_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgr_to_bgra_8u, 0, bgr_to_bgra_16u, 0, 0, bgr_to_bgra_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); - - 
funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgr_to_rgba_8u, 0, bgr_to_rgba_16u, 0, 0, bgr_to_rgba_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgra_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgra_to_bgr_8u, 0, bgra_to_bgr_16u, 0, 0, bgra_to_bgr_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgra_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgra_to_rgb_8u, 0, bgra_to_rgb_16u, 0, 0, bgra_to_rgb_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgra_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgra_to_rgba_8u, 0, bgra_to_rgba_16u, 0, 0, bgra_to_rgba_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - 
CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_8UC2); - - cudev::bgr_to_bgr555(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_8UC2); - - cudev::bgr_to_bgr565(src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_8UC2); - - cudev::rgb_to_bgr555(src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_8UC2); - - cudev::rgb_to_bgr565(src, dst, StreamAccessor::getStream(stream)); - } - - void bgra_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_8UC2); - - cudev::bgra_to_bgr555(src, dst, StreamAccessor::getStream(stream)); - } - - void bgra_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_8UC2); - - cudev::bgra_to_bgr565(src, dst, StreamAccessor::getStream(stream)); - } - - void rgba_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_8UC2); - - cudev::rgba_to_bgr555(src, dst, StreamAccessor::getStream(stream)); - } - - void rgba_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_8UC2); - - cudev::rgba_to_bgr565(src, dst, 
StreamAccessor::getStream(stream)); - } - - void bgr555_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC3); - - cudev::bgr555_to_rgb(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr565_to_rgb(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC3); - - cudev::bgr565_to_rgb(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr555_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC3); - - cudev::bgr555_to_bgr(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr565_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC3); - - cudev::bgr565_to_bgr(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr555_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC4); - - cudev::bgr555_to_rgba(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr565_to_rgba(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC4); - - cudev::bgr565_to_rgba(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr555_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC4); - - cudev::bgr555_to_bgra(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr565_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - 
CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC4); - - cudev::bgr565_to_bgra(src, dst, StreamAccessor::getStream(stream)); - } - - void gray_to_bgr(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {gray_to_bgr_8u, 0, gray_to_bgr_16u, 0, 0, gray_to_bgr_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 1); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 3)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void gray_to_bgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {gray_to_bgra_8u, 0, gray_to_bgra_16u, 0, 0, gray_to_bgra_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 1); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 4)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void gray_to_bgr555(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 1); - - dst.create(src.size(), CV_8UC2); - - cudev::gray_to_bgr555(src, dst, StreamAccessor::getStream(stream)); - } - - void gray_to_bgr565(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 1); - - dst.create(src.size(), CV_8UC2); - - cudev::gray_to_bgr565(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr555_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC1); - - cudev::bgr555_to_gray(src, dst, StreamAccessor::getStream(stream)); - } - - void bgr565_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - CV_Assert(src.depth() == CV_8U); - 
CV_Assert(src.channels() == 2); - - dst.create(src.size(), CV_8UC1); - - cudev::bgr565_to_gray(src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {rgb_to_gray_8u, 0, rgb_to_gray_16u, 0, 0, rgb_to_gray_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgr_to_gray_8u, 0, bgr_to_gray_16u, 0, 0, bgr_to_gray_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void rgba_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {rgba_to_gray_8u, 0, rgba_to_gray_16u, 0, 0, rgba_to_gray_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgra_to_gray(const GpuMat& src, GpuMat& dst, int, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[] = {bgra_to_gray_8u, 0, bgra_to_gray_16u, 0, 0, bgra_to_gray_32f}; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); - - funcs[src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - 
void rgb_to_yuv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {rgb_to_yuv_8u, 0, rgb_to_yuv_16u, 0, 0, rgb_to_yuv_32f}, - {rgba_to_yuv_8u, 0, rgba_to_yuv_16u, 0, 0, rgba_to_yuv_32f} - }, - { - {rgb_to_yuv4_8u, 0, rgb_to_yuv4_16u, 0, 0, rgb_to_yuv4_32f}, - {rgba_to_yuv4_8u, 0, rgba_to_yuv4_16u, 0, 0, rgba_to_yuv4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_yuv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {bgr_to_yuv_8u, 0, bgr_to_yuv_16u, 0, 0, bgr_to_yuv_32f}, - {bgra_to_yuv_8u, 0, bgra_to_yuv_16u, 0, 0, bgra_to_yuv_32f} - }, - { - {bgr_to_yuv4_8u, 0, bgr_to_yuv4_16u, 0, 0, bgr_to_yuv4_32f}, - {bgra_to_yuv4_8u, 0, bgra_to_yuv4_16u, 0, 0, bgra_to_yuv4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void yuv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {yuv_to_rgb_8u, 0, yuv_to_rgb_16u, 0, 0, yuv_to_rgb_32f}, - {yuv4_to_rgb_8u, 0, yuv4_to_rgb_16u, 0, 0, yuv4_to_rgb_32f} - }, - { - {yuv_to_rgba_8u, 0, yuv_to_rgba_16u, 0, 0, yuv_to_rgba_32f}, - {yuv4_to_rgba_8u, 0, 
yuv4_to_rgba_16u, 0, 0, yuv4_to_rgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void yuv_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {yuv_to_bgr_8u, 0, yuv_to_bgr_16u, 0, 0, yuv_to_bgr_32f}, - {yuv4_to_bgr_8u, 0, yuv4_to_bgr_16u, 0, 0, yuv4_to_bgr_32f} - }, - { - {yuv_to_bgra_8u, 0, yuv_to_bgra_16u, 0, 0, yuv_to_bgra_32f}, - {yuv4_to_bgra_8u, 0, yuv4_to_bgra_16u, 0, 0, yuv4_to_bgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_YCrCb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {rgb_to_YCrCb_8u, 0, rgb_to_YCrCb_16u, 0, 0, rgb_to_YCrCb_32f}, - {rgba_to_YCrCb_8u, 0, rgba_to_YCrCb_16u, 0, 0, rgba_to_YCrCb_32f} - }, - { - {rgb_to_YCrCb4_8u, 0, rgb_to_YCrCb4_16u, 0, 0, rgb_to_YCrCb4_32f}, - {rgba_to_YCrCb4_8u, 0, rgba_to_YCrCb4_16u, 0, 0, rgba_to_YCrCb4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 
4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_YCrCb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {bgr_to_YCrCb_8u, 0, bgr_to_YCrCb_16u, 0, 0, bgr_to_YCrCb_32f}, - {bgra_to_YCrCb_8u, 0, bgra_to_YCrCb_16u, 0, 0, bgra_to_YCrCb_32f} - }, - { - {bgr_to_YCrCb4_8u, 0, bgr_to_YCrCb4_16u, 0, 0, bgr_to_YCrCb4_32f}, - {bgra_to_YCrCb4_8u, 0, bgra_to_YCrCb4_16u, 0, 0, bgra_to_YCrCb4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void YCrCb_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {YCrCb_to_rgb_8u, 0, YCrCb_to_rgb_16u, 0, 0, YCrCb_to_rgb_32f}, - {YCrCb4_to_rgb_8u, 0, YCrCb4_to_rgb_16u, 0, 0, YCrCb4_to_rgb_32f} - }, - { - {YCrCb_to_rgba_8u, 0, YCrCb_to_rgba_16u, 0, 0, YCrCb_to_rgba_32f}, - {YCrCb4_to_rgba_8u, 0, YCrCb4_to_rgba_16u, 0, 0, YCrCb4_to_rgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void YCrCb_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {YCrCb_to_bgr_8u, 0, YCrCb_to_bgr_16u, 0, 0, YCrCb_to_bgr_32f}, - {YCrCb4_to_bgr_8u, 0, 
YCrCb4_to_bgr_16u, 0, 0, YCrCb4_to_bgr_32f} - }, - { - {YCrCb_to_bgra_8u, 0, YCrCb_to_bgra_16u, 0, 0, YCrCb_to_bgra_32f}, - {YCrCb4_to_bgra_8u, 0, YCrCb4_to_bgra_16u, 0, 0, YCrCb4_to_bgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_xyz(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {rgb_to_xyz_8u, 0, rgb_to_xyz_16u, 0, 0, rgb_to_xyz_32f}, - {rgba_to_xyz_8u, 0, rgba_to_xyz_16u, 0, 0, rgba_to_xyz_32f} - }, - { - {rgb_to_xyz4_8u, 0, rgb_to_xyz4_16u, 0, 0, rgb_to_xyz4_32f}, - {rgba_to_xyz4_8u, 0, rgba_to_xyz4_16u, 0, 0, rgba_to_xyz4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_xyz(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {bgr_to_xyz_8u, 0, bgr_to_xyz_16u, 0, 0, bgr_to_xyz_32f}, - {bgra_to_xyz_8u, 0, bgra_to_xyz_16u, 0, 0, bgra_to_xyz_32f} - }, - { - {bgr_to_xyz4_8u, 0, bgr_to_xyz4_16u, 0, 0, bgr_to_xyz4_32f}, - {bgra_to_xyz4_8u, 0, bgra_to_xyz4_16u, 0, 0, bgra_to_xyz4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 
|| dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void xyz_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {xyz_to_rgb_8u, 0, xyz_to_rgb_16u, 0, 0, xyz_to_rgb_32f}, - {xyz4_to_rgb_8u, 0, xyz4_to_rgb_16u, 0, 0, xyz4_to_rgb_32f} - }, - { - {xyz_to_rgba_8u, 0, xyz_to_rgba_16u, 0, 0, xyz_to_rgba_32f}, - {xyz4_to_rgba_8u, 0, xyz4_to_rgba_16u, 0, 0, xyz4_to_rgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void xyz_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {xyz_to_bgr_8u, 0, xyz_to_bgr_16u, 0, 0, xyz_to_bgr_32f}, - {xyz4_to_bgr_8u, 0, xyz4_to_bgr_16u, 0, 0, xyz4_to_bgr_32f} - }, - { - {xyz_to_bgra_8u, 0, xyz_to_bgra_16u, 0, 0, xyz_to_bgra_32f}, - {xyz4_to_bgra_8u, 0, xyz4_to_bgra_16u, 0, 0, xyz4_to_bgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_hsv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {rgb_to_hsv_8u, 0, 0, 0, 0, rgb_to_hsv_32f}, - 
{rgba_to_hsv_8u, 0, 0, 0, 0, rgba_to_hsv_32f}, - }, - { - {rgb_to_hsv4_8u, 0, 0, 0, 0, rgb_to_hsv4_32f}, - {rgba_to_hsv4_8u, 0, 0, 0, 0, rgba_to_hsv4_32f}, - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_hsv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {bgr_to_hsv_8u, 0, 0, 0, 0, bgr_to_hsv_32f}, - {bgra_to_hsv_8u, 0, 0, 0, 0, bgra_to_hsv_32f} - }, - { - {bgr_to_hsv4_8u, 0, 0, 0, 0, bgr_to_hsv4_32f}, - {bgra_to_hsv4_8u, 0, 0, 0, 0, bgra_to_hsv4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hsv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hsv_to_rgb_8u, 0, 0, 0, 0, hsv_to_rgb_32f}, - {hsv4_to_rgb_8u, 0, 0, 0, 0, hsv4_to_rgb_32f} - }, - { - {hsv_to_rgba_8u, 0, 0, 0, 0, hsv_to_rgba_32f}, - {hsv4_to_rgba_8u, 0, 0, 0, 0, hsv4_to_rgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hsv_to_bgr(const GpuMat& src, GpuMat& 
dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hsv_to_bgr_8u, 0, 0, 0, 0, hsv_to_bgr_32f}, - {hsv4_to_bgr_8u, 0, 0, 0, 0, hsv4_to_bgr_32f} - }, - { - {hsv_to_bgra_8u, 0, 0, 0, 0, hsv_to_bgra_32f}, - {hsv4_to_bgra_8u, 0, 0, 0, 0, hsv4_to_bgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_hls(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {rgb_to_hls_8u, 0, 0, 0, 0, rgb_to_hls_32f}, - {rgba_to_hls_8u, 0, 0, 0, 0, rgba_to_hls_32f}, - }, - { - {rgb_to_hls4_8u, 0, 0, 0, 0, rgb_to_hls4_32f}, - {rgba_to_hls4_8u, 0, 0, 0, 0, rgba_to_hls4_32f}, - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_hls(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {bgr_to_hls_8u, 0, 0, 0, 0, bgr_to_hls_32f}, - {bgra_to_hls_8u, 0, 0, 0, 0, bgra_to_hls_32f} - }, - { - {bgr_to_hls4_8u, 0, 0, 0, 0, bgr_to_hls4_32f}, - {bgra_to_hls4_8u, 0, 0, 0, 0, bgra_to_hls4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), 
CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hls_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hls_to_rgb_8u, 0, 0, 0, 0, hls_to_rgb_32f}, - {hls4_to_rgb_8u, 0, 0, 0, 0, hls4_to_rgb_32f} - }, - { - {hls_to_rgba_8u, 0, 0, 0, 0, hls_to_rgba_32f}, - {hls4_to_rgba_8u, 0, 0, 0, 0, hls4_to_rgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hls_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hls_to_bgr_8u, 0, 0, 0, 0, hls_to_bgr_32f}, - {hls4_to_bgr_8u, 0, 0, 0, 0, hls4_to_bgr_32f} - }, - { - {hls_to_bgra_8u, 0, 0, 0, 0, hls_to_bgra_32f}, - {hls4_to_bgra_8u, 0, 0, 0, 0, hls4_to_bgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_hsv_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {rgb_to_hsv_full_8u, 0, 0, 0, 0, rgb_to_hsv_full_32f}, - {rgba_to_hsv_full_8u, 0, 0, 0, 0, rgba_to_hsv_full_32f}, - }, - { - {rgb_to_hsv4_full_8u, 0, 0, 0, 0, rgb_to_hsv4_full_32f}, - {rgba_to_hsv4_full_8u, 0, 0, 0, 0, rgba_to_hsv4_full_32f}, - } 
- }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_hsv_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {bgr_to_hsv_full_8u, 0, 0, 0, 0, bgr_to_hsv_full_32f}, - {bgra_to_hsv_full_8u, 0, 0, 0, 0, bgra_to_hsv_full_32f} - }, - { - {bgr_to_hsv4_full_8u, 0, 0, 0, 0, bgr_to_hsv4_full_32f}, - {bgra_to_hsv4_full_8u, 0, 0, 0, 0, bgra_to_hsv4_full_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hsv_to_rgb_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hsv_to_rgb_full_8u, 0, 0, 0, 0, hsv_to_rgb_full_32f}, - {hsv4_to_rgb_full_8u, 0, 0, 0, 0, hsv4_to_rgb_full_32f} - }, - { - {hsv_to_rgba_full_8u, 0, 0, 0, 0, hsv_to_rgba_full_32f}, - {hsv4_to_rgba_full_8u, 0, 0, 0, 0, hsv4_to_rgba_full_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hsv_to_bgr_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace 
cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hsv_to_bgr_full_8u, 0, 0, 0, 0, hsv_to_bgr_full_32f}, - {hsv4_to_bgr_full_8u, 0, 0, 0, 0, hsv4_to_bgr_full_32f} - }, - { - {hsv_to_bgra_full_8u, 0, 0, 0, 0, hsv_to_bgra_full_32f}, - {hsv4_to_bgra_full_8u, 0, 0, 0, 0, hsv4_to_bgra_full_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_hls_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {rgb_to_hls_full_8u, 0, 0, 0, 0, rgb_to_hls_full_32f}, - {rgba_to_hls_full_8u, 0, 0, 0, 0, rgba_to_hls_full_32f}, - }, - { - {rgb_to_hls4_full_8u, 0, 0, 0, 0, rgb_to_hls4_full_32f}, - {rgba_to_hls4_full_8u, 0, 0, 0, 0, rgba_to_hls4_full_32f}, - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_hls_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {bgr_to_hls_full_8u, 0, 0, 0, 0, bgr_to_hls_full_32f}, - {bgra_to_hls_full_8u, 0, 0, 0, 0, bgra_to_hls_full_32f} - }, - { - {bgr_to_hls4_full_8u, 0, 0, 0, 0, bgr_to_hls4_full_32f}, - {bgra_to_hls4_full_8u, 0, 0, 0, 0, bgra_to_hls4_full_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 
4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hls_to_rgb_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hls_to_rgb_full_8u, 0, 0, 0, 0, hls_to_rgb_full_32f}, - {hls4_to_rgb_full_8u, 0, 0, 0, 0, hls4_to_rgb_full_32f} - }, - { - {hls_to_rgba_full_8u, 0, 0, 0, 0, hls_to_rgba_full_32f}, - {hls4_to_rgba_full_8u, 0, 0, 0, 0, hls4_to_rgba_full_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void hls_to_bgr_full(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][6] = - { - { - {hls_to_bgr_full_8u, 0, 0, 0, 0, hls_to_bgr_full_32f}, - {hls4_to_bgr_full_8u, 0, 0, 0, 0, hls4_to_bgr_full_32f} - }, - { - {hls_to_bgra_full_8u, 0, 0, 0, 0, hls_to_bgra_full_32f}, - {hls4_to_bgra_full_8u, 0, 0, 0, 0, hls4_to_bgra_full_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {bgr_to_lab_8u, bgr_to_lab_32f}, - {bgra_to_lab_8u, bgra_to_lab_32f} - }, - { - 
{bgr_to_lab4_8u, bgr_to_lab4_32f}, - {bgra_to_lab4_8u, bgra_to_lab4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {rgb_to_lab_8u, rgb_to_lab_32f}, - {rgba_to_lab_8u, rgba_to_lab_32f} - }, - { - {rgb_to_lab4_8u, rgb_to_lab4_32f}, - {rgba_to_lab4_8u, rgba_to_lab4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lbgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {lbgr_to_lab_8u, lbgr_to_lab_32f}, - {lbgra_to_lab_8u, lbgra_to_lab_32f} - }, - { - {lbgr_to_lab4_8u, lbgr_to_lab4_32f}, - {lbgra_to_lab4_8u, lbgra_to_lab4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lrgb_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - 
{lrgb_to_lab_8u, lrgb_to_lab_32f}, - {lrgba_to_lab_8u, lrgba_to_lab_32f} - }, - { - {lrgb_to_lab4_8u, lrgb_to_lab4_32f}, - {lrgba_to_lab4_8u, lrgba_to_lab4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lab_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {lab_to_bgr_8u, lab_to_bgr_32f}, - {lab4_to_bgr_8u, lab4_to_bgr_32f} - }, - { - {lab_to_bgra_8u, lab_to_bgra_32f}, - {lab4_to_bgra_8u, lab4_to_bgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lab_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {lab_to_rgb_8u, lab_to_rgb_32f}, - {lab4_to_rgb_8u, lab4_to_rgb_32f} - }, - { - {lab_to_rgba_8u, lab_to_rgba_32f}, - {lab4_to_rgba_8u, lab4_to_rgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lab_to_lbgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace 
cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {lab_to_lbgr_8u, lab_to_lbgr_32f}, - {lab4_to_lbgr_8u, lab4_to_lbgr_32f} - }, - { - {lab_to_lbgra_8u, lab_to_lbgra_32f}, - {lab4_to_lbgra_8u, lab4_to_lbgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lab_to_lrgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {lab_to_lrgb_8u, lab_to_lrgb_32f}, - {lab4_to_lrgb_8u, lab4_to_lrgb_32f} - }, - { - {lab_to_lrgba_8u, lab_to_lrgba_32f}, - {lab4_to_lrgba_8u, lab4_to_lrgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void bgr_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {bgr_to_luv_8u, bgr_to_luv_32f}, - {bgra_to_luv_8u, bgra_to_luv_32f} - }, - { - {bgr_to_luv4_8u, bgr_to_luv4_32f}, - {bgra_to_luv4_8u, bgra_to_luv4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void rgb_to_luv(const 
GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {rgb_to_luv_8u, rgb_to_luv_32f}, - {rgba_to_luv_8u, rgba_to_luv_32f} - }, - { - {rgb_to_luv4_8u, rgb_to_luv4_32f}, - {rgba_to_luv4_8u, rgba_to_luv4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lbgr_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {lbgr_to_luv_8u, lbgr_to_luv_32f}, - {lbgra_to_luv_8u, lbgra_to_luv_32f} - }, - { - {lbgr_to_luv4_8u, lbgr_to_luv4_32f}, - {lbgra_to_luv4_8u, lbgra_to_luv4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void lrgb_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {lrgb_to_luv_8u, lrgb_to_luv_32f}, - {lrgba_to_luv_8u, lrgba_to_luv_32f} - }, - { - {lrgb_to_luv4_8u, lrgb_to_luv4_32f}, - {lrgba_to_luv4_8u, lrgba_to_luv4_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == 
CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void luv_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {luv_to_bgr_8u, luv_to_bgr_32f}, - {luv4_to_bgr_8u, luv4_to_bgr_32f} - }, - { - {luv_to_bgra_8u, luv_to_bgra_32f}, - {luv4_to_bgra_8u, luv4_to_bgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void luv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {luv_to_rgb_8u, luv_to_rgb_32f}, - {luv4_to_rgb_8u, luv4_to_rgb_32f} - }, - { - {luv_to_rgba_8u, luv_to_rgba_32f}, - {luv4_to_rgba_8u, luv4_to_rgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void luv_to_lbgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {luv_to_lbgr_8u, luv_to_lbgr_32f}, - {luv4_to_lbgr_8u, luv4_to_lbgr_32f} - }, - { - {luv_to_lbgra_8u, luv_to_lbgra_32f}, - {luv4_to_lbgra_8u, luv4_to_lbgra_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), 
CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void luv_to_lrgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - using namespace cv::gpu::cudev; - static const gpu_func_t funcs[2][2][2] = - { - { - {luv_to_lrgb_8u, luv_to_lrgb_32f}, - {luv4_to_lrgb_8u, luv4_to_lrgb_32f} - }, - { - {luv_to_lrgba_8u, luv_to_lrgba_32f}, - {luv4_to_lrgba_8u, luv4_to_lrgba_32f} - } - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.depth() == CV_8U || src.depth() == CV_32F); - CV_Assert(src.channels() == 3 || src.channels() == 4); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKE_TYPE(src.depth(), dcn)); - - funcs[dcn == 4][src.channels() == 4][src.depth() == CV_32F](src, dst, StreamAccessor::getStream(stream)); - } - - void rgba_to_mbgra(const GpuMat& src, GpuMat& dst, int, Stream& st) - { - #if (CUDA_VERSION < 5000) - (void)src; - (void)dst; - (void)st; - CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); - #else - CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4); - - dst.create(src.size(), src.type()); - - cudaStream_t stream = StreamAccessor::getStream(st); - NppStreamHandler h(stream); - - NppiSize oSizeROI; - oSizeROI.width = src.cols; - oSizeROI.height = src.rows; - - if (src.depth() == CV_8U) - nppSafeCall( nppiAlphaPremul_8u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); - else - nppSafeCall( nppiAlphaPremul_16u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - #endif - } - - void bayer_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, bool blue_last, bool start_with_green, Stream& stream) - { - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - static const func_t funcs[3][4] = - { - {0,0,Bayer2BGR_8u_gpu<3>, 
Bayer2BGR_8u_gpu<4>}, - {0,0,0,0}, - {0,0,Bayer2BGR_16u_gpu<3>, Bayer2BGR_16u_gpu<4>} - }; - - if (dcn <= 0) dcn = 3; - - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1); - CV_Assert(src.rows > 2 && src.cols > 2); - CV_Assert(dcn == 3 || dcn == 4); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - - funcs[src.depth()][dcn - 1](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream)); - } - void bayerBG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - bayer_to_bgr(src, dst, dcn, false, false, stream); - } - void bayerGB_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - bayer_to_bgr(src, dst, dcn, false, true, stream); - } - void bayerRG_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - bayer_to_bgr(src, dst, dcn, true, false, stream); - } - void bayerGR_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) - { - bayer_to_bgr(src, dst, dcn, true, true, stream); - } - - void bayer_to_gray(const GpuMat& src, GpuMat& dst, bool blue_last, bool start_with_green, Stream& stream) - { - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - static const func_t funcs[3] = - { - Bayer2BGR_8u_gpu<1>, - 0, - Bayer2BGR_16u_gpu<1>, - }; - - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1); - CV_Assert(src.rows > 2 && src.cols > 2); - - dst.create(src.size(), CV_MAKETYPE(src.depth(), 1)); - - funcs[src.depth()](src, dst, blue_last, start_with_green, StreamAccessor::getStream(stream)); - } - void bayerBG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) - { - bayer_to_gray(src, dst, false, false, stream); - } - void bayerGB_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) - { - bayer_to_gray(src, dst, false, true, stream); - } - void bayerRG_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) - { - bayer_to_gray(src, dst, true, false, stream); - } 
- void bayerGR_to_gray(const GpuMat& src, GpuMat& dst, int /*dcn*/, Stream& stream) - { - bayer_to_gray(src, dst, true, true, stream); - } -} - -void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream) -{ - typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream); - static const func_t funcs[] = - { - bgr_to_bgra, // CV_BGR2BGRA =0 - bgra_to_bgr, // CV_BGRA2BGR =1 - bgr_to_rgba, // CV_BGR2RGBA =2 - bgra_to_rgb, // CV_RGBA2BGR =3 - bgr_to_rgb, // CV_BGR2RGB =4 - bgra_to_rgba, // CV_BGRA2RGBA =5 - - bgr_to_gray, // CV_BGR2GRAY =6 - rgb_to_gray, // CV_RGB2GRAY =7 - gray_to_bgr, // CV_GRAY2BGR =8 - gray_to_bgra, // CV_GRAY2BGRA =9 - bgra_to_gray, // CV_BGRA2GRAY =10 - rgba_to_gray, // CV_RGBA2GRAY =11 - - bgr_to_bgr565, // CV_BGR2BGR565 =12 - rgb_to_bgr565, // CV_RGB2BGR565 =13 - bgr565_to_bgr, // CV_BGR5652BGR =14 - bgr565_to_rgb, // CV_BGR5652RGB =15 - bgra_to_bgr565, // CV_BGRA2BGR565 =16 - rgba_to_bgr565, // CV_RGBA2BGR565 =17 - bgr565_to_bgra, // CV_BGR5652BGRA =18 - bgr565_to_rgba, // CV_BGR5652RGBA =19 - - gray_to_bgr565, // CV_GRAY2BGR565 =20 - bgr565_to_gray, // CV_BGR5652GRAY =21 - - bgr_to_bgr555, // CV_BGR2BGR555 =22 - rgb_to_bgr555, // CV_RGB2BGR555 =23 - bgr555_to_bgr, // CV_BGR5552BGR =24 - bgr555_to_rgb, // CV_BGR5552RGB =25 - bgra_to_bgr555, // CV_BGRA2BGR555 =26 - rgba_to_bgr555, // CV_RGBA2BGR555 =27 - bgr555_to_bgra, // CV_BGR5552BGRA =28 - bgr555_to_rgba, // CV_BGR5552RGBA =29 - - gray_to_bgr555, // CV_GRAY2BGR555 =30 - bgr555_to_gray, // CV_BGR5552GRAY =31 - - bgr_to_xyz, // CV_BGR2XYZ =32 - rgb_to_xyz, // CV_RGB2XYZ =33 - xyz_to_bgr, // CV_XYZ2BGR =34 - xyz_to_rgb, // CV_XYZ2RGB =35 - - bgr_to_YCrCb, // CV_BGR2YCrCb =36 - rgb_to_YCrCb, // CV_RGB2YCrCb =37 - YCrCb_to_bgr, // CV_YCrCb2BGR =38 - YCrCb_to_rgb, // CV_YCrCb2RGB =39 - - bgr_to_hsv, // CV_BGR2HSV =40 - rgb_to_hsv, // CV_RGB2HSV =41 - - 0, // =42 - 0, // =43 - - bgr_to_lab, // CV_BGR2Lab =44 - rgb_to_lab, // CV_RGB2Lab =45 - - 
bayerBG_to_bgr, // CV_BayerBG2BGR =46 - bayerGB_to_bgr, // CV_BayerGB2BGR =47 - bayerRG_to_bgr, // CV_BayerRG2BGR =48 - bayerGR_to_bgr, // CV_BayerGR2BGR =49 - - bgr_to_luv, // CV_BGR2Luv =50 - rgb_to_luv, // CV_RGB2Luv =51 - - bgr_to_hls, // CV_BGR2HLS =52 - rgb_to_hls, // CV_RGB2HLS =53 - - hsv_to_bgr, // CV_HSV2BGR =54 - hsv_to_rgb, // CV_HSV2RGB =55 - - lab_to_bgr, // CV_Lab2BGR =56 - lab_to_rgb, // CV_Lab2RGB =57 - luv_to_bgr, // CV_Luv2BGR =58 - luv_to_rgb, // CV_Luv2RGB =59 - - hls_to_bgr, // CV_HLS2BGR =60 - hls_to_rgb, // CV_HLS2RGB =61 - - 0, // CV_BayerBG2BGR_VNG =62 - 0, // CV_BayerGB2BGR_VNG =63 - 0, // CV_BayerRG2BGR_VNG =64 - 0, // CV_BayerGR2BGR_VNG =65 - - bgr_to_hsv_full, // CV_BGR2HSV_FULL = 66 - rgb_to_hsv_full, // CV_RGB2HSV_FULL = 67 - bgr_to_hls_full, // CV_BGR2HLS_FULL = 68 - rgb_to_hls_full, // CV_RGB2HLS_FULL = 69 - - hsv_to_bgr_full, // CV_HSV2BGR_FULL = 70 - hsv_to_rgb_full, // CV_HSV2RGB_FULL = 71 - hls_to_bgr_full, // CV_HLS2BGR_FULL = 72 - hls_to_rgb_full, // CV_HLS2RGB_FULL = 73 - - lbgr_to_lab, // CV_LBGR2Lab = 74 - lrgb_to_lab, // CV_LRGB2Lab = 75 - lbgr_to_luv, // CV_LBGR2Luv = 76 - lrgb_to_luv, // CV_LRGB2Luv = 77 - - lab_to_lbgr, // CV_Lab2LBGR = 78 - lab_to_lrgb, // CV_Lab2LRGB = 79 - luv_to_lbgr, // CV_Luv2LBGR = 80 - luv_to_lrgb, // CV_Luv2LRGB = 81 - - bgr_to_yuv, // CV_BGR2YUV = 82 - rgb_to_yuv, // CV_RGB2YUV = 83 - yuv_to_bgr, // CV_YUV2BGR = 84 - yuv_to_rgb, // CV_YUV2RGB = 85 - - bayerBG_to_gray, // CV_BayerBG2GRAY = 86 - bayerGB_to_gray, // CV_BayerGB2GRAY = 87 - bayerRG_to_gray, // CV_BayerRG2GRAY = 88 - bayerGR_to_gray, // CV_BayerGR2GRAY = 89 - - //YUV 4:2:0 formats family - 0, // CV_YUV2RGB_NV12 = 90, - 0, // CV_YUV2BGR_NV12 = 91, - 0, // CV_YUV2RGB_NV21 = 92, - 0, // CV_YUV2BGR_NV21 = 93, - - 0, // CV_YUV2RGBA_NV12 = 94, - 0, // CV_YUV2BGRA_NV12 = 95, - 0, // CV_YUV2RGBA_NV21 = 96, - 0, // CV_YUV2BGRA_NV21 = 97, - - 0, // CV_YUV2RGB_YV12 = 98, - 0, // CV_YUV2BGR_YV12 = 99, - 0, // CV_YUV2RGB_IYUV = 100, - 0, // 
CV_YUV2BGR_IYUV = 101, - - 0, // CV_YUV2RGBA_YV12 = 102, - 0, // CV_YUV2BGRA_YV12 = 103, - 0, // CV_YUV2RGBA_IYUV = 104, - 0, // CV_YUV2BGRA_IYUV = 105, - - 0, // CV_YUV2GRAY_420 = 106, - - //YUV 4:2:2 formats family - 0, // CV_YUV2RGB_UYVY = 107, - 0, // CV_YUV2BGR_UYVY = 108, - 0, // //CV_YUV2RGB_VYUY = 109, - 0, // //CV_YUV2BGR_VYUY = 110, - - 0, // CV_YUV2RGBA_UYVY = 111, - 0, // CV_YUV2BGRA_UYVY = 112, - 0, // //CV_YUV2RGBA_VYUY = 113, - 0, // //CV_YUV2BGRA_VYUY = 114, - - 0, // CV_YUV2RGB_YUY2 = 115, - 0, // CV_YUV2BGR_YUY2 = 116, - 0, // CV_YUV2RGB_YVYU = 117, - 0, // CV_YUV2BGR_YVYU = 118, - - 0, // CV_YUV2RGBA_YUY2 = 119, - 0, // CV_YUV2BGRA_YUY2 = 120, - 0, // CV_YUV2RGBA_YVYU = 121, - 0, // CV_YUV2BGRA_YVYU = 122, - - 0, // CV_YUV2GRAY_UYVY = 123, - 0, // CV_YUV2GRAY_YUY2 = 124, - - // alpha premultiplication - rgba_to_mbgra, // CV_RGBA2mRGBA = 125, - 0, // CV_mRGBA2RGBA = 126, - - 0, // CV_COLORCVT_MAX = 127 - }; - - CV_Assert(code < 128); - - func_t func = funcs[code]; - - if (func == 0) - CV_Error( cv::Error::StsBadFlag, "Unknown/unsupported color conversion code" ); - - func(src, dst, dcn, stream); -} - -void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream) -{ - const int depth = src.depth(); - - CV_Assert( src.channels() == 1 ); - - switch (code) - { - case cv::COLOR_BayerBG2GRAY: case cv::COLOR_BayerGB2GRAY: case cv::COLOR_BayerRG2GRAY: case cv::COLOR_BayerGR2GRAY: - bayer_to_gray(src, dst, code == cv::COLOR_BayerBG2GRAY || code == cv::COLOR_BayerGB2GRAY, code == cv::COLOR_BayerGB2GRAY || code == cv::COLOR_BayerGR2GRAY, stream); - break; - - case cv::COLOR_BayerBG2BGR: case cv::COLOR_BayerGB2BGR: case cv::COLOR_BayerRG2BGR: case cv::COLOR_BayerGR2BGR: - bayer_to_bgr(src, dst, dcn, code == cv::COLOR_BayerBG2BGR || code == cv::COLOR_BayerGB2BGR, code == cv::COLOR_BayerGB2BGR || code == cv::COLOR_BayerGR2BGR, stream); - break; - - case COLOR_BayerBG2BGR_MHT: case COLOR_BayerGB2BGR_MHT: case COLOR_BayerRG2BGR_MHT: 
case COLOR_BayerGR2BGR_MHT: - { - if (dcn <= 0) - dcn = 3; - - CV_Assert( depth == CV_8U ); - CV_Assert( dcn == 3 || dcn == 4 ); - - dst.create(src.size(), CV_MAKETYPE(depth, dcn)); - dst.setTo(Scalar::all(0)); - - Size wholeSize; - Point ofs; - src.locateROI(wholeSize, ofs); - PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); - - const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, - code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 0 : 1); - - if (dcn == 3) - cudev::MHCdemosaic<3>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); - else - cudev::MHCdemosaic<4>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); - - break; - } - - case COLOR_BayerBG2GRAY_MHT: case COLOR_BayerGB2GRAY_MHT: case COLOR_BayerRG2GRAY_MHT: case COLOR_BayerGR2GRAY_MHT: - { - CV_Assert( depth == CV_8U ); - - dst.create(src.size(), CV_MAKETYPE(depth, 1)); - dst.setTo(Scalar::all(0)); - - Size wholeSize; - Point ofs; - src.locateROI(wholeSize, ofs); - PtrStepSzb srcWhole(wholeSize.height, wholeSize.width, src.datastart, src.step); - - const int2 firstRed = make_int2(code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGB2BGR_MHT ? 0 : 1, - code == COLOR_BayerRG2BGR_MHT || code == COLOR_BayerGR2BGR_MHT ? 
0 : 1); - - cudev::MHCdemosaic<1>(srcWhole, make_int2(ofs.x, ofs.y), dst, firstRed, StreamAccessor::getStream(stream)); - - break; - } - - default: - CV_Error( cv::Error::StsBadFlag, "Unknown / unsupported color conversion code" ); - } -} - -void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s) -{ - CV_Assert(image.type() == CV_8UC4); - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - NppiSize sz; - sz.width = image.cols; - sz.height = image.rows; - - nppSafeCall( nppiSwapChannels_8u_C4IR(image.ptr(), static_cast(image.step), sz, dstOrder) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); -} - -void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stream& stream) -{ -#if (CUDA_VERSION < 5000) - (void)src; - (void)dst; - (void)forward; - (void)stream; - CV_Error( cv::Error::StsNotImplemented, "This function works only with CUDA 5.0 or higher" ); -#else - typedef NppStatus (*func_t)(const Npp8u* pSrc, int nSrcStep, Npp8u* pDst, int nDstStep, NppiSize oSizeROI); - typedef NppStatus (*func_inplace_t)(Npp8u* pSrcDst, int nSrcDstStep, NppiSize oSizeROI); - - static const func_t funcs[2][5] = - { - {0, 0, 0, nppiGammaInv_8u_C3R, nppiGammaInv_8u_AC4R}, - {0, 0, 0, nppiGammaFwd_8u_C3R, nppiGammaFwd_8u_AC4R} - }; - static const func_inplace_t funcs_inplace[2][5] = - { - {0, 0, 0, nppiGammaInv_8u_C3IR, nppiGammaInv_8u_AC4IR}, - {0, 0, 0, nppiGammaFwd_8u_C3IR, nppiGammaFwd_8u_AC4IR} - }; - - CV_Assert(src.type() == CV_8UC3 || src.type() == CV_8UC4); - - dst.create(src.size(), src.type()); - - NppStreamHandler h(StreamAccessor::getStream(stream)); - - NppiSize oSizeROI; - oSizeROI.width = src.cols; - oSizeROI.height = src.rows; - - if (dst.data == src.data) - funcs_inplace[forward][src.channels()](dst.ptr(), static_cast(src.step), oSizeROI); - else - funcs[forward][src.channels()](src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI); - -#endif -} - 
-#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/cuda/bilateral_filter.cu b/modules/gpu/src/cuda/bilateral_filter.cu deleted file mode 100644 index 4449274548..0000000000 --- a/modules/gpu/src/cuda/bilateral_filter.cu +++ /dev/null @@ -1,199 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/border_interpolate.hpp" - -using namespace cv::gpu; - -typedef unsigned char uchar; -typedef unsigned short ushort; - -////////////////////////////////////////////////////////////////////////////////// -/// Bilateral filtering - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - __device__ __forceinline__ float norm_l1(const float& a) { return ::fabs(a); } - __device__ __forceinline__ float norm_l1(const float2& a) { return ::fabs(a.x) + ::fabs(a.y); } - __device__ __forceinline__ float norm_l1(const float3& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z); } - __device__ __forceinline__ float norm_l1(const float4& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z) + ::fabs(a.w); } - - __device__ __forceinline__ float sqr(const float& a) { return a * a; } - - template - __global__ void bilateral_kernel(const PtrStepSz src, PtrStep dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half) - { - typedef typename TypeVec::cn>::vec_type value_type; - - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - - if (x >= src.cols || y >= src.rows) - return; - - value_type center = saturate_cast(src(y, x)); - - value_type sum1 
= VecTraits::all(0); - float sum2 = 0; - - int r = ksz / 2; - float r2 = (float)(r * r); - - int tx = x - r + ksz; - int ty = y - r + ksz; - - if (x - ksz/2 >=0 && y - ksz/2 >=0 && tx < src.cols && ty < src.rows) - { - for (int cy = y - r; cy < ty; ++cy) - for (int cx = x - r; cx < tx; ++cx) - { - float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy); - if (space2 > r2) - continue; - - value_type value = saturate_cast(src(cy, cx)); - - float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half); - sum1 = sum1 + weight * value; - sum2 = sum2 + weight; - } - } - else - { - for (int cy = y - r; cy < ty; ++cy) - for (int cx = x - r; cx < tx; ++cx) - { - float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy); - if (space2 > r2) - continue; - - value_type value = saturate_cast(b.at(cy, cx, src.data, src.step)); - - float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half); - - sum1 = sum1 + weight * value; - sum2 = sum2 + weight; - } - } - dst(y, x) = saturate_cast(sum1 / sum2); - } - - template class B> - void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream) - { - dim3 block (32, 8); - dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y)); - - B b(src.rows, src.cols); - - float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial); - float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color); - - cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel >, cudaFuncCachePreferL1) ); - bilateral_kernel<<>>((PtrStepSz)src, (PtrStepSz)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half); - cudaSafeCall ( cudaGetLastError () ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template - void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int 
borderMode, cudaStream_t stream) - { - typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream); - - static caller_t funcs[] = - { - bilateral_caller, - bilateral_caller, - bilateral_caller, - bilateral_caller, - bilateral_caller, - }; - funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream); - } - } -}}} - - -#define OCV_INSTANTIATE_BILATERAL_FILTER(T) \ - template void cv::gpu::cudev::imgproc::bilateral_filter_gpu(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t); - -OCV_INSTANTIATE_BILATERAL_FILTER(uchar) -//OCV_INSTANTIATE_BILATERAL_FILTER(uchar2) -OCV_INSTANTIATE_BILATERAL_FILTER(uchar3) -OCV_INSTANTIATE_BILATERAL_FILTER(uchar4) - -//OCV_INSTANTIATE_BILATERAL_FILTER(schar) -//OCV_INSTANTIATE_BILATERAL_FILTER(schar2) -//OCV_INSTANTIATE_BILATERAL_FILTER(schar3) -//OCV_INSTANTIATE_BILATERAL_FILTER(schar4) - -OCV_INSTANTIATE_BILATERAL_FILTER(short) -//OCV_INSTANTIATE_BILATERAL_FILTER(short2) -OCV_INSTANTIATE_BILATERAL_FILTER(short3) -OCV_INSTANTIATE_BILATERAL_FILTER(short4) - -OCV_INSTANTIATE_BILATERAL_FILTER(ushort) -//OCV_INSTANTIATE_BILATERAL_FILTER(ushort2) -OCV_INSTANTIATE_BILATERAL_FILTER(ushort3) -OCV_INSTANTIATE_BILATERAL_FILTER(ushort4) - -//OCV_INSTANTIATE_BILATERAL_FILTER(int) -//OCV_INSTANTIATE_BILATERAL_FILTER(int2) -//OCV_INSTANTIATE_BILATERAL_FILTER(int3) -//OCV_INSTANTIATE_BILATERAL_FILTER(int4) - -OCV_INSTANTIATE_BILATERAL_FILTER(float) -//OCV_INSTANTIATE_BILATERAL_FILTER(float2) -OCV_INSTANTIATE_BILATERAL_FILTER(float3) -OCV_INSTANTIATE_BILATERAL_FILTER(float4) - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/blend.cu b/modules/gpu/src/cuda/blend.cu deleted file mode 100644 index be8c0b2f35..0000000000 --- a/modules/gpu/src/cuda/blend.cu +++ /dev/null @@ -1,121 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE 
DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace blend - { - template - __global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep img1, const PtrStep img2, - const PtrStepf weights1, const PtrStepf weights2, PtrStep result) - { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (y < rows && x < cols) - { - int x_ = x / cn; - float w1 = weights1.ptr(y)[x_]; - float w2 = weights2.ptr(y)[x_]; - T p1 = img1.ptr(y)[x]; - T p2 = img2.ptr(y)[x]; - result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f); - } - } - - template - void blendLinearCaller(int rows, int cols, int cn, PtrStep img1, PtrStep img2, PtrStepf weights1, PtrStepf weights2, PtrStep result, cudaStream_t stream) - { - dim3 threads(16, 16); - dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y)); - - blendLinearKernel<<>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall(cudaDeviceSynchronize()); - } - - template void blendLinearCaller(int, int, int, PtrStep, PtrStep, PtrStepf, PtrStepf, PtrStep, cudaStream_t stream); - template void blendLinearCaller(int, int, int, PtrStep, PtrStep, PtrStepf, PtrStepf, PtrStep, cudaStream_t stream); - - - __global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2, - const PtrStepf weights1, const PtrStepf weights2, PtrStepb result) - { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (y < rows && x < cols) - { - float w1 = weights1.ptr(y)[x]; - float w2 = weights2.ptr(y)[x]; - float sum_inv = 1.f / (w1 + w2 + 1e-5f); - w1 *= sum_inv; - w2 *= sum_inv; - uchar4 p1 = ((const uchar4*)img1.ptr(y))[x]; - uchar4 p2 = ((const uchar4*)img2.ptr(y))[x]; - ((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * 
w1 + p2.y * w2, - p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2); - } - } - - void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream) - { - dim3 threads(16, 16); - dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - - blendLinearKernel8UC4<<>>(rows, cols, img1, img2, weights1, weights2, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall(cudaDeviceSynchronize()); - } - } // namespace blend -}}} // namespace cv { namespace gpu { namespace cudev - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpu/src/cuda/canny.cu deleted file mode 100644 index 042e9afcc6..0000000000 --- a/modules/gpu/src/cuda/canny.cu +++ /dev/null @@ -1,494 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include -#include //std::swap -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/emulation.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/utility.hpp" - -using namespace cv::gpu; -using namespace cv::gpu::cudev; - -namespace canny -{ - struct L1 : binary_function - { - __device__ __forceinline__ float operator ()(int x, int y) const - { - return ::abs(x) + ::abs(y); - } - - __device__ __forceinline__ L1() {} - __device__ __forceinline__ L1(const L1&) {} - }; - struct L2 : binary_function - { - __device__ __forceinline__ float operator ()(int x, int y) const - { - return ::sqrtf(x * x + y * y); - } - - __device__ __forceinline__ L2() {} - __device__ __forceinline__ L2(const L2&) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits - { - enum { smart_shift = 4 }; - }; - template <> struct 
TransformFunctorTraits : DefaultTransformFunctorTraits - { - enum { smart_shift = 4 }; - }; -}}} - -namespace canny -{ - texture tex_src(false, cudaFilterModePoint, cudaAddressModeClamp); - struct SrcTex - { - const int xoff; - const int yoff; - __host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {} - - __device__ __forceinline__ int operator ()(int y, int x) const - { - return tex2D(tex_src, x + xoff, y + yoff); - } - }; - - template __global__ - void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (y >= mag.rows || x >= mag.cols) - return; - - int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1)); - int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1)); - - dx(y, x) = dxVal; - dy(y, x) = dyVal; - - mag(y, x) = norm(dxVal, dyVal); - } - - void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad) - { - const dim3 block(16, 16); - const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y)); - - bindTexture(&tex_src, srcWhole); - SrcTex src(xoff, yoff); - - if (L2Grad) - { - L2 norm; - calcMagnitudeKernel<<>>(src, dx, dy, mag, norm); - } - else - { - L1 norm; - calcMagnitudeKernel<<>>(src, dx, dy, mag, norm); - } - - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall(cudaThreadSynchronize()); - } - - void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad) - { - if (L2Grad) - { - L2 norm; - transform(dx, dy, mag, norm, WithOutMask(), 0); - } - else - { - L1 norm; - transform(dx, dy, mag, norm, WithOutMask(), 0); - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// - -namespace canny -{ - 
texture tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp); - - __global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh) - { - const int CANNY_SHIFT = 15; - const int TG22 = (int)(0.4142135623730950488016887242097*(1<= dx.cols - 1 || y == 0 || y >= dx.rows - 1) - return; - - int dxVal = dx(y, x); - int dyVal = dy(y, x); - - const int s = (dxVal ^ dyVal) < 0 ? -1 : 1; - const float m = tex2D(tex_mag, x, y); - - dxVal = ::abs(dxVal); - dyVal = ::abs(dyVal); - - // 0 - the pixel can not belong to an edge - // 1 - the pixel might belong to an edge - // 2 - the pixel does belong to an edge - int edge_type = 0; - - if (m > low_thresh) - { - const int tg22x = dxVal * TG22; - const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT); - - dyVal <<= CANNY_SHIFT; - - if (dyVal < tg22x) - { - if (m > tex2D(tex_mag, x - 1, y) && m >= tex2D(tex_mag, x + 1, y)) - edge_type = 1 + (int)(m > high_thresh); - } - else if(dyVal > tg67x) - { - if (m > tex2D(tex_mag, x, y - 1) && m >= tex2D(tex_mag, x, y + 1)) - edge_type = 1 + (int)(m > high_thresh); - } - else - { - if (m > tex2D(tex_mag, x - s, y - 1) && m >= tex2D(tex_mag, x + s, y + 1)) - edge_type = 1 + (int)(m > high_thresh); - } - } - - map(y, x) = edge_type; - } - - void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh) - { - const dim3 block(16, 16); - const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y)); - - bindTexture(&tex_mag, mag); - - calcMapKernel<<>>(dx, dy, map, low_thresh, high_thresh); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } -} - -////////////////////////////////////////////////////////////////////////////////////////// - -namespace canny -{ - __device__ int counter = 0; - - __global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st) - { - __shared__ volatile int smem[18][18]; - - const int x = blockIdx.x * 
blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? map(y, x) : 0; - if (threadIdx.y == 0) - smem[0][threadIdx.x + 1] = y > 0 ? map(y - 1, x) : 0; - if (threadIdx.y == blockDim.y - 1) - smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? map(y + 1, x) : 0; - if (threadIdx.x == 0) - smem[threadIdx.y + 1][0] = x > 0 ? map(y, x - 1) : 0; - if (threadIdx.x == blockDim.x - 1) - smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols ? map(y, x + 1) : 0; - if (threadIdx.x == 0 && threadIdx.y == 0) - smem[0][0] = y > 0 && x > 0 ? map(y - 1, x - 1) : 0; - if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0) - smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? map(y - 1, x + 1) : 0; - if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1) - smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? map(y + 1, x - 1) : 0; - if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1) - smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? 
map(y + 1, x + 1) : 0; - - __syncthreads(); - - if (x >= map.cols || y >= map.rows) - return; - - int n; - - #pragma unroll - for (int k = 0; k < 16; ++k) - { - n = 0; - - if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1) - { - n += smem[threadIdx.y ][threadIdx.x ] == 2; - n += smem[threadIdx.y ][threadIdx.x + 1] == 2; - n += smem[threadIdx.y ][threadIdx.x + 2] == 2; - - n += smem[threadIdx.y + 1][threadIdx.x ] == 2; - n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2; - - n += smem[threadIdx.y + 2][threadIdx.x ] == 2; - n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2; - n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2; - } - - if (n > 0) - smem[threadIdx.y + 1][threadIdx.x + 1] = 2; - } - - const int e = smem[threadIdx.y + 1][threadIdx.x + 1]; - - map(y, x) = e; - - n = 0; - - if (e == 2) - { - n += smem[threadIdx.y ][threadIdx.x ] == 1; - n += smem[threadIdx.y ][threadIdx.x + 1] == 1; - n += smem[threadIdx.y ][threadIdx.x + 2] == 1; - - n += smem[threadIdx.y + 1][threadIdx.x ] == 1; - n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1; - - n += smem[threadIdx.y + 2][threadIdx.x ] == 1; - n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1; - n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1; - } - - if (n > 0) - { - const int ind = ::atomicAdd(&counter, 1); - st[ind] = make_ushort2(x, y); - } - } - - void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1) - { - void* counter_ptr; - cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) ); - - cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) ); - - const dim3 block(16, 16); - const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y)); - - edgesHysteresisLocalKernel<<>>(map, st1); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } -} - -////////////////////////////////////////////////////////////////////////////////////////// - -namespace canny -{ - __constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; - __constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; - - 
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count) - { - const int stack_size = 512; - - __shared__ int s_counter; - __shared__ int s_ind; - __shared__ ushort2 s_st[stack_size]; - - if (threadIdx.x == 0) - s_counter = 0; - - __syncthreads(); - - int ind = blockIdx.y * gridDim.x + blockIdx.x; - - if (ind >= count) - return; - - ushort2 pos = st1[ind]; - - if (threadIdx.x < 8) - { - pos.x += c_dx[threadIdx.x]; - pos.y += c_dy[threadIdx.x]; - - if (pos.x > 0 && pos.x < map.cols && pos.y > 0 && pos.y < map.rows && map(pos.y, pos.x) == 1) - { - map(pos.y, pos.x) = 2; - - ind = Emulation::smem::atomicAdd(&s_counter, 1); - - s_st[ind] = pos; - } - } - - __syncthreads(); - - while (s_counter > 0 && s_counter <= stack_size - blockDim.x) - { - const int subTaskIdx = threadIdx.x >> 3; - const int portion = ::min(s_counter, blockDim.x >> 3); - - if (subTaskIdx < portion) - pos = s_st[s_counter - 1 - subTaskIdx]; - - __syncthreads(); - - if (threadIdx.x == 0) - s_counter -= portion; - - __syncthreads(); - - if (subTaskIdx < portion) - { - pos.x += c_dx[threadIdx.x & 7]; - pos.y += c_dy[threadIdx.x & 7]; - - if (pos.x > 0 && pos.x < map.cols && pos.y > 0 && pos.y < map.rows && map(pos.y, pos.x) == 1) - { - map(pos.y, pos.x) = 2; - - ind = Emulation::smem::atomicAdd(&s_counter, 1); - - s_st[ind] = pos; - } - } - - __syncthreads(); - } - - if (s_counter > 0) - { - if (threadIdx.x == 0) - { - ind = ::atomicAdd(&counter, s_counter); - s_ind = ind - s_counter; - } - - __syncthreads(); - - ind = s_ind; - - for (int i = threadIdx.x; i < s_counter; i += blockDim.x) - st2[ind + i] = s_st[i]; - } - } - - void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2) - { - void* counter_ptr; - cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) ); - - int count; - cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) ); - - while (count > 0) - { - cudaSafeCall( cudaMemset(counter_ptr, 0, 
sizeof(int)) ); - - const dim3 block(128); - const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1); - - edgesHysteresisGlobalKernel<<>>(map, st1, st2, count); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) ); - - std::swap(st1, st2); - } - } -} - -////////////////////////////////////////////////////////////////////////////////////////// - -namespace canny -{ - struct GetEdges : unary_function - { - __device__ __forceinline__ uchar operator ()(int e) const - { - return (uchar)(-(e >> 1)); - } - - __device__ __forceinline__ GetEdges() {} - __device__ __forceinline__ GetEdges(const GetEdges&) {} - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits - { - enum { smart_shift = 4 }; - }; -}}} - -namespace canny -{ - void getEdges(PtrStepSzi map, PtrStepSzb dst) - { - transform(map, dst, GetEdges(), WithOutMask(), 0); - } -} - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu deleted file mode 100644 index 9552f1b06f..0000000000 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ /dev/null @@ -1,534 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#if !defined CUDA_DISABLER - -#include -#include -#include -#include - -#include -#include - -namespace cv { namespace gpu { namespace cudev -{ - namespace ccl - { - enum - { - WARP_SIZE = 32, - WARP_LOG = 5, - - CTA_SIZE_X = 32, - CTA_SIZE_Y = 8, - - STA_SIZE_MERGE_Y = 4, - STA_SIZE_MERGE_X = 32, - - TPB_X = 1, - TPB_Y = 4, - - TILE_COLS = CTA_SIZE_X * TPB_X, - TILE_ROWS = CTA_SIZE_Y * TPB_Y - }; - - template struct IntervalsTraits - { - typedef T elem_type; - }; - - template<> struct IntervalsTraits - { - typedef int dist_type; - enum {ch = 1}; - }; - - template<> struct IntervalsTraits - { - typedef int3 dist_type; - enum {ch = 3}; - }; - - template<> struct IntervalsTraits - { - typedef int4 dist_type; - enum {ch = 4}; - }; - - template<> struct IntervalsTraits - { - typedef int dist_type; - enum {ch = 1}; - }; - - template<> struct IntervalsTraits - { - typedef int3 dist_type; - enum {ch = 3}; - }; - - template<> struct IntervalsTraits - { - typedef int4 dist_type; - enum {ch = 4}; - }; - - template<> struct IntervalsTraits - { - typedef float dist_type; - enum {ch = 1}; - }; - - template<> struct IntervalsTraits - { - typedef int dist_type; - enum {ch = 1}; - }; - - typedef unsigned char component; - enum Edges { UP = 1, DOWN = 2, LEFT = 4, RIGHT = 8, EMPTY = 0xF0 }; - - template struct InInterval {}; - - template struct InInterval - { - typedef typename VecTraits::elem_type E; - __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) : lo((E)(-_lo.x)), hi((E)_hi.x) {}; - T lo, hi; - - template __device__ __forceinline__ bool operator() (const I& a, const I& b) const - { - I d = a - b; - return lo <= d && d <= hi; - } - }; - - - template struct InInterval - { - typedef typename VecTraits::elem_type E; - __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) - : lo (VecTraits::make((E)(-_lo.x), (E)(-_lo.y), (E)(-_lo.z))), hi (VecTraits::make((E)_hi.x, (E)_hi.y, (E)_hi.z)){}; - T lo, hi; - 
- template __device__ __forceinline__ bool operator() (const I& a, const I& b) const - { - I d = a - b; - return lo.x <= d.x && d.x <= hi.x && - lo.y <= d.y && d.y <= hi.y && - lo.z <= d.z && d.z <= hi.z; - } - }; - - template struct InInterval - { - typedef typename VecTraits::elem_type E; - __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) - : lo (VecTraits::make((E)(-_lo.x), (E)(-_lo.y), (E)(-_lo.z), (E)(-_lo.w))), hi (VecTraits::make((E)_hi.x, (E)_hi.y, (E)_hi.z, (E)_hi.w)){}; - T lo, hi; - - template __device__ __forceinline__ bool operator() (const I& a, const I& b) const - { - I d = a - b; - return lo.x <= d.x && d.x <= hi.x && - lo.y <= d.y && d.y <= hi.y && - lo.z <= d.z && d.z <= hi.z && - lo.w <= d.w && d.w <= hi.w; - } - }; - - - template - __global__ void computeConnectivity(const PtrStepSz image, PtrStepSzb components, F connected) - { - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - - if (x >= image.cols || y >= image.rows) return; - - T intensity = image(y, x); - component c = 0; - - if ( x > 0 && connected(intensity, image(y, x - 1))) - c |= LEFT; - - if ( y > 0 && connected(intensity, image(y - 1, x))) - c |= UP; - - if ( x + 1 < image.cols && connected(intensity, image(y, x + 1))) - c |= RIGHT; - - if ( y + 1 < image.rows && connected(intensity, image(y + 1, x))) - c |= DOWN; - - components(y, x) = c; - } - - template< typename T> - void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream) - { - dim3 block(CTA_SIZE_X, CTA_SIZE_Y); - dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y)); - - typedef InInterval::dist_type, IntervalsTraits::ch> Int_t; - - Int_t inInt(lo, hi); - computeConnectivity<<>>(static_cast >(image), edges, inInt); - - cudaSafeCall( cudaGetLastError() ); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void computeEdges (const PtrStepSzb& 
image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges(const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - template void computeEdges (const PtrStepSzb& image, PtrStepSzb edges, const float4& lo, const float4& hi, cudaStream_t stream); - - __global__ void lableTiles(const PtrStepSzb edges, PtrStepSzi comps) - { - int x = threadIdx.x + blockIdx.x * TILE_COLS; - int y = threadIdx.y + blockIdx.y * TILE_ROWS; - - if (x >= edges.cols || y >= edges.rows) return; - - //currently x is 1 - int bounds = ((y + TPB_Y) < edges.rows); - - __shared__ int labelsTile[TILE_ROWS][TILE_COLS]; - __shared__ int edgesTile[TILE_ROWS][TILE_COLS]; - - int new_labels[TPB_Y][TPB_X]; - int old_labels[TPB_Y][TPB_X]; - - #pragma unroll - for (int i = 0; i < TPB_Y; ++i) - #pragma unroll - for (int j = 0; j < TPB_X; ++j) - { - int yloc = threadIdx.y + CTA_SIZE_Y * i; - int xloc = threadIdx.x + CTA_SIZE_X * j; - component c = edges(bounds * (y + CTA_SIZE_Y * i), x + CTA_SIZE_X * j); - - if (!xloc) c &= ~LEFT; - if (!yloc) c &= ~UP; - - if (xloc == TILE_COLS -1) c &= ~RIGHT; - if (yloc == TILE_ROWS -1) c &= ~DOWN; - - new_labels[i][j] = yloc * TILE_COLS + xloc; - edgesTile[yloc][xloc] = c; - } - - for (int k = 0; ;++k) - { - //1. 
backup - #pragma unroll - for (int i = 0; i < TPB_Y; ++i) - #pragma unroll - for (int j = 0; j < TPB_X; ++j) - { - int yloc = threadIdx.y + CTA_SIZE_Y * i; - int xloc = threadIdx.x + CTA_SIZE_X * j; - - old_labels[i][j] = new_labels[i][j]; - labelsTile[yloc][xloc] = new_labels[i][j]; - } - - __syncthreads(); - - //2. compare local arrays - #pragma unroll - for (int i = 0; i < TPB_Y; ++i) - #pragma unroll - for (int j = 0; j < TPB_X; ++j) - { - int yloc = threadIdx.y + CTA_SIZE_Y * i; - int xloc = threadIdx.x + CTA_SIZE_X * j; - - component c = edgesTile[yloc][xloc]; - int label = new_labels[i][j]; - - if (c & UP) - label = ::min(label, labelsTile[yloc - 1][xloc]); - - if (c & DOWN) - label = ::min(label, labelsTile[yloc + 1][xloc]); - - if (c & LEFT) - label = ::min(label, labelsTile[yloc][xloc - 1]); - - if (c & RIGHT) - label = ::min(label, labelsTile[yloc][xloc + 1]); - - new_labels[i][j] = label; - } - - __syncthreads(); - - //3. determine: Is any value changed? - int changed = 0; - #pragma unroll - for (int i = 0; i < TPB_Y; ++i) - #pragma unroll - for (int j = 0; j < TPB_X; ++j) - { - if (new_labels[i][j] < old_labels[i][j]) - { - changed = 1; - Emulation::smem::atomicMin(&labelsTile[0][0] + old_labels[i][j], new_labels[i][j]); - } - } - - changed = Emulation::syncthreadsOr(changed); - - if (!changed) - break; - - //4. Compact paths - const int *labels = &labelsTile[0][0]; - #pragma unroll - for (int i = 0; i < TPB_Y; ++i) - #pragma unroll - for (int j = 0; j < TPB_X; ++j) - { - int label = new_labels[i][j]; - - while( labels[label] < label ) label = labels[label]; - - new_labels[i][j] = label; - } - __syncthreads(); - } - - #pragma unroll - for (int i = 0; i < TPB_Y; ++i) - #pragma unroll - for (int j = 0; j < TPB_X; ++j) - { - int label = new_labels[i][j]; - int yloc = label / TILE_COLS; - int xloc = label - yloc * TILE_COLS; - - xloc += blockIdx.x * TILE_COLS; - yloc += blockIdx.y * TILE_ROWS; - - label = yloc * edges.cols + xloc; - // do it for x too. 
- if (y + CTA_SIZE_Y * i < comps.rows) comps(y + CTA_SIZE_Y * i, x + CTA_SIZE_X * j) = label; - } - } - - __device__ __forceinline__ int root(const PtrStepSzi& comps, int label) - { - while(1) - { - int y = label / comps.cols; - int x = label - y * comps.cols; - - int parent = comps(y, x); - - if (label == parent) break; - - label = parent; - } - return label; - } - - __device__ __forceinline__ void isConnected(PtrStepSzi& comps, int l1, int l2, bool& changed) - { - int r1 = root(comps, l1); - int r2 = root(comps, l2); - - if (r1 == r2) return; - - int mi = ::min(r1, r2); - int ma = ::max(r1, r2); - - int y = ma / comps.cols; - int x = ma - y * comps.cols; - - atomicMin(&comps.ptr(y)[x], mi); - changed = true; - } - - __global__ void crossMerge(const int tilesNumY, const int tilesNumX, int tileSizeY, int tileSizeX, - const PtrStepSzb edges, PtrStepSzi comps, const int yIncomplete, int xIncomplete) - { - int tid = threadIdx.y * blockDim.x + threadIdx.x; - int stride = blockDim.y * blockDim.x; - - int ybegin = blockIdx.y * (tilesNumY * tileSizeY); - int yend = ybegin + tilesNumY * tileSizeY; - - if (blockIdx.y == gridDim.y - 1) - { - yend -= yIncomplete * tileSizeY; - yend -= tileSizeY; - tileSizeY = (edges.rows % tileSizeY); - - yend += tileSizeY; - } - - int xbegin = blockIdx.x * tilesNumX * tileSizeX; - int xend = xbegin + tilesNumX * tileSizeX; - - if (blockIdx.x == gridDim.x - 1) - { - if (xIncomplete) yend = ybegin; - xend -= xIncomplete * tileSizeX; - xend -= tileSizeX; - tileSizeX = (edges.cols % tileSizeX); - - xend += tileSizeX; - } - - if (blockIdx.y == (gridDim.y - 1) && yIncomplete) - { - xend = xbegin; - } - - int tasksV = (tilesNumX - 1) * (yend - ybegin); - int tasksH = (tilesNumY - 1) * (xend - xbegin); - - int total = tasksH + tasksV; - - bool changed; - do - { - changed = false; - for (int taskIdx = tid; taskIdx < total; taskIdx += stride) - { - if (taskIdx < tasksH) - { - int indexH = taskIdx; - - int row = indexH / (xend - xbegin); - int col = 
indexH - row * (xend - xbegin); - - int y = ybegin + (row + 1) * tileSizeY; - int x = xbegin + col; - - component e = edges( x, y); - if (e & UP) - { - int lc = comps(y,x); - int lu = comps(y - 1, x); - - isConnected(comps, lc, lu, changed); - } - } - else - { - int indexV = taskIdx - tasksH; - - int col = indexV / (yend - ybegin); - int row = indexV - col * (yend - ybegin); - - int x = xbegin + (col + 1) * tileSizeX; - int y = ybegin + row; - - component e = edges(x, y); - if (e & LEFT) - { - int lc = comps(y, x); - int ll = comps(y, x - 1); - - isConnected(comps, lc, ll, changed); - } - } - } - } while (Emulation::syncthreadsOr(changed)); - } - - __global__ void flatten(const PtrStepSzb edges, PtrStepSzi comps) - { - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - - if( x < comps.cols && y < comps.rows) - comps(y, x) = root(comps, comps(y, x)); - } - - enum {CC_NO_COMPACT = 0, CC_COMPACT_LABELS = 1}; - - void labelComponents(const PtrStepSzb& edges, PtrStepSzi comps, int flags, cudaStream_t stream) - { - (void) flags; - dim3 block(CTA_SIZE_X, CTA_SIZE_Y); - dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS)); - - lableTiles<<>>(edges, comps); - cudaSafeCall( cudaGetLastError() ); - - int tileSizeX = TILE_COLS, tileSizeY = TILE_ROWS; - while (grid.x > 1 || grid.y > 1) - { - dim3 mergeGrid((int)ceilf(grid.x / 2.f), (int)ceilf(grid.y / 2.f)); - dim3 mergeBlock(STA_SIZE_MERGE_X, STA_SIZE_MERGE_Y); - // debug log - // std::cout << "merging: " << grid.y << " x " << grid.x << " ---> " << mergeGrid.y << " x " << mergeGrid.x << " for tiles: " << tileSizeY << " x " << tileSizeX << std::endl; - crossMerge<<>>(2, 2, tileSizeY, tileSizeX, edges, comps, (int)ceilf(grid.y / 2.f) - grid.y / 2, (int)ceilf(grid.x / 2.f) - grid.x / 2); - tileSizeX <<= 1; - tileSizeY <<= 1; - grid = mergeGrid; - - cudaSafeCall( cudaGetLastError() ); - } - - grid.x = divUp(edges.cols, block.x); - grid.y = divUp(edges.rows, block.y); 
- flatten<<>>(edges, comps); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - } -} } } - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/clahe.cu b/modules/gpu/src/cuda/clahe.cu deleted file mode 100644 index 7c6645749b..0000000000 --- a/modules/gpu/src/cuda/clahe.cu +++ /dev/null @@ -1,186 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/emulation.hpp" -#include "opencv2/core/cuda/scan.hpp" -#include "opencv2/core/cuda/reduce.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" - -using namespace cv::gpu; -using namespace cv::gpu::cudev; - -namespace clahe -{ - __global__ void calcLutKernel(const PtrStepb src, PtrStepb lut, - const int2 tileSize, const int tilesX, - const int clipLimit, const float lutScale) - { - __shared__ int smem[512]; - - const int tx = blockIdx.x; - const int ty = blockIdx.y; - const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x; - - smem[tid] = 0; - __syncthreads(); - - for (int i = threadIdx.y; i < tileSize.y; i += blockDim.y) - { - const uchar* srcPtr = src.ptr(ty * tileSize.y + i) + tx * tileSize.x; - for (int j = threadIdx.x; j < tileSize.x; j += blockDim.x) - { - const int data = srcPtr[j]; - Emulation::smem::atomicAdd(&smem[data], 1); - } - } - - __syncthreads(); - - int tHistVal = smem[tid]; - - __syncthreads(); - - if (clipLimit > 0) - { - // clip histogram bar - - int clipped = 0; - if (tHistVal > clipLimit) - { - clipped = tHistVal - clipLimit; - tHistVal = clipLimit; - } - - // find number of overall clipped samples - - reduce<256>(smem, clipped, tid, plus()); - - // broadcast evaluated value - - __shared__ int totalClipped; - - if (tid == 0) - totalClipped = 
clipped; - __syncthreads(); - - // redistribute clipped samples evenly - - int redistBatch = totalClipped / 256; - tHistVal += redistBatch; - - int residual = totalClipped - redistBatch * 256; - if (tid < residual) - ++tHistVal; - } - - const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid); - - lut(ty * tilesX + tx, tid) = saturate_cast(__float2int_rn(lutScale * lutVal)); - } - - void calcLut(PtrStepSzb src, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, cudaStream_t stream) - { - const dim3 block(32, 8); - const dim3 grid(tilesX, tilesY); - - calcLutKernel<<>>(src, lut, tileSize, tilesX, clipLimit, lutScale); - - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void tranformKernel(const PtrStepSzb src, PtrStepb dst, const PtrStepb lut, const int2 tileSize, const int tilesX, const int tilesY) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= src.cols || y >= src.rows) - return; - - const float tyf = (static_cast(y) / tileSize.y) - 0.5f; - int ty1 = __float2int_rd(tyf); - int ty2 = ty1 + 1; - const float ya = tyf - ty1; - ty1 = ::max(ty1, 0); - ty2 = ::min(ty2, tilesY - 1); - - const float txf = (static_cast(x) / tileSize.x) - 0.5f; - int tx1 = __float2int_rd(txf); - int tx2 = tx1 + 1; - const float xa = txf - tx1; - tx1 = ::max(tx1, 0); - tx2 = ::min(tx2, tilesX - 1); - - const int srcVal = src(y, x); - - float res = 0; - - res += lut(ty1 * tilesX + tx1, srcVal) * ((1.0f - xa) * (1.0f - ya)); - res += lut(ty1 * tilesX + tx2, srcVal) * ((xa) * (1.0f - ya)); - res += lut(ty2 * tilesX + tx1, srcVal) * ((1.0f - xa) * (ya)); - res += lut(ty2 * tilesX + tx2, srcVal) * ((xa) * (ya)); - - dst(y, x) = saturate_cast(res); - } - - void transform(PtrStepSzb src, PtrStepSzb dst, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, cudaStream_t stream) - { - const dim3 block(32, 8); - 
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(tranformKernel, cudaFuncCachePreferL1) ); - - tranformKernel<<>>(src, dst, lut, tileSize, tilesX, tilesY); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } -} - -#endif // CUDA_DISABLER diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu deleted file mode 100644 index 1a5d4865ed..0000000000 --- a/modules/gpu/src/cuda/color.cu +++ /dev/null @@ -1,461 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/color.hpp" -#include "cvt_color_internal.h" - -namespace cv { namespace gpu { namespace cudev -{ - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits::functor_type) - { - enum { smart_block_dim_x = 8 }; - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - 
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type) - { - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type) - { - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - 
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits::functor_type) - { - enum { smart_block_dim_y = 8 }; - enum { smart_shift = 4 }; - }; - -#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \ - void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) \ - { \ - traits::functor_type functor = traits::create_functor(); \ - typedef typename traits::functor_type::argument_type src_t; \ - 
typedef typename traits::functor_type::result_type dst_t; \ - cv::gpu::cudev::transform((PtrStepSz)src, (PtrStepSz)dst, functor, WithOutMask(), stream); \ - } - -#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits) - -#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) - -#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) - -#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(name) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_8u, name ## _full_traits) \ - OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_32f, name ## _full_traits) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgba) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr565) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr555) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr565) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr555) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr565) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr555) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr565) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgba) - 
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr555) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr565) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_gray) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_gray) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_gray) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_gray) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_gray) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_gray) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgba) - 
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hsv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hsv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hsv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hsv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hsv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hsv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hsv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hsv4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_bgra) - - 
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hls) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hls) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hls4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hls4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hls) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hls) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hls4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hls4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_lab4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_lab4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_lab4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_lab4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_lab4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_lab4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_lab) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_lab4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_lab4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_bgr) - 
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lrgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lrgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lrgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lrgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lbgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lbgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lbgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lbgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_luv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_luv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_luv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_luv4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_luv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_luv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_luv) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_luv4) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_luv4) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_rgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_rgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_bgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_bgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_bgra) - - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lrgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lrgb) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lrgba) - 
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lrgba) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lbgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lbgr) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lbgra) - OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lbgra) - - #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR - #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE - #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL - #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F - #undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL -}}} // namespace cv { namespace gpu { namespace cudev - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/debayer.cu b/modules/gpu/src/cuda/debayer.cu deleted file mode 100644 index 46a1c14ef4..0000000000 --- a/modules/gpu/src/cuda/debayer.cu +++ /dev/null @@ -1,544 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/limits.hpp" -#include "opencv2/core/cuda/color.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - template struct Bayer2BGR; - - template <> struct Bayer2BGR - { - uchar3 res0; - uchar3 res1; - uchar3 res2; - uchar3 res3; - - __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green) - { - uchar4 patch[3][3]; - patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x]; - patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; - patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; - - patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x]; - patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)]; - patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 
3) >> 2) - 1)]; - - patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x]; - patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; - patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; - - if ((s_y & 1) ^ start_with_green) - { - const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1; - const int t1 = (patch[1][0].w + patch[1][1].y + 1) >> 1; - - const int t2 = (patch[0][1].x + patch[0][1].z + patch[2][1].x + patch[2][1].z + 2) >> 2; - const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][1].z + patch[2][1].y + 2) >> 2; - - const int t4 = (patch[0][1].z + patch[2][1].z + 1) >> 1; - const int t5 = (patch[1][1].y + patch[1][1].w + 1) >> 1; - - const int t6 = (patch[0][1].z + patch[0][2].x + patch[2][1].z + patch[2][2].x + 2) >> 2; - const int t7 = (patch[0][1].w + patch[1][1].z + patch[1][2].x + patch[2][1].w + 2) >> 2; - - if ((s_y & 1) ^ blue_last) - { - res0.x = t1; - res0.y = patch[1][1].x; - res0.z = t0; - - res1.x = patch[1][1].y; - res1.y = t3; - res1.z = t2; - - res2.x = t5; - res2.y = patch[1][1].z; - res2.z = t4; - - res3.x = patch[1][1].w; - res3.y = t7; - res3.z = t6; - } - else - { - res0.x = t0; - res0.y = patch[1][1].x; - res0.z = t1; - - res1.x = t2; - res1.y = t3; - res1.z = patch[1][1].y; - - res2.x = t4; - res2.y = patch[1][1].z; - res2.z = t5; - - res3.x = t6; - res3.y = t7; - res3.z = patch[1][1].w; - } - } - else - { - const int t0 = (patch[0][0].w + patch[0][1].y + patch[2][0].w + patch[2][1].y + 2) >> 2; - const int t1 = (patch[0][1].x + patch[1][0].w + patch[1][1].y + patch[2][1].x + 2) >> 2; - - const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1; - const int t3 = (patch[1][1].x + patch[1][1].z + 1) >> 1; - - const int t4 = (patch[0][1].y + patch[0][1].w + patch[2][1].y + patch[2][1].w + 2) >> 2; - const int t5 = (patch[0][1].z + patch[1][1].y + patch[1][1].w + patch[2][1].z + 2) >> 2; - - const int t6 = (patch[0][1].w + patch[2][1].w + 1) >> 1; - const int t7 = (patch[1][1].z + 
patch[1][2].x + 1) >> 1; - - if ((s_y & 1) ^ blue_last) - { - res0.x = patch[1][1].x; - res0.y = t1; - res0.z = t0; - - res1.x = t3; - res1.y = patch[1][1].y; - res1.z = t2; - - res2.x = patch[1][1].z; - res2.y = t5; - res2.z = t4; - - res3.x = t7; - res3.y = patch[1][1].w; - res3.z = t6; - } - else - { - res0.x = t0; - res0.y = t1; - res0.z = patch[1][1].x; - - res1.x = t2; - res1.y = patch[1][1].y; - res1.z = t3; - - res2.x = t4; - res2.y = t5; - res2.z = patch[1][1].z; - - res3.x = t6; - res3.y = patch[1][1].w; - res3.z = t7; - } - } - } - }; - - template __device__ __forceinline__ D toDst(const uchar3& pix); - template <> __device__ __forceinline__ uchar toDst(const uchar3& pix) - { - typename bgr_to_gray_traits::functor_type f = bgr_to_gray_traits::create_functor(); - return f(pix); - } - template <> __device__ __forceinline__ uchar3 toDst(const uchar3& pix) - { - return pix; - } - template <> __device__ __forceinline__ uchar4 toDst(const uchar3& pix) - { - return make_uchar4(pix.x, pix.y, pix.z, 255); - } - - template - __global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep dst, const bool blue_last, const bool start_with_green) - { - const int s_x = blockIdx.x * blockDim.x + threadIdx.x; - int s_y = blockIdx.y * blockDim.y + threadIdx.y; - - if (s_y >= src.rows || (s_x << 2) >= src.cols) - return; - - s_y = ::min(::max(s_y, 1), src.rows - 2); - - Bayer2BGR bayer; - bayer.apply(src, s_x, s_y, blue_last, start_with_green); - - const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; - const int d_y = blockIdx.y * blockDim.y + threadIdx.y; - - dst(d_y, d_x) = toDst(bayer.res0); - if (d_x + 1 < src.cols) - dst(d_y, d_x + 1) = toDst(bayer.res1); - if (d_x + 2 < src.cols) - dst(d_y, d_x + 2) = toDst(bayer.res2); - if (d_x + 3 < src.cols) - dst(d_y, d_x + 3) = toDst(bayer.res3); - } - - template <> struct Bayer2BGR - { - ushort3 res0; - ushort3 res1; - - __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green) - 
{ - ushort2 patch[3][3]; - patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x]; - patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; - patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; - - patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x]; - patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)]; - patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; - - patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x]; - patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; - patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; - - if ((s_y & 1) ^ start_with_green) - { - const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1; - const int t1 = (patch[1][0].y + patch[1][1].y + 1) >> 1; - - const int t2 = (patch[0][1].x + patch[0][2].x + patch[2][1].x + patch[2][2].x + 2) >> 2; - const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][2].x + patch[2][1].y + 2) >> 2; - - if ((s_y & 1) ^ blue_last) - { - res0.x = t1; - res0.y = patch[1][1].x; - res0.z = t0; - - res1.x = patch[1][1].y; - res1.y = t3; - res1.z = t2; - } - else - { - res0.x = t0; - res0.y = patch[1][1].x; - res0.z = t1; - - res1.x = t2; - res1.y = t3; - res1.z = patch[1][1].y; - } - } - else - { - const int t0 = (patch[0][0].y + patch[0][1].y + patch[2][0].y + patch[2][1].y + 2) >> 2; - const int t1 = (patch[0][1].x + patch[1][0].y + patch[1][1].y + patch[2][1].x + 2) >> 2; - - const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1; - const int t3 = (patch[1][1].x + patch[1][2].x + 1) >> 1; - - if ((s_y & 1) ^ blue_last) - { - res0.x = patch[1][1].x; - res0.y = t1; - res0.z = t0; - - res1.x = t3; - res1.y = patch[1][1].y; - res1.z = t2; - } - else - { - res0.x = t0; - res0.y = t1; - res0.z = patch[1][1].x; - - res1.x = t2; - res1.y = patch[1][1].y; - res1.z = t3; - } - } - } - }; - - template __device__ __forceinline__ D toDst(const ushort3& 
pix); - template <> __device__ __forceinline__ ushort toDst(const ushort3& pix) - { - typename bgr_to_gray_traits::functor_type f = bgr_to_gray_traits::create_functor(); - return f(pix); - } - template <> __device__ __forceinline__ ushort3 toDst(const ushort3& pix) - { - return pix; - } - template <> __device__ __forceinline__ ushort4 toDst(const ushort3& pix) - { - return make_ushort4(pix.x, pix.y, pix.z, numeric_limits::max()); - } - - template - __global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep dst, const bool blue_last, const bool start_with_green) - { - const int s_x = blockIdx.x * blockDim.x + threadIdx.x; - int s_y = blockIdx.y * blockDim.y + threadIdx.y; - - if (s_y >= src.rows || (s_x << 1) >= src.cols) - return; - - s_y = ::min(::max(s_y, 1), src.rows - 2); - - Bayer2BGR bayer; - bayer.apply(src, s_x, s_y, blue_last, start_with_green); - - const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; - const int d_y = blockIdx.y * blockDim.y + threadIdx.y; - - dst(d_y, d_x) = toDst(bayer.res0); - if (d_x + 1 < src.cols) - dst(d_y, d_x + 1) = toDst(bayer.res1); - } - - template - void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) - { - typedef typename TypeVec::vec_type dst_t; - - const dim3 block(32, 8); - const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u, cudaFuncCachePreferL1) ); - - Bayer2BGR_8u<<>>(src, (PtrStepSz)dst, blue_last, start_with_green); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template - void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) - { - typedef typename TypeVec::vec_type dst_t; - - const dim3 block(32, 8); - const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u, 
cudaFuncCachePreferL1) ); - - Bayer2BGR_16u<<>>(src, (PtrStepSz)dst, blue_last, start_with_green); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - - template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); - - ////////////////////////////////////////////////////////////// - // Bayer Demosaicing (Malvar, He, and Cutler) - // - // by Morgan McGuire, Williams College - // http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders - // - // ported to CUDA - - texture sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp); - - template - __global__ void MHCdemosaic(PtrStepSz dst, const int2 sourceOffset, const int2 firstRed) - { - const float kAx = -1.0f / 8.0f, kAy = -1.5f / 8.0f, kAz = 0.5f / 8.0f /*kAw = -1.0f / 8.0f*/; - const float kBx = 2.0f / 8.0f, /*kBy = 0.0f / 8.0f,*/ /*kBz = 0.0f / 8.0f,*/ kBw = 4.0f / 8.0f ; - const float kCx = 4.0f / 8.0f, kCy = 6.0f / 8.0f, kCz = 5.0f / 8.0f /*kCw = 5.0f / 8.0f*/; - const float /*kDx = 0.0f / 8.0f,*/ kDy = 2.0f / 8.0f, kDz = -1.0f / 8.0f /*kDw = -1.0f / 8.0f*/; - const float kEx = -1.0f / 8.0f, kEy = -1.5f / 8.0f, /*kEz = -1.0f / 8.0f,*/ kEw = 0.5f / 8.0f ; - const float kFx = 2.0f / 8.0f, /*kFy = 0.0f / 8.0f,*/ kFz = 4.0f / 8.0f /*kFw = 0.0f / 8.0f*/; 
- - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1) - return; - - int2 center; - center.x = x + sourceOffset.x; - center.y = y + sourceOffset.y; - - int4 xCoord; - xCoord.x = center.x - 2; - xCoord.y = center.x - 1; - xCoord.z = center.x + 1; - xCoord.w = center.x + 2; - - int4 yCoord; - yCoord.x = center.y - 2; - yCoord.y = center.y - 1; - yCoord.z = center.y + 1; - yCoord.w = center.y + 2; - - float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0) - - float4 Dvec; - Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1) - Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1) - Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1) - Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1) - - float4 value; - value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0 - value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0 - value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0 - value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0 - - // (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1) - value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1 - value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1 - value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1 - value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1 - - float4 PATTERN; - PATTERN.x = kCx * C; - PATTERN.y = kCy * C; - PATTERN.z = kCz * C; - PATTERN.w = PATTERN.z; - - float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w; - - // There are five filter patterns (identity, cross, checker, - // theta, phi). Precompute the terms from all of them and then - // use swizzles to assign to color channels. 
- // - // Channel Matches - // x cross (e.g., EE G) - // y checker (e.g., EE B) - // z theta (e.g., EO R) - // w phi (e.g., EO B) - - #define A value.x // A0 + A1 - #define B value.y // B0 + B1 - #define E value.z // E0 + E1 - #define F value.w // F0 + F1 - - float3 temp; - - // PATTERN.yzw += (kD.yz * D).xyy; - temp.x = kDy * D; - temp.y = kDz * D; - PATTERN.y += temp.x; - PATTERN.z += temp.y; - PATTERN.w += temp.y; - - // PATTERN += (kA.xyz * A).xyzx; - temp.x = kAx * A; - temp.y = kAy * A; - temp.z = kAz * A; - PATTERN.x += temp.x; - PATTERN.y += temp.y; - PATTERN.z += temp.z; - PATTERN.w += temp.x; - - // PATTERN += (kE.xyw * E).xyxz; - temp.x = kEx * E; - temp.y = kEy * E; - temp.z = kEw * E; - PATTERN.x += temp.x; - PATTERN.y += temp.y; - PATTERN.z += temp.x; - PATTERN.w += temp.z; - - // PATTERN.xw += kB.xw * B; - PATTERN.x += kBx * B; - PATTERN.w += kBw * B; - - // PATTERN.xz += kF.xz * F; - PATTERN.x += kFx * F; - PATTERN.z += kFz * F; - - // Determine which of four types of pixels we are on. - int2 alternate; - alternate.x = (x + firstRed.x) % 2; - alternate.y = (y + firstRed.y) % 2; - - // in BGR sequence; - uchar3 pixelColor = - (alternate.y == 0) ? - ((alternate.x == 0) ? - make_uchar3(saturate_cast(PATTERN.y), saturate_cast(PATTERN.x), saturate_cast(C)) : - make_uchar3(saturate_cast(PATTERN.w), saturate_cast(C), saturate_cast(PATTERN.z))) : - ((alternate.x == 0) ? 
- make_uchar3(saturate_cast(PATTERN.z), saturate_cast(C), saturate_cast(PATTERN.w)) : - make_uchar3(saturate_cast(C), saturate_cast(PATTERN.x), saturate_cast(PATTERN.y))); - - dst(y, x) = toDst(pixelColor); - } - - template - void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream) - { - typedef typename TypeVec::vec_type dst_t; - - const dim3 block(32, 8); - const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); - - bindTexture(&sourceTex, src); - - MHCdemosaic<<>>((PtrStepSz)dst, sourceOffset, firstRed); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); - template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); - template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); -}}} - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/gftt.cu b/modules/gpu/src/cuda/gftt.cu deleted file mode 100644 index b4af9e5dbc..0000000000 --- a/modules/gpu/src/cuda/gftt.cu +++ /dev/null @@ -1,143 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#if !defined CUDA_DISABLER - -#include -#include - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/utility.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace gfft - { - texture eigTex(0, cudaFilterModePoint, cudaAddressModeClamp); - - __device__ int g_counter = 0; - - template __global__ void findCorners(float threshold, const Mask mask, float2* corners, int max_count, int rows, int cols) - { - const int j = blockIdx.x * blockDim.x + threadIdx.x; - const int i = blockIdx.y * blockDim.y + threadIdx.y; - - if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 && mask(i, j)) - { - float val = tex2D(eigTex, j, i); - - if (val > threshold) - { - float maxVal = val; - - maxVal = ::fmax(tex2D(eigTex, j - 1, i - 1), maxVal); - maxVal = ::fmax(tex2D(eigTex, j , i - 1), maxVal); - maxVal = ::fmax(tex2D(eigTex, j + 1, i - 1), maxVal); - - maxVal = ::fmax(tex2D(eigTex, j - 1, i), maxVal); - maxVal = ::fmax(tex2D(eigTex, j + 1, i), maxVal); - - maxVal = ::fmax(tex2D(eigTex, j - 1, i + 1), maxVal); - maxVal = ::fmax(tex2D(eigTex, j , i + 1), maxVal); - maxVal = ::fmax(tex2D(eigTex, j + 1, i + 1), maxVal); - - if (val == maxVal) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < max_count) - corners[ind] = make_float2(j, i); - } - } - } - } - - int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count) - { - void* counter_ptr; - cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) ); - - cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) ); - - bindTexture(&eigTex, eig); - - dim3 block(16, 16); - dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y)); - - if (mask.data) - findCorners<<>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols); - else - findCorners<<>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols); - - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int count; - 
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) ); - - return std::min(count, max_count); - } - - class EigGreater - { - public: - __device__ __forceinline__ bool operator()(float2 a, float2 b) const - { - return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y); - } - }; - - - void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count) - { - bindTexture(&eigTex, eig); - - thrust::device_ptr ptr(corners); - - thrust::sort(ptr, ptr + count, EigGreater()); - } - } // namespace optical_flow -}}} - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/hist.cu b/modules/gpu/src/cuda/hist.cu deleted file mode 100644 index 474c27cf76..0000000000 --- a/modules/gpu/src/cuda/hist.cu +++ /dev/null @@ -1,153 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/emulation.hpp" -#include "opencv2/core/cuda/transform.hpp" - -using namespace cv::gpu; -using namespace cv::gpu::cudev; - -namespace hist -{ - __global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist) - { - __shared__ int shist[256]; - - const int y = blockIdx.x * blockDim.y + threadIdx.y; - const int tid = threadIdx.y * blockDim.x + threadIdx.x; - - shist[tid] = 0; - __syncthreads(); - - if (y < rows) - { - const unsigned int* rowPtr = (const unsigned int*) (src + y * step); - - const int cols_4 = cols / 4; - for (int x = threadIdx.x; x < cols_4; x += blockDim.x) - { - unsigned int data = rowPtr[x]; - - Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1); - Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1); - Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1); - 
Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1); - } - - if (cols % 4 != 0 && threadIdx.x == 0) - { - for (int x = cols_4 * 4; x < cols; ++x) - { - unsigned int data = ((const uchar*)rowPtr)[x]; - Emulation::smem::atomicAdd(&shist[data], 1); - } - } - } - - __syncthreads(); - - const int histVal = shist[tid]; - if (histVal > 0) - ::atomicAdd(hist + tid, histVal); - } - - void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream) - { - const dim3 block(32, 8); - const dim3 grid(divUp(src.rows, block.y)); - - histogram256Kernel<<>>(src.data, src.cols, src.rows, src.step, hist); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } -} - -///////////////////////////////////////////////////////////////////////// - -namespace hist -{ - __constant__ int c_lut[256]; - - struct EqualizeHist : unary_function - { - float scale; - - __host__ EqualizeHist(float _scale) : scale(_scale) {} - - __device__ __forceinline__ uchar operator ()(uchar val) const - { - const int lut = c_lut[val]; - return __float2int_rn(scale * lut); - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template <> struct TransformFunctorTraits : DefaultTransformFunctorTraits - { - enum { smart_shift = 4 }; - }; -}}} - -namespace hist -{ - void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream) - { - if (stream == 0) - cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) ); - else - cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) ); - - const float scale = 255.0f / (src.cols * src.rows); - - cudev::transform(src, dst, EqualizeHist(scale), WithOutMask(), stream); - } -} - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu deleted file mode 100644 index 5a4481b6e5..0000000000 --- a/modules/gpu/src/cuda/hough.cu +++ /dev/null @@ -1,1709 +0,0 @@ 
-/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include -#include - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/emulation.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/limits.hpp" -#include "opencv2/core/cuda/dynamic_smem.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace hough - { - __device__ int g_counter; - - //////////////////////////////////////////////////////////////////////// - // buildPointList - - template - __global__ void buildPointList(const PtrStepSzb src, unsigned int* list) - { - __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD]; - __shared__ int s_qsize[4]; - __shared__ int s_globStart[4]; - - const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (threadIdx.x == 0) - s_qsize[threadIdx.y] = 0; - __syncthreads(); - - if (y < src.rows) - { - // fill the queue - const uchar* srcRow = src.ptr(y); - for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x) - { - if (srcRow[xx]) - { - const unsigned int val = (y << 16) | xx; - const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1); - s_queues[threadIdx.y][qidx] = val; - } - } - } - - __syncthreads(); - - // let one thread reserve the space required in the global list - if (threadIdx.x == 0 && threadIdx.y == 0) - { - // find how many items are stored in each list - 
int totalSize = 0; - for (int i = 0; i < blockDim.y; ++i) - { - s_globStart[i] = totalSize; - totalSize += s_qsize[i]; - } - - // calculate the offset in the global list - const int globalOffset = atomicAdd(&g_counter, totalSize); - for (int i = 0; i < blockDim.y; ++i) - s_globStart[i] += globalOffset; - } - - __syncthreads(); - - // copy local queues to global queue - const int qsize = s_qsize[threadIdx.y]; - int gidx = s_globStart[threadIdx.y] + threadIdx.x; - for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x) - list[gidx] = s_queues[threadIdx.y][i]; - } - - int buildPointList_gpu(PtrStepSzb src, unsigned int* list) - { - const int PIXELS_PER_THREAD = 16; - - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 4); - const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(buildPointList, cudaFuncCachePreferShared) ); - - buildPointList<<>>(src, list); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // linesAccum - - __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho) - { - const int n = blockIdx.x; - const float ang = n * theta; - - float sinVal; - float cosVal; - sincosf(ang, &sinVal, &cosVal); - sinVal *= irho; - cosVal *= irho; - - const int shift = (numrho - 1) / 2; - - int* accumRow = accum.ptr(n + 1); - for (int i = threadIdx.x; i < count; i += blockDim.x) - { - const unsigned int val = list[i]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - int r = __float2int_rn(x * cosVal + y * 
sinVal); - r += shift; - - ::atomicAdd(accumRow + r + 1, 1); - } - } - - __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho) - { - int* smem = DynamicSharedMem(); - - for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) - smem[i] = 0; - - __syncthreads(); - - const int n = blockIdx.x; - const float ang = n * theta; - - float sinVal; - float cosVal; - sincosf(ang, &sinVal, &cosVal); - sinVal *= irho; - cosVal *= irho; - - const int shift = (numrho - 1) / 2; - - for (int i = threadIdx.x; i < count; i += blockDim.x) - { - const unsigned int val = list[i]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - int r = __float2int_rn(x * cosVal + y * sinVal); - r += shift; - - Emulation::smem::atomicAdd(&smem[r + 1], 1); - } - - __syncthreads(); - - int* accumRow = accum.ptr(n + 1); - for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) - accumRow[i] = smem[i]; - } - - void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20) - { - const dim3 block(has20 ? 
1024 : 512); - const dim3 grid(accum.rows - 2); - - size_t smemSize = (accum.cols - 1) * sizeof(int); - - if (smemSize < sharedMemPerBlock - 1000) - linesAccumShared<<>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); - else - linesAccumGlobal<<>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); - - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - //////////////////////////////////////////////////////////////////////// - // linesGetResult - - __global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho) - { - const int r = blockIdx.x * blockDim.x + threadIdx.x; - const int n = blockIdx.y * blockDim.y + threadIdx.y; - - if (r >= accum.cols - 2 || n >= accum.rows - 2) - return; - - const int curVotes = accum(n + 1, r + 1); - - if (curVotes > threshold && - curVotes > accum(n + 1, r) && - curVotes >= accum(n + 1, r + 2) && - curVotes > accum(n, r + 1) && - curVotes >= accum(n + 2, r + 1)) - { - const float radius = (r - (numrho - 1) * 0.5f) * rho; - const float angle = n * theta; - - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxSize) - { - out[ind] = make_float2(radius, angle); - votes[ind] = curVotes; - } - } - } - - int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) ); - - linesGetResult<<>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( 
cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - if (doSort && totalCount > 0) - { - thrust::device_ptr outPtr(out); - thrust::device_ptr votesPtr(votes); - thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater()); - } - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // houghLinesProbabilistic - - texture tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp); - - __global__ void houghLinesProbabilistic(const PtrStepSzi accum, - int4* out, const int maxSize, - const float rho, const float theta, - const int lineGap, const int lineLength, - const int rows, const int cols) - { - const int r = blockIdx.x * blockDim.x + threadIdx.x; - const int n = blockIdx.y * blockDim.y + threadIdx.y; - - if (r >= accum.cols - 2 || n >= accum.rows - 2) - return; - - const int curVotes = accum(n + 1, r + 1); - - if (curVotes >= lineLength && - curVotes > accum(n, r) && - curVotes > accum(n, r + 1) && - curVotes > accum(n, r + 2) && - curVotes > accum(n + 1, r) && - curVotes > accum(n + 1, r + 2) && - curVotes > accum(n + 2, r) && - curVotes > accum(n + 2, r + 1) && - curVotes > accum(n + 2, r + 2)) - { - const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho; - const float angle = n * theta; - - float cosa; - float sina; - sincosf(angle, &sina, &cosa); - - float2 p0 = make_float2(cosa * radius, sina * radius); - float2 dir = make_float2(-sina, cosa); - - float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)}; - float a; - - if (dir.x != 0) - { - a = -p0.x / dir.x; - pb[0].x = 0; - pb[0].y = p0.y + a * dir.y; - - a = (cols - 1 - p0.x) / dir.x; - pb[1].x = cols - 1; - pb[1].y = p0.y + a * dir.y; - } - if (dir.y != 0) - { - a = -p0.y / dir.y; - pb[2].x = p0.x + a * dir.x; - pb[2].y = 0; - - a = (rows - 1 - p0.y) / dir.y; - pb[3].x = p0.x + a * dir.x; - pb[3].y = rows - 1; - } - - 
if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows)) - { - p0 = pb[0]; - if (dir.x < 0) - dir = -dir; - } - else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows)) - { - p0 = pb[1]; - if (dir.x > 0) - dir = -dir; - } - else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols)) - { - p0 = pb[2]; - if (dir.y < 0) - dir = -dir; - } - else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols)) - { - p0 = pb[3]; - if (dir.y > 0) - dir = -dir; - } - - float2 d; - if (::fabsf(dir.x) > ::fabsf(dir.y)) - { - d.x = dir.x > 0 ? 1 : -1; - d.y = dir.y / ::fabsf(dir.x); - } - else - { - d.x = dir.x / ::fabsf(dir.y); - d.y = dir.y > 0 ? 1 : -1; - } - - float2 line_end[2]; - int gap; - bool inLine = false; - - float2 p1 = p0; - if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) - return; - - for (;;) - { - if (tex2D(tex_mask, p1.x, p1.y)) - { - gap = 0; - - if (!inLine) - { - line_end[0] = p1; - line_end[1] = p1; - inLine = true; - } - else - { - line_end[1] = p1; - } - } - else if (inLine) - { - if (++gap > lineGap) - { - bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || - ::abs(line_end[1].y - line_end[0].y) >= lineLength; - - if (good_line) - { - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxSize) - out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); - } - - gap = 0; - inLine = false; - } - } - - p1 = p1 + d; - if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) - { - if (inLine) - { - bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || - ::abs(line_end[1].y - line_end[0].y) >= lineLength; - - if (good_line) - { - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxSize) - out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); - } - - } - break; - } - } - } - } - - int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength) - { - void* counterPtr; - 
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); - - bindTexture(&tex_mask, mask); - - houghLinesProbabilistic<<>>(accum, - out, maxSize, - rho, theta, - lineGap, lineLength, - mask.rows, mask.cols); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // circlesAccumCenters - - __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy, - PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp) - { - const int SHIFT = 10; - const int ONE = 1 << SHIFT; - - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - if (tid >= count) - return; - - const unsigned int val = list[tid]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - const int vx = dx(y, x); - const int vy = dy(y, x); - - if (vx == 0 && vy == 0) - return; - - const float mag = ::sqrtf(vx * vx + vy * vy); - - const int x0 = __float2int_rn((x * idp) * ONE); - const int y0 = __float2int_rn((y * idp) * ONE); - - int sx = __float2int_rn((vx * idp) * ONE / mag); - int sy = __float2int_rn((vy * idp) * ONE / mag); - - // Step from minRadius to maxRadius in both directions of the gradient - for (int k1 = 0; k1 < 2; ++k1) - { - int x1 = x0 + minRadius * sx; - int y1 = y0 + minRadius * sy; - - for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r) - { - const int x2 = x1 >> SHIFT; - const int y2 = y1 >> SHIFT; - - if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height) - break; - - 
::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1); - } - - sx = -sx; - sy = -sy; - } - } - - void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp) - { - const dim3 block(256); - const dim3 grid(divUp(count, block.x)); - - cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) ); - - circlesAccumCenters<<>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - //////////////////////////////////////////////////////////////////////// - // buildCentersList - - __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < accum.cols - 2 && y < accum.rows - 2) - { - const int top = accum(y, x + 1); - - const int left = accum(y + 1, x); - const int cur = accum(y + 1, x + 1); - const int right = accum(y + 1, x + 2); - - const int bottom = accum(y + 2, x + 1); - - if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right) - { - const unsigned int val = (y << 16) | x; - const int idx = ::atomicAdd(&g_counter, 1); - centers[idx] = val; - } - } - } - - int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) ); - - buildCentersList<<>>(accum, centers, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( 
cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // circlesAccumRadius - - __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count, - float3* circles, const int maxCircles, const float dp, - const int minRadius, const int maxRadius, const int histSize, const int threshold) - { - int* smem = DynamicSharedMem(); - - for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x) - smem[i] = 0; - __syncthreads(); - - unsigned int val = centers[blockIdx.x]; - - float cx = (val & 0xFFFF); - float cy = (val >> 16) & 0xFFFF; - - cx = (cx + 0.5f) * dp; - cy = (cy + 0.5f) * dp; - - for (int i = threadIdx.x; i < count; i += blockDim.x) - { - val = list[i]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y)); - if (rad >= minRadius && rad <= maxRadius) - { - const int r = __float2int_rn(rad - minRadius); - - Emulation::smem::atomicAdd(&smem[r + 1], 1); - } - } - - __syncthreads(); - - for (int i = threadIdx.x; i < histSize; i += blockDim.x) - { - const int curVotes = smem[i + 1]; - - if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2]) - { - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxCircles) - circles[ind] = make_float3(cx, cy, i + minRadius); - } - } - } - - int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count, - float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(has20 ? 
1024 : 512); - const dim3 grid(centersCount); - - const int histSize = maxRadius - minRadius + 1; - size_t smemSize = (histSize + 2) * sizeof(int); - - circlesAccumRadius<<>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxCircles); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // Generalized Hough - - template - __global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep dx, const PtrStep dy, unsigned int* coordList, float* thetaList) - { - __shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD]; - __shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD]; - __shared__ int s_sizes[4]; - __shared__ int s_globStart[4]; - - const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (threadIdx.x == 0) - s_sizes[threadIdx.y] = 0; - __syncthreads(); - - if (y < edges.rows) - { - // fill the queue - const uchar* edgesRow = edges.ptr(y); - const T* dxRow = dx.ptr(y); - const T* dyRow = dy.ptr(y); - - for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < edges.cols; ++i, xx += blockDim.x) - { - const T dxVal = dxRow[xx]; - const T dyVal = dyRow[xx]; - - if (edgesRow[xx] && (dxVal != 0 || dyVal != 0)) - { - const unsigned int coord = (y << 16) | xx; - - float theta = ::atan2f(dyVal, dxVal); - if (theta < 0) - theta += 2.0f * CV_PI_F; - - const int qidx = Emulation::smem::atomicAdd(&s_sizes[threadIdx.y], 1); - - s_coordLists[threadIdx.y][qidx] = coord; - s_thetaLists[threadIdx.y][qidx] = theta; - } - } - } - - __syncthreads(); - - // let one thread reserve the space required in the global list - if (threadIdx.x == 0 && threadIdx.y == 0) - { - // find 
how many items are stored in each list - int totalSize = 0; - for (int i = 0; i < blockDim.y; ++i) - { - s_globStart[i] = totalSize; - totalSize += s_sizes[i]; - } - - // calculate the offset in the global list - const int globalOffset = atomicAdd(&g_counter, totalSize); - for (int i = 0; i < blockDim.y; ++i) - s_globStart[i] += globalOffset; - } - - __syncthreads(); - - // copy local queues to global queue - const int qsize = s_sizes[threadIdx.y]; - int gidx = s_globStart[threadIdx.y] + threadIdx.x; - for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x) - { - coordList[gidx] = s_coordLists[threadIdx.y][i]; - thetaList[gidx] = s_thetaLists[threadIdx.y][i]; - } - } - - template - int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList) - { - const int PIXELS_PER_THREAD = 8; - - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 4); - const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList, cudaFuncCachePreferShared) ); - - buildEdgePointList<<>>(edges, (PtrStepSz) dx, (PtrStepSz) dy, coordList, thetaList); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - return totalCount; - } - - template int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); - template int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); - template int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList); - - __global__ void buildRTable(const unsigned int* coordList, const 
float* thetaList, const int pointsCount, - PtrStep r_table, int* r_sizes, int maxSize, - const short2 templCenter, const float thetaScale) - { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - if (tid >= pointsCount) - return; - - const unsigned int coord = coordList[tid]; - short2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float theta = thetaList[tid]; - const int n = __float2int_rn(theta * thetaScale); - - const int ind = ::atomicAdd(r_sizes + n, 1); - if (ind < maxSize) - r_table(n, ind) = p - templCenter; - } - - void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, int* r_sizes, - short2 templCenter, int levels) - { - const dim3 block(256); - const dim3 grid(divUp(pointsCount, block.x)); - - const float thetaScale = levels / (2.0f * CV_PI_F); - - buildRTable<<>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Ballard_Pos - - __global__ void GHT_Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount, - const PtrStep r_table, const int* r_sizes, - PtrStepSzi hist, - const float idp, const float thetaScale) - { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - if (tid >= pointsCount) - return; - - const unsigned int coord = coordList[tid]; - short2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float theta = thetaList[tid]; - const int n = __float2int_rn(theta * thetaScale); - - const short2* r_row = r_table.ptr(n); - const int r_row_size = r_sizes[n]; - - for (int j = 0; j < r_row_size; ++j) - { - short2 c = p - r_row[j]; - - c.x = __float2int_rn(c.x * idp); - c.y = __float2int_rn(c.y * idp); - - if (c.x >= 0 && c.x < hist.cols - 2 && c.y >= 0 && c.y < hist.rows - 2) - 
::atomicAdd(hist.ptr(c.y + 1) + c.x + 1, 1); - } - } - - void GHT_Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, const int* r_sizes, - PtrStepSzi hist, - float dp, int levels) - { - const dim3 block(256); - const dim3 grid(divUp(pointsCount, block.x)); - - const float idp = 1.0f / dp; - const float thetaScale = levels / (2.0f * CV_PI_F); - - GHT_Ballard_Pos_calcHist<<>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= hist.cols - 2 || y >= hist.rows - 2) - return; - - const int curVotes = hist(y + 1, x + 1); - - if (curVotes > threshold && - curVotes > hist(y + 1, x) && - curVotes >= hist(y + 1, x + 2) && - curVotes > hist(y, x + 1) && - curVotes >= hist(y + 2, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, 1.0f, 0.0f); - votes[ind] = make_int3(curVotes, 0, 0); - } - } - } - - int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) ); - - GHT_Ballard_Pos_findPosInHist<<>>(hist, out, votes, maxSize, dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - 
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Ballard_PosScale - - __global__ void GHT_Ballard_PosScale_calcHist(const unsigned int* coordList, const float* thetaList, - PtrStep r_table, const int* r_sizes, - PtrStepi hist, const int rows, const int cols, - const float minScale, const float scaleStep, const int scaleRange, - const float idp, const float thetaScale) - { - const unsigned int coord = coordList[blockIdx.x]; - float2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float theta = thetaList[blockIdx.x]; - const int n = __float2int_rn(theta * thetaScale); - - const short2* r_row = r_table.ptr(n); - const int r_row_size = r_sizes[n]; - - for (int j = 0; j < r_row_size; ++j) - { - const float2 d = saturate_cast(r_row[j]); - - for (int s = threadIdx.x; s < scaleRange; s += blockDim.x) - { - const float scale = minScale + s * scaleStep; - - float2 c = p - scale * d; - - c.x *= idp; - c.y *= idp; - - if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows) - ::atomicAdd(hist.ptr((s + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1); - } - } - } - - void GHT_Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, const int* r_sizes, - PtrStepi hist, int rows, int cols, - float minScale, float scaleStep, int scaleRange, - float dp, int levels) - { - const dim3 block(256); - const dim3 grid(pointsCount); - - const float idp = 1.0f / dp; - const float thetaScale = levels / (2.0f * CV_PI_F); - - GHT_Ballard_PosScale_calcHist<<>>(coordList, thetaList, - r_table, r_sizes, - hist, rows, cols, - minScale, scaleStep, scaleRange, - idp, thetaScale); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void 
GHT_Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange, - float4* out, int3* votes, const int maxSize, - const float minScale, const float scaleStep, const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= cols || y >= rows) - return; - - for (int s = 0; s < scaleRange; ++s) - { - const float scale = minScale + s * scaleStep; - - const int prevScaleIdx = (s) * (rows + 2); - const int curScaleIdx = (s + 1) * (rows + 2); - const int nextScaleIdx = (s + 2) * (rows + 2); - - const int curVotes = hist(curScaleIdx + y + 1, x + 1); - - if (curVotes > threshold && - curVotes > hist(curScaleIdx + y + 1, x) && - curVotes >= hist(curScaleIdx + y + 1, x + 2) && - curVotes > hist(curScaleIdx + y, x + 1) && - curVotes >= hist(curScaleIdx + y + 2, x + 1) && - curVotes > hist(prevScaleIdx + y + 1, x + 1) && - curVotes >= hist(nextScaleIdx + y + 1, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, scale, 0.0f); - votes[ind] = make_int3(curVotes, curVotes, 0); - } - } - } - } - - int GHT_Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize, - float minScale, float scaleStep, float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) ); - - GHT_Ballard_PosScale_findPosInHist<<>>(hist, rows, cols, scaleRange, out, votes, maxSize, minScale, scaleStep, dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - 
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Ballard_PosRotation - - __global__ void GHT_Ballard_PosRotation_calcHist(const unsigned int* coordList, const float* thetaList, - PtrStep r_table, const int* r_sizes, - PtrStepi hist, const int rows, const int cols, - const float minAngle, const float angleStep, const int angleRange, - const float idp, const float thetaScale) - { - const unsigned int coord = coordList[blockIdx.x]; - float2 p; - p.x = (coord & 0xFFFF); - p.y = (coord >> 16) & 0xFFFF; - - const float thetaVal = thetaList[blockIdx.x]; - - for (int a = threadIdx.x; a < angleRange; a += blockDim.x) - { - const float angle = (minAngle + a * angleStep) * (CV_PI_F / 180.0f); - float sinA, cosA; - sincosf(angle, &sinA, &cosA); - - float theta = thetaVal - angle; - if (theta < 0) - theta += 2.0f * CV_PI_F; - - const int n = __float2int_rn(theta * thetaScale); - - const short2* r_row = r_table.ptr(n); - const int r_row_size = r_sizes[n]; - - for (int j = 0; j < r_row_size; ++j) - { - const float2 d = saturate_cast(r_row[j]); - - const float2 dr = make_float2(d.x * cosA - d.y * sinA, d.x * sinA + d.y * cosA); - - float2 c = make_float2(p.x - dr.x, p.y - dr.y); - c.x *= idp; - c.y *= idp; - - if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows) - ::atomicAdd(hist.ptr((a + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1); - } - } - } - - void GHT_Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - PtrStepSz r_table, const int* r_sizes, - PtrStepi hist, int rows, int cols, - float minAngle, float angleStep, int angleRange, - float dp, int levels) - { - const dim3 block(256); - const dim3 grid(pointsCount); - - const float idp = 1.0f / dp; - const float thetaScale = levels / (2.0f * CV_PI_F); 
- - GHT_Ballard_PosRotation_calcHist<<>>(coordList, thetaList, - r_table, r_sizes, - hist, rows, cols, - minAngle, angleStep, angleRange, - idp, thetaScale); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange, - float4* out, int3* votes, const int maxSize, - const float minAngle, const float angleStep, const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= cols || y >= rows) - return; - - for (int a = 0; a < angleRange; ++a) - { - const float angle = minAngle + a * angleStep; - - const int prevAngleIdx = (a) * (rows + 2); - const int curAngleIdx = (a + 1) * (rows + 2); - const int nextAngleIdx = (a + 2) * (rows + 2); - - const int curVotes = hist(curAngleIdx + y + 1, x + 1); - - if (curVotes > threshold && - curVotes > hist(curAngleIdx + y + 1, x) && - curVotes >= hist(curAngleIdx + y + 1, x + 2) && - curVotes > hist(curAngleIdx + y, x + 1) && - curVotes >= hist(curAngleIdx + y + 2, x + 1) && - curVotes > hist(prevAngleIdx + y + 1, x + 1) && - curVotes >= hist(nextAngleIdx + y + 1, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, 1.0f, angle); - votes[ind] = make_int3(curVotes, 0, curVotes); - } - } - } - } - - int GHT_Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize, - float minAngle, float angleStep, float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, 
cudaFuncCachePreferL1) ); - - GHT_Ballard_PosRotation_findPosInHist<<>>(hist, rows, cols, angleRange, out, votes, maxSize, minAngle, angleStep, dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // GHT_Guil_Full - - struct FeatureTable - { - uchar* p1_pos_data; - size_t p1_pos_step; - - uchar* p1_theta_data; - size_t p1_theta_step; - - uchar* p2_pos_data; - size_t p2_pos_step; - - uchar* d12_data; - size_t d12_step; - - uchar* r1_data; - size_t r1_step; - - uchar* r2_data; - size_t r2_step; - }; - - __constant__ FeatureTable c_templFeatures; - __constant__ FeatureTable c_imageFeatures; - - void GHT_Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2) - { - FeatureTable tbl; - - tbl.p1_pos_data = p1_pos.data; - tbl.p1_pos_step = p1_pos.step; - - tbl.p1_theta_data = p1_theta.data; - tbl.p1_theta_step = p1_theta.step; - - tbl.p2_pos_data = p2_pos.data; - tbl.p2_pos_step = p2_pos.step; - - tbl.d12_data = d12.data; - tbl.d12_step = d12.step; - - tbl.r1_data = r1.data; - tbl.r1_step = r1.step; - - tbl.r2_data = r2.data; - tbl.r2_step = r2.step; - - cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) ); - } - void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2) - { - FeatureTable tbl; - - tbl.p1_pos_data = p1_pos.data; - tbl.p1_pos_step = p1_pos.step; - - tbl.p1_theta_data = p1_theta.data; - tbl.p1_theta_step = p1_theta.step; - - tbl.p2_pos_data = p2_pos.data; - tbl.p2_pos_step = p2_pos.step; - - tbl.d12_data = d12.data; - tbl.d12_step = d12.step; - - tbl.r1_data = r1.data; - tbl.r1_step = r1.step; - 
- tbl.r2_data = r2.data; - tbl.r2_step = r2.step; - - cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) ); - } - - struct TemplFeatureTable - { - static __device__ float2* p1_pos(int n) - { - return (float2*)(c_templFeatures.p1_pos_data + n * c_templFeatures.p1_pos_step); - } - static __device__ float* p1_theta(int n) - { - return (float*)(c_templFeatures.p1_theta_data + n * c_templFeatures.p1_theta_step); - } - static __device__ float2* p2_pos(int n) - { - return (float2*)(c_templFeatures.p2_pos_data + n * c_templFeatures.p2_pos_step); - } - - static __device__ float* d12(int n) - { - return (float*)(c_templFeatures.d12_data + n * c_templFeatures.d12_step); - } - - static __device__ float2* r1(int n) - { - return (float2*)(c_templFeatures.r1_data + n * c_templFeatures.r1_step); - } - static __device__ float2* r2(int n) - { - return (float2*)(c_templFeatures.r2_data + n * c_templFeatures.r2_step); - } - }; - struct ImageFeatureTable - { - static __device__ float2* p1_pos(int n) - { - return (float2*)(c_imageFeatures.p1_pos_data + n * c_imageFeatures.p1_pos_step); - } - static __device__ float* p1_theta(int n) - { - return (float*)(c_imageFeatures.p1_theta_data + n * c_imageFeatures.p1_theta_step); - } - static __device__ float2* p2_pos(int n) - { - return (float2*)(c_imageFeatures.p2_pos_data + n * c_imageFeatures.p2_pos_step); - } - - static __device__ float* d12(int n) - { - return (float*)(c_imageFeatures.d12_data + n * c_imageFeatures.d12_step); - } - - static __device__ float2* r1(int n) - { - return (float2*)(c_imageFeatures.r1_data + n * c_imageFeatures.r1_step); - } - static __device__ float2* r2(int n) - { - return (float2*)(c_imageFeatures.r2_data + n * c_imageFeatures.r2_step); - } - }; - - __device__ float clampAngle(float a) - { - float res = a; - - while (res > 2.0f * CV_PI_F) - res -= 2.0f * CV_PI_F; - while (res < 0.0f) - res += 2.0f * CV_PI_F; - - return res; - } - - __device__ bool angleEq(float a, float b, float eps) - 
{ - return (::fabs(clampAngle(a - b)) <= eps); - } - - template - __global__ void GHT_Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount, - int* sizes, const int maxSize, - const float xi, const float angleEpsilon, const float alphaScale, - const float2 center, const float maxDist) - { - const float p1_theta = thetaList[blockIdx.x]; - const unsigned int coord1 = coordList[blockIdx.x]; - float2 p1_pos; - p1_pos.x = (coord1 & 0xFFFF); - p1_pos.y = (coord1 >> 16) & 0xFFFF; - - for (int i = threadIdx.x; i < pointsCount; i += blockDim.x) - { - const float p2_theta = thetaList[i]; - const unsigned int coord2 = coordList[i]; - float2 p2_pos; - p2_pos.x = (coord2 & 0xFFFF); - p2_pos.y = (coord2 >> 16) & 0xFFFF; - - if (angleEq(p1_theta - p2_theta, xi, angleEpsilon)) - { - const float2 d = p1_pos - p2_pos; - - float alpha12 = clampAngle(::atan2(d.y, d.x) - p1_theta); - float d12 = ::sqrtf(d.x * d.x + d.y * d.y); - - if (d12 > maxDist) - continue; - - float2 r1 = p1_pos - center; - float2 r2 = p2_pos - center; - - const int n = __float2int_rn(alpha12 * alphaScale); - - const int ind = ::atomicAdd(sizes + n, 1); - - if (ind < maxSize) - { - if (!isTempl) - { - FT::p1_pos(n)[ind] = p1_pos; - FT::p2_pos(n)[ind] = p2_pos; - } - - FT::p1_theta(n)[ind] = p1_theta; - - FT::d12(n)[ind] = d12; - - if (isTempl) - { - FT::r1(n)[ind] = r1; - FT::r2(n)[ind] = r2; - } - } - } - } - } - - template - void GHT_Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount, - int* sizes, int maxSize, - float xi, float angleEpsilon, int levels, - float2 center, float maxDist) - { - const dim3 block(256); - const dim3 grid(pointsCount); - - const float alphaScale = levels / (2.0f * CV_PI_F); - - GHT_Guil_Full_buildFeatureList<<>>(coordList, thetaList, pointsCount, - sizes, maxSize, - xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale, - center, maxDist); - cudaSafeCall( 
cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - thrust::device_ptr sizesPtr(sizes); - thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum(), maxSize)); - } - - void GHT_Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - int* sizes, int maxSize, - float xi, float angleEpsilon, int levels, - float2 center, float maxDist) - { - GHT_Guil_Full_buildFeatureList_caller(coordList, thetaList, pointsCount, - sizes, maxSize, - xi, angleEpsilon, levels, - center, maxDist); - } - void GHT_Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, - int* sizes, int maxSize, - float xi, float angleEpsilon, int levels, - float2 center, float maxDist) - { - GHT_Guil_Full_buildFeatureList_caller(coordList, thetaList, pointsCount, - sizes, maxSize, - xi, angleEpsilon, levels, - center, maxDist); - } - - __global__ void GHT_Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist, - const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange) - { - extern __shared__ int s_OHist[]; - for (int i = threadIdx.x; i <= angleRange; i += blockDim.x) - s_OHist[i] = 0; - __syncthreads(); - - const int tIdx = blockIdx.x; - const int level = blockIdx.y; - - const int tSize = templSizes[level]; - - if (tIdx < tSize) - { - const int imSize = imageSizes[level]; - - const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx]; - - for (int i = threadIdx.x; i < imSize; i += blockDim.x) - { - const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i]; - - const float angle = clampAngle(im_p1_theta - t_p1_theta); - - if (angle >= minAngle && angle <= maxAngle) - { - const int n = __float2int_rn((angle - minAngle) * iAngleStep); - Emulation::smem::atomicAdd(&s_OHist[n], 1); - } - } - } - __syncthreads(); - - for (int i = threadIdx.x; i <= angleRange; i += blockDim.x) - 
::atomicAdd(OHist + i, s_OHist[i]); - } - - void GHT_Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist, - float minAngle, float maxAngle, float angleStep, int angleRange, - int levels, int tMaxSize) - { - const dim3 block(256); - const dim3 grid(tMaxSize, levels + 1); - - minAngle *= (CV_PI_F / 180.0f); - maxAngle *= (CV_PI_F / 180.0f); - angleStep *= (CV_PI_F / 180.0f); - - const size_t smemSize = (angleRange + 1) * sizeof(float); - - GHT_Guil_Full_calcOHist<<>>(templSizes, imageSizes, OHist, - minAngle, maxAngle, 1.0f / angleStep, angleRange); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist, - const float angle, const float angleEpsilon, - const float minScale, const float maxScale, const float iScaleStep, const int scaleRange) - { - extern __shared__ int s_SHist[]; - for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x) - s_SHist[i] = 0; - __syncthreads(); - - const int tIdx = blockIdx.x; - const int level = blockIdx.y; - - const int tSize = templSizes[level]; - - if (tIdx < tSize) - { - const int imSize = imageSizes[level]; - - const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle; - const float t_d12 = TemplFeatureTable::d12(level)[tIdx] + angle; - - for (int i = threadIdx.x; i < imSize; i += blockDim.x) - { - const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i]; - const float im_d12 = ImageFeatureTable::d12(level)[i]; - - if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon)) - { - const float scale = im_d12 / t_d12; - - if (scale >= minScale && scale <= maxScale) - { - const int s = __float2int_rn((scale - minScale) * iScaleStep); - Emulation::smem::atomicAdd(&s_SHist[s], 1); - } - } - } - } - __syncthreads(); - - for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x) - ::atomicAdd(SHist + i, s_SHist[i]); - } - - void GHT_Guil_Full_calcSHist_gpu(const 
int* templSizes, const int* imageSizes, int* SHist, - float angle, float angleEpsilon, - float minScale, float maxScale, float iScaleStep, int scaleRange, - int levels, int tMaxSize) - { - const dim3 block(256); - const dim3 grid(tMaxSize, levels + 1); - - angle *= (CV_PI_F / 180.0f); - angleEpsilon *= (CV_PI_F / 180.0f); - - const size_t smemSize = (scaleRange + 1) * sizeof(float); - - GHT_Guil_Full_calcSHist<<>>(templSizes, imageSizes, SHist, - angle, angleEpsilon, - minScale, maxScale, iScaleStep, scaleRange); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist, - const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale, - const float idp) - { - const int tIdx = blockIdx.x; - const int level = blockIdx.y; - - const int tSize = templSizes[level]; - - if (tIdx < tSize) - { - const int imSize = imageSizes[level]; - - const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle; - - float2 r1 = TemplFeatureTable::r1(level)[tIdx]; - float2 r2 = TemplFeatureTable::r2(level)[tIdx]; - - r1 = r1 * scale; - r2 = r2 * scale; - - r1 = make_float2(cosVal * r1.x - sinVal * r1.y, sinVal * r1.x + cosVal * r1.y); - r2 = make_float2(cosVal * r2.x - sinVal * r2.y, sinVal * r2.x + cosVal * r2.y); - - for (int i = threadIdx.x; i < imSize; i += blockDim.x) - { - const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i]; - - const float2 im_p1_pos = ImageFeatureTable::p1_pos(level)[i]; - const float2 im_p2_pos = ImageFeatureTable::p2_pos(level)[i]; - - if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon)) - { - float2 c1, c2; - - c1 = im_p1_pos - r1; - c1 = c1 * idp; - - c2 = im_p2_pos - r2; - c2 = c2 * idp; - - if (::fabs(c1.x - c2.x) > 1 || ::fabs(c1.y - c2.y) > 1) - continue; - - if (c1.y >= 0 && c1.y < PHist.rows - 2 && c1.x >= 0 && c1.x < PHist.cols - 2) - 
::atomicAdd(PHist.ptr(__float2int_rn(c1.y) + 1) + __float2int_rn(c1.x) + 1, 1); - } - } - } - } - - void GHT_Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist, - float angle, float angleEpsilon, float scale, - float dp, - int levels, int tMaxSize) - { - const dim3 block(256); - const dim3 grid(tMaxSize, levels + 1); - - angle *= (CV_PI_F / 180.0f); - angleEpsilon *= (CV_PI_F / 180.0f); - - const float sinVal = ::sinf(angle); - const float cosVal = ::cosf(angle); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) ); - - GHT_Guil_Full_calcPHist<<>>(templSizes, imageSizes, PHist, - angle, sinVal, cosVal, angleEpsilon, scale, - 1.0f / dp); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void GHT_Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, - const float angle, const int angleVotes, const float scale, const int scaleVotes, - const float dp, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= hist.cols - 2 || y >= hist.rows - 2) - return; - - const int curVotes = hist(y + 1, x + 1); - - if (curVotes > threshold && - curVotes > hist(y + 1, x) && - curVotes >= hist(y + 1, x + 2) && - curVotes > hist(y, x + 1) && - curVotes >= hist(y + 2, x + 1)) - { - const int ind = ::atomicAdd(&g_counter, 1); - - if (ind < maxSize) - { - out[ind] = make_float4(x * dp, y * dp, scale, angle); - votes[ind] = make_int3(curVotes, scaleVotes, angleVotes); - } - } - } - - int GHT_Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize, - float angle, int angleVotes, float scale, int scaleVotes, - float dp, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), 
cudaMemcpyHostToDevice) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) ); - - GHT_Guil_Full_findPosInHist<<>>(hist, out, votes, maxSize, - angle, angleVotes, scale, scaleVotes, - dp, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - } -}}} - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu deleted file mode 100644 index 01cfae4cbd..0000000000 --- a/modules/gpu/src/cuda/imgproc.cu +++ /dev/null @@ -1,754 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/border_interpolate.hpp" -#include "internal_shared.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// - - texture tex_meanshift; - - __device__ short2 do_mean_shift(int x0, int y0, unsigned char* out, - size_t out_step, int cols, int rows, - int sp, int sr, int maxIter, float eps) - { - int isr2 = sr*sr; - uchar4 c = tex2D(tex_meanshift, x0, y0 ); - - // iterate meanshift procedure - for( int iter = 0; iter < maxIter; iter++ ) - { - int count = 0; - int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; - float icount; - - //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) - int minx = x0-sp; - int miny = y0-sp; - int maxx = x0+sp; - int maxy = y0+sp; - - for( int y = miny; y <= maxy; y++) - { - int rowCount = 0; - for( int x = minx; x <= maxx; x++ ) - { - uchar4 t = tex2D( tex_meanshift, x, y ); - - int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z); - if( norm2 <= isr2 ) - { - s0 += t.x; s1 += t.y; s2 += t.z; - sx += x; rowCount++; - } - } - count += rowCount; - sy += y*rowCount; - } - - if( count == 0 ) - break; - - icount = 1.f/count; - int x1 = __float2int_rz(sx*icount); - int y1 = __float2int_rz(sy*icount); - s0 = __float2int_rz(s0*icount); - s1 = __float2int_rz(s1*icount); - s2 = __float2int_rz(s2*icount); - - int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z); - - bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps); - - x0 = x1; y0 = y1; - c.x = s0; c.y = s1; c.z = s2; - - if( stopFlag ) - break; - } - - int base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step 
+ (blockIdx.x * blockDim.x + threadIdx.x) * 4 * sizeof(uchar); - *(uchar4*)(out + base) = c; - - return make_short2((short)x0, (short)y0); - } - - __global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps ) - { - int x0 = blockIdx.x * blockDim.x + threadIdx.x; - int y0 = blockIdx.y * blockDim.y + threadIdx.y; - - if( x0 < cols && y0 < rows ) - do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps); - } - - __global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep, - unsigned char* outsp, size_t outspstep, - int cols, int rows, - int sp, int sr, int maxIter, float eps) - { - int x0 = blockIdx.x * blockDim.x + threadIdx.x; - int y0 = blockIdx.y * blockDim.y + threadIdx.y; - - if( x0 < cols && y0 < rows ) - { - int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short); - *(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps); - } - } - - void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream) - { - dim3 grid(1, 1, 1); - dim3 threads(32, 8, 1); - grid.x = divUp(src.cols, threads.x); - grid.y = divUp(src.rows, threads.y); - - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) ); - - meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps ); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - - //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) ); - } - - void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream) - { - dim3 grid(1, 1, 1); - dim3 threads(32, 8, 1); - grid.x = divUp(src.cols, 
threads.x); - grid.y = divUp(src.rows, threads.y); - - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) ); - - meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps ); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - - //cudaSafeCall( cudaUnbindTexture( tex_meanshift ) ); - } - - /////////////////////////////////// drawColorDisp /////////////////////////////////////////////// - - template - __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1) - { - unsigned int H = ((ndisp-d) * 240)/ndisp; - - unsigned int hi = (H/60) % 6; - float f = H/60.f - H/60; - float p = V * (1 - S); - float q = V * (1 - f * S); - float t = V * (1 - (1 - f) * S); - - float3 res; - - if (hi == 0) //R = V, G = t, B = p - { - res.x = p; - res.y = t; - res.z = V; - } - - if (hi == 1) // R = q, G = V, B = p - { - res.x = p; - res.y = V; - res.z = q; - } - - if (hi == 2) // R = p, G = V, B = t - { - res.x = t; - res.y = V; - res.z = p; - } - - if (hi == 3) // R = p, G = q, B = V - { - res.x = V; - res.y = q; - res.z = p; - } - - if (hi == 4) // R = t, G = p, B = V - { - res.x = V; - res.y = p; - res.z = t; - } - - if (hi == 5) // R = V, G = p, B = q - { - res.x = q; - res.y = p; - res.z = V; - } - const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f); - const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f); - const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f); - const unsigned int a = 255U; - - return (a << 24) + (r << 16) + (g << 8) + b; - } - - __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) - { - const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; - const int y = 
blockIdx.y * blockDim.y + threadIdx.y; - - if(x < width && y < height) - { - uchar4 d4 = *(uchar4*)(disp + y * disp_step + x); - - uint4 res; - res.x = cvtPixel(d4.x, ndisp); - res.y = cvtPixel(d4.y, ndisp); - res.z = cvtPixel(d4.z, ndisp); - res.w = cvtPixel(d4.w, ndisp); - - uint4* line = (uint4*)(out_image + y * out_step); - line[x >> 2] = res; - } - } - - __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) - { - const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x < width && y < height) - { - short2 d2 = *(short2*)(disp + y * disp_step + x); - - uint2 res; - res.x = cvtPixel(d2.x, ndisp); - res.y = cvtPixel(d2.y, ndisp); - - uint2* line = (uint2*)(out_image + y * out_step); - line[x >> 1] = res; - } - } - - - void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream) - { - dim3 threads(16, 16, 1); - dim3 grid(1, 1, 1); - grid.x = divUp(src.cols, threads.x << 2); - grid.y = divUp(src.rows, threads.y); - - drawColorDisp<<>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void drawColorDisp_gpu(const PtrStepSz& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream) - { - dim3 threads(32, 8, 1); - dim3 grid(1, 1, 1); - grid.x = divUp(src.cols, threads.x << 1); - grid.y = divUp(src.rows, threads.y); - - drawColorDisp<<>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - /////////////////////////////////// reprojectImageTo3D /////////////////////////////////////////////// - - __constant__ float cq[16]; - - template - __global__ void reprojectImageTo3D(const PtrStepSz disp, PtrStep xyz) - { - 
const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (y >= disp.rows || x >= disp.cols) - return; - - const float qx = x * cq[ 0] + y * cq[ 1] + cq[ 3]; - const float qy = x * cq[ 4] + y * cq[ 5] + cq[ 7]; - const float qz = x * cq[ 8] + y * cq[ 9] + cq[11]; - const float qw = x * cq[12] + y * cq[13] + cq[15]; - - const T d = disp(y, x); - - const float iW = 1.f / (qw + cq[14] * d); - - D v = VecTraits::all(1.0f); - v.x = (qx + cq[2] * d) * iW; - v.y = (qy + cq[6] * d) * iW; - v.z = (qz + cq[10] * d) * iW; - - xyz(y, x) = v; - } - - template - void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y)); - - cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) ); - - reprojectImageTo3D<<>>((PtrStepSz)disp, (PtrStepSz)xyz); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); - template void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); - template void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); - template void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); - - /////////////////////////////////////////// Corner Harris ///////////////////////////////////////////////// - - texture harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp); - texture harrisDyTex(0, cudaFilterModePoint, cudaAddressModeClamp); - - __global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < dst.cols && y < 
dst.rows) - { - float a = 0.f; - float b = 0.f; - float c = 0.f; - - const int ibegin = y - (block_size / 2); - const int jbegin = x - (block_size / 2); - const int iend = ibegin + block_size; - const int jend = jbegin + block_size; - - for (int i = ibegin; i < iend; ++i) - { - for (int j = jbegin; j < jend; ++j) - { - float dx = tex2D(harrisDxTex, j, i); - float dy = tex2D(harrisDyTex, j, i); - - a += dx * dx; - b += dx * dy; - c += dy * dy; - } - } - - dst(y, x) = a * c - b * b - k * (a + c) * (a + c); - } - } - - template - __global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst, const BR border_row, const BC border_col) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < dst.cols && y < dst.rows) - { - float a = 0.f; - float b = 0.f; - float c = 0.f; - - const int ibegin = y - (block_size / 2); - const int jbegin = x - (block_size / 2); - const int iend = ibegin + block_size; - const int jend = jbegin + block_size; - - for (int i = ibegin; i < iend; ++i) - { - const int y = border_col.idx_row(i); - - for (int j = jbegin; j < jend; ++j) - { - const int x = border_row.idx_col(j); - - float dx = tex2D(harrisDxTex, x, y); - float dy = tex2D(harrisDyTex, x, y); - - a += dx * dx; - b += dx * dy; - c += dy * dy; - } - } - - dst(y, x) = a * c - b * b - k * (a + c) * (a + c); - } - } - - void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y)); - - bindTexture(&harrisDxTex, Dx); - bindTexture(&harrisDyTex, Dy); - - switch (border_type) - { - case BORDER_REFLECT101_GPU: - cornerHarris_kernel<<>>(block_size, k, dst, BrdRowReflect101(Dx.cols), BrdColReflect101(Dx.rows)); - break; - - case BORDER_REFLECT_GPU: - cornerHarris_kernel<<>>(block_size, k, dst, BrdRowReflect(Dx.cols), BrdColReflect(Dx.rows)); - break; 
- - case BORDER_REPLICATE_GPU: - cornerHarris_kernel<<>>(block_size, k, dst); - break; - } - - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - /////////////////////////////////////////// Corner Min Eigen Val ///////////////////////////////////////////////// - - texture minEigenValDxTex(0, cudaFilterModePoint, cudaAddressModeClamp); - texture minEigenValDyTex(0, cudaFilterModePoint, cudaAddressModeClamp); - - __global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < dst.cols && y < dst.rows) - { - float a = 0.f; - float b = 0.f; - float c = 0.f; - - const int ibegin = y - (block_size / 2); - const int jbegin = x - (block_size / 2); - const int iend = ibegin + block_size; - const int jend = jbegin + block_size; - - for (int i = ibegin; i < iend; ++i) - { - for (int j = jbegin; j < jend; ++j) - { - float dx = tex2D(minEigenValDxTex, j, i); - float dy = tex2D(minEigenValDyTex, j, i); - - a += dx * dx; - b += dx * dy; - c += dy * dy; - } - } - - a *= 0.5f; - c *= 0.5f; - - dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b); - } - } - - - template - __global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst, const BR border_row, const BC border_col) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < dst.cols && y < dst.rows) - { - float a = 0.f; - float b = 0.f; - float c = 0.f; - - const int ibegin = y - (block_size / 2); - const int jbegin = x - (block_size / 2); - const int iend = ibegin + block_size; - const int jend = jbegin + block_size; - - for (int i = ibegin; i < iend; ++i) - { - int y = border_col.idx_row(i); - - for (int j = jbegin; j < jend; ++j) - { - int x = border_row.idx_col(j); - - float dx = tex2D(minEigenValDxTex, x, y); - float dy = 
tex2D(minEigenValDyTex, x, y); - - a += dx * dx; - b += dx * dy; - c += dy * dy; - } - } - - a *= 0.5f; - c *= 0.5f; - - dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b); - } - } - - void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y)); - - bindTexture(&minEigenValDxTex, Dx); - bindTexture(&minEigenValDyTex, Dy); - - switch (border_type) - { - case BORDER_REFLECT101_GPU: - cornerMinEigenVal_kernel<<>>(block_size, dst, BrdRowReflect101(Dx.cols), BrdColReflect101(Dx.rows)); - break; - - case BORDER_REFLECT_GPU: - cornerMinEigenVal_kernel<<>>(block_size, dst, BrdRowReflect(Dx.cols), BrdColReflect(Dx.rows)); - break; - - case BORDER_REPLICATE_GPU: - cornerMinEigenVal_kernel<<>>(block_size, dst); - break; - } - - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall(cudaDeviceSynchronize()); - } - - ////////////////////////////////////////////////////////////////////////// - // buildWarpMaps - - // TODO use intrinsics like __sinf and so on - - namespace build_warp_maps - { - - __constant__ float ck_rinv[9]; - __constant__ float cr_kinv[9]; - __constant__ float ct[3]; - __constant__ float cscale; - } - - - class PlaneMapper - { - public: - static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y) - { - using namespace build_warp_maps; - - float x_ = u / cscale - ct[0]; - float y_ = v / cscale - ct[1]; - - float z; - x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]); - y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]); - z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]); - - x /= z; - y /= z; - } - }; - - - class CylindricalMapper - { - public: - static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y) - { - using namespace build_warp_maps; - - u /= cscale; - float x_ = ::sinf(u); - 
float y_ = v / cscale; - float z_ = ::cosf(u); - - float z; - x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_; - y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_; - z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; - } - }; - - - class SphericalMapper - { - public: - static __device__ __forceinline__ void mapBackward(float u, float v, float &x, float &y) - { - using namespace build_warp_maps; - - v /= cscale; - u /= cscale; - - float sinv = ::sinf(v); - float x_ = sinv * ::sinf(u); - float y_ = -::cosf(v); - float z_ = sinv * ::cosf(u); - - float z; - x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_; - y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_; - z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_; - - if (z > 0) { x /= z; y /= z; } - else x = y = -1; - } - }; - - - template - __global__ void buildWarpMapsKernel(int tl_u, int tl_v, int cols, int rows, - PtrStepf map_x, PtrStepf map_y) - { - int du = blockIdx.x * blockDim.x + threadIdx.x; - int dv = blockIdx.y * blockDim.y + threadIdx.y; - if (du < cols && dv < rows) - { - float u = tl_u + du; - float v = tl_v + dv; - float x, y; - Mapper::mapBackward(u, v, x, y); - map_x.ptr(dv)[du] = x; - map_y.ptr(dv)[du] = y; - } - } - - - void buildWarpPlaneMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y, - const float k_rinv[9], const float r_kinv[9], const float t[3], - float scale, cudaStream_t stream) - { - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float))); - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float))); - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ct, t, 3*sizeof(float))); - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float))); - - int cols = map_x.cols; - int rows = map_x.rows; - - dim3 threads(32, 8); - dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - - buildWarpMapsKernel<<>>(tl_u, tl_v, 
cols, rows, map_x, map_y); - cudaSafeCall(cudaGetLastError()); - if (stream == 0) - cudaSafeCall(cudaDeviceSynchronize()); - } - - - void buildWarpCylindricalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y, - const float k_rinv[9], const float r_kinv[9], float scale, - cudaStream_t stream) - { - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float))); - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float))); - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float))); - - int cols = map_x.cols; - int rows = map_x.rows; - - dim3 threads(32, 8); - dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - - buildWarpMapsKernel<<>>(tl_u, tl_v, cols, rows, map_x, map_y); - cudaSafeCall(cudaGetLastError()); - if (stream == 0) - cudaSafeCall(cudaDeviceSynchronize()); - } - - - void buildWarpSphericalMaps(int tl_u, int tl_v, PtrStepSzf map_x, PtrStepSzf map_y, - const float k_rinv[9], const float r_kinv[9], float scale, - cudaStream_t stream) - { - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::ck_rinv, k_rinv, 9*sizeof(float))); - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cr_kinv, r_kinv, 9*sizeof(float))); - cudaSafeCall(cudaMemcpyToSymbol(build_warp_maps::cscale, &scale, sizeof(float))); - - int cols = map_x.cols; - int rows = map_x.rows; - - dim3 threads(32, 8); - dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - - buildWarpMapsKernel<<>>(tl_u, tl_v, cols, rows, map_x, map_y); - cudaSafeCall(cudaGetLastError()); - if (stream == 0) - cudaSafeCall(cudaDeviceSynchronize()); - } - } // namespace imgproc -}}} // namespace cv { namespace gpu { namespace cudev { - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/match_template.cu b/modules/gpu/src/cuda/match_template.cu deleted file mode 100644 index 6670639290..0000000000 --- a/modules/gpu/src/cuda/match_template.cu +++ /dev/null @@ -1,916 +0,0 @@ 
-/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/vec_math.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace match_template - { - __device__ __forceinline__ float sum(float v) { return v; } - __device__ __forceinline__ float sum(float2 v) { return v.x + v.y; } - __device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; } - __device__ __forceinline__ float sum(float4 v) { return v.x + v.y + v.z + v.w; } - - __device__ __forceinline__ float first(float v) { return v; } - __device__ __forceinline__ float first(float2 v) { return v.x; } - __device__ __forceinline__ float first(float3 v) { return v.x; } - __device__ __forceinline__ float first(float4 v) { return v.x; } - - __device__ __forceinline__ float mul(float a, float b) { return a * b; } - __device__ __forceinline__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); } - __device__ __forceinline__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); } - __device__ __forceinline__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); } - - __device__ __forceinline__ float mul(uchar a, uchar b) { return a * b; } - __device__ __forceinline__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); } - __device__ __forceinline__ float3 mul(uchar3 a, uchar3 b) { return 
make_float3(a.x * b.x, a.y * b.y, a.z * b.z); } - __device__ __forceinline__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); } - - __device__ __forceinline__ float sub(float a, float b) { return a - b; } - __device__ __forceinline__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); } - __device__ __forceinline__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); } - __device__ __forceinline__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); } - - __device__ __forceinline__ float sub(uchar a, uchar b) { return a - b; } - __device__ __forceinline__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); } - __device__ __forceinline__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); } - __device__ __forceinline__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); } - - ////////////////////////////////////////////////////////////////////// - // Naive_CCORR - - template - __global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result) - { - typedef typename TypeVec::vec_type Type; - typedef typename TypeVec::vec_type Typef; - - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - Typef res = VecTraits::all(0); - - for (int i = 0; i < h; ++i) - { - const Type* image_ptr = (const Type*)image.ptr(y + i); - const Type* templ_ptr = (const Type*)templ.ptr(i); - for (int j = 0; j < w; ++j) - res = res + mul(image_ptr[x + j], templ_ptr[j]); - } - - result.ptr(y)[x] = sum(res); - } - } - - template - void matchTemplateNaive_CCORR(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream) - { - const dim3 threads(32, 8); - const dim3 
grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - matchTemplateNaiveKernel_CCORR<<>>(templ.cols, templ.rows, image, templ, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream) - { - typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream); - - static const caller_t callers[] = - { - 0, matchTemplateNaive_CCORR, matchTemplateNaive_CCORR, matchTemplateNaive_CCORR, matchTemplateNaive_CCORR - }; - - callers[cn](image, templ, result, stream); - } - - - void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream) - { - typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream); - - static const caller_t callers[] = - { - 0, matchTemplateNaive_CCORR, matchTemplateNaive_CCORR, matchTemplateNaive_CCORR, matchTemplateNaive_CCORR - }; - - callers[cn](image, templ, result, stream); - } - - ////////////////////////////////////////////////////////////////////// - // Naive_SQDIFF - - template - __global__ void matchTemplateNaiveKernel_SQDIFF(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result) - { - typedef typename TypeVec::vec_type Type; - typedef typename TypeVec::vec_type Typef; - - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - Typef res = VecTraits::all(0); - Typef delta; - - for (int i = 0; i < h; ++i) - { - const Type* image_ptr = (const Type*)image.ptr(y + i); - const Type* templ_ptr = (const Type*)templ.ptr(i); - for (int j = 0; j < w; ++j) - { - delta = sub(image_ptr[x + j], templ_ptr[j]); - res = res + delta * delta; - } - } - - result.ptr(y)[x] = sum(res); - 
} - } - - template - void matchTemplateNaive_SQDIFF(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream) - { - const dim3 threads(32, 8); - const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - matchTemplateNaiveKernel_SQDIFF<<>>(templ.cols, templ.rows, image, templ, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream) - { - typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream); - - static const caller_t callers[] = - { - 0, matchTemplateNaive_SQDIFF, matchTemplateNaive_SQDIFF, matchTemplateNaive_SQDIFF, matchTemplateNaive_SQDIFF - }; - - callers[cn](image, templ, result, stream); - } - - void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream) - { - typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream); - - static const caller_t callers[] = - { - 0, matchTemplateNaive_SQDIFF, matchTemplateNaive_SQDIFF, matchTemplateNaive_SQDIFF, matchTemplateNaive_SQDIFF - }; - - callers[cn](image, templ, result, stream); - } - - ////////////////////////////////////////////////////////////////////// - // Prepared_SQDIFF - - template - __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sqsum_ = (float)( - (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) - - (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn])); - float ccorr = 
result.ptr(y)[x]; - result.ptr(y)[x] = image_sqsum_ - 2.f * ccorr + templ_sqsum; - } - } - - template - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream) - { - const dim3 threads(32, 8); - const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - matchTemplatePreparedKernel_SQDIFF_8U<<>>(w, h, image_sqsum, templ_sqsum, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn, - cudaStream_t stream) - { - typedef void (*caller_t)(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream); - - static const caller_t callers[] = - { - 0, matchTemplatePrepared_SQDIFF_8U<1>, matchTemplatePrepared_SQDIFF_8U<2>, matchTemplatePrepared_SQDIFF_8U<3>, matchTemplatePrepared_SQDIFF_8U<4> - }; - - callers[cn](w, h, image_sqsum, templ_sqsum, result, stream); - } - - ////////////////////////////////////////////////////////////////////// - // Prepared_SQDIFF_NORMED - - // normAcc* are accurate normalization routines which make GPU matchTemplate - // consistent with CPU one - - __device__ float normAcc(float num, float denum) - { - if (::fabs(num) < denum) - return num / denum; - if (::fabs(num) < denum * 1.125f) - return num > 0 ? 1 : -1; - return 0; - } - - - __device__ float normAcc_SQDIFF(float num, float denum) - { - if (::fabs(num) < denum) - return num / denum; - if (::fabs(num) < denum * 1.125f) - return num > 0 ? 
1 : -1; - return 1; - } - - - template - __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U( - int w, int h, const PtrStep image_sqsum, - unsigned long long templ_sqsum, PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sqsum_ = (float)( - (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) - - (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn])); - float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = normAcc_SQDIFF(image_sqsum_ - 2.f * ccorr + templ_sqsum, - sqrtf(image_sqsum_ * templ_sqsum)); - } - } - - template - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, - PtrStepSzf result, cudaStream_t stream) - { - const dim3 threads(32, 8); - const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - matchTemplatePreparedKernel_SQDIFF_NORMED_8U<<>>(w, h, image_sqsum, templ_sqsum, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - - void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, - PtrStepSzf result, int cn, cudaStream_t stream) - { - typedef void (*caller_t)(int w, int h, const PtrStepSz image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream); - static const caller_t callers[] = - { - 0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4> - }; - - callers[cn](w, h, image_sqsum, templ_sqsum, result, stream); - } - - ////////////////////////////////////////////////////////////////////// - // Prepared_CCOFF - - __global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep 
image_sum, PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sum_ = (float)( - (image_sum.ptr(y + h)[x + w] - image_sum.ptr(y)[x + w]) - - (image_sum.ptr(y + h)[x] - image_sum.ptr(y)[x])); - float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = ccorr - image_sum_ * templ_sum_scale; - } - } - - void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - matchTemplatePreparedKernel_CCOFF_8U<<>>(w, h, (float)templ_sum / (w * h), image_sum, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - - - __global__ void matchTemplatePreparedKernel_CCOFF_8UC2( - int w, int h, float templ_sum_scale_r, float templ_sum_scale_g, - const PtrStep image_sum_r, - const PtrStep image_sum_g, - PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sum_r_ = (float)( - (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) - - (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x])); - float image_sum_g_ = (float)( - (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) - - (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x])); - float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g; - } - } - - void matchTemplatePrepared_CCOFF_8UC2( - int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - unsigned int templ_sum_r, unsigned int templ_sum_g, - PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), 
divUp(result.rows, threads.y)); - - matchTemplatePreparedKernel_CCOFF_8UC2<<>>( - w, h, (float)templ_sum_r / (w * h), (float)templ_sum_g / (w * h), - image_sum_r, image_sum_g, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - - - __global__ void matchTemplatePreparedKernel_CCOFF_8UC3( - int w, int h, - float templ_sum_scale_r, - float templ_sum_scale_g, - float templ_sum_scale_b, - const PtrStep image_sum_r, - const PtrStep image_sum_g, - const PtrStep image_sum_b, - PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sum_r_ = (float)( - (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) - - (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x])); - float image_sum_g_ = (float)( - (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) - - (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x])); - float image_sum_b_ = (float)( - (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) - - (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x])); - float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g - - image_sum_b_ * templ_sum_scale_b; - } - } - - void matchTemplatePrepared_CCOFF_8UC3( - int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - const PtrStepSz image_sum_b, - unsigned int templ_sum_r, - unsigned int templ_sum_g, - unsigned int templ_sum_b, - PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - matchTemplatePreparedKernel_CCOFF_8UC3<<>>( - w, h, - (float)templ_sum_r / (w * h), - (float)templ_sum_g / (w * h), - (float)templ_sum_b / (w * h), - image_sum_r, image_sum_g, image_sum_b, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - 
cudaSafeCall( cudaDeviceSynchronize() ); - } - - - - __global__ void matchTemplatePreparedKernel_CCOFF_8UC4( - int w, int h, - float templ_sum_scale_r, - float templ_sum_scale_g, - float templ_sum_scale_b, - float templ_sum_scale_a, - const PtrStep image_sum_r, - const PtrStep image_sum_g, - const PtrStep image_sum_b, - const PtrStep image_sum_a, - PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sum_r_ = (float)( - (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) - - (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x])); - float image_sum_g_ = (float)( - (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) - - (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x])); - float image_sum_b_ = (float)( - (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) - - (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x])); - float image_sum_a_ = (float)( - (image_sum_a.ptr(y + h)[x + w] - image_sum_a.ptr(y)[x + w]) - - (image_sum_a.ptr(y + h)[x] - image_sum_a.ptr(y)[x])); - float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g - - image_sum_b_ * templ_sum_scale_b - - image_sum_a_ * templ_sum_scale_a; - } - } - - void matchTemplatePrepared_CCOFF_8UC4( - int w, int h, - const PtrStepSz image_sum_r, - const PtrStepSz image_sum_g, - const PtrStepSz image_sum_b, - const PtrStepSz image_sum_a, - unsigned int templ_sum_r, - unsigned int templ_sum_g, - unsigned int templ_sum_b, - unsigned int templ_sum_a, - PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - matchTemplatePreparedKernel_CCOFF_8UC4<<>>( - w, h, - (float)templ_sum_r / (w * h), - (float)templ_sum_g / (w * h), - (float)templ_sum_b / (w * h), - (float)templ_sum_a / (w * h), - image_sum_r, 
image_sum_g, image_sum_b, image_sum_a, - result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - ////////////////////////////////////////////////////////////////////// - // Prepared_CCOFF_NORMED - - __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U( - int w, int h, float weight, - float templ_sum_scale, float templ_sqsum_scale, - const PtrStep image_sum, - const PtrStep image_sqsum, - PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float ccorr = result.ptr(y)[x]; - float image_sum_ = (float)( - (image_sum.ptr(y + h)[x + w] - image_sum.ptr(y)[x + w]) - - (image_sum.ptr(y + h)[x] - image_sum.ptr(y)[x])); - float image_sqsum_ = (float)( - (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) - - (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x])); - result.ptr(y)[x] = normAcc(ccorr - image_sum_ * templ_sum_scale, - sqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_))); - } - } - - void matchTemplatePrepared_CCOFF_NORMED_8U( - int w, int h, const PtrStepSz image_sum, - const PtrStepSz image_sqsum, - unsigned int templ_sum, unsigned long long templ_sqsum, - PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - float weight = 1.f / (w * h); - float templ_sum_scale = templ_sum * weight; - float templ_sqsum_scale = templ_sqsum - weight * templ_sum * templ_sum; - - matchTemplatePreparedKernel_CCOFF_NORMED_8U<<>>( - w, h, weight, templ_sum_scale, templ_sqsum_scale, - image_sum, image_sqsum, result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - - - __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2( - int w, int h, float weight, - float templ_sum_scale_r, float 
templ_sum_scale_g, - float templ_sqsum_scale, - const PtrStep image_sum_r, const PtrStep image_sqsum_r, - const PtrStep image_sum_g, const PtrStep image_sqsum_g, - PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sum_r_ = (float)( - (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) - - (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x])); - float image_sqsum_r_ = (float)( - (image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) - - (image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x])); - float image_sum_g_ = (float)( - (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) - - (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x])); - float image_sqsum_g_ = (float)( - (image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) - - (image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x])); - - float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g; - float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ - + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_)); - result.ptr(y)[x] = normAcc(num, denum); - } - } - - void matchTemplatePrepared_CCOFF_NORMED_8UC2( - int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, - PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - float weight = 1.f / (w * h); - float templ_sum_scale_r = templ_sum_r * weight; - float templ_sum_scale_g = templ_sum_g * weight; - float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r - + templ_sqsum_g - weight * templ_sum_g 
* templ_sum_g; - - matchTemplatePreparedKernel_CCOFF_NORMED_8UC2<<>>( - w, h, weight, - templ_sum_scale_r, templ_sum_scale_g, - templ_sqsum_scale, - image_sum_r, image_sqsum_r, - image_sum_g, image_sqsum_g, - result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - - - __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3( - int w, int h, float weight, - float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b, - float templ_sqsum_scale, - const PtrStep image_sum_r, const PtrStep image_sqsum_r, - const PtrStep image_sum_g, const PtrStep image_sqsum_g, - const PtrStep image_sum_b, const PtrStep image_sqsum_b, - PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sum_r_ = (float)( - (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) - - (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x])); - float image_sqsum_r_ = (float)( - (image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) - - (image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x])); - float image_sum_g_ = (float)( - (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) - - (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x])); - float image_sqsum_g_ = (float)( - (image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) - - (image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x])); - float image_sum_b_ = (float)( - (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) - - (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x])); - float image_sqsum_b_ = (float)( - (image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) - - (image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x])); - - float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g - - image_sum_b_ * templ_sum_scale_b; - float denum = 
sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ - + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ - + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_)); - result.ptr(y)[x] = normAcc(num, denum); - } - } - - void matchTemplatePrepared_CCOFF_NORMED_8UC3( - int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, - unsigned int templ_sum_b, unsigned long long templ_sqsum_b, - PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - float weight = 1.f / (w * h); - float templ_sum_scale_r = templ_sum_r * weight; - float templ_sum_scale_g = templ_sum_g * weight; - float templ_sum_scale_b = templ_sum_b * weight; - float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r - + templ_sqsum_g - weight * templ_sum_g * templ_sum_g - + templ_sqsum_b - weight * templ_sum_b * templ_sum_b; - - matchTemplatePreparedKernel_CCOFF_NORMED_8UC3<<>>( - w, h, weight, - templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b, - templ_sqsum_scale, - image_sum_r, image_sqsum_r, - image_sum_g, image_sqsum_g, - image_sum_b, image_sqsum_b, - result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - - - __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4( - int w, int h, float weight, - float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b, - float templ_sum_scale_a, float templ_sqsum_scale, - const PtrStep image_sum_r, const PtrStep image_sqsum_r, - const PtrStep image_sum_g, const PtrStep image_sqsum_g, - const PtrStep image_sum_b, const PtrStep image_sqsum_b, - const PtrStep image_sum_a, 
const PtrStep image_sqsum_a, - PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float image_sum_r_ = (float)( - (image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) - - (image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x])); - float image_sqsum_r_ = (float)( - (image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) - - (image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x])); - float image_sum_g_ = (float)( - (image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) - - (image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x])); - float image_sqsum_g_ = (float)( - (image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) - - (image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x])); - float image_sum_b_ = (float)( - (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) - - (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x])); - float image_sqsum_b_ = (float)( - (image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) - - (image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x])); - float image_sum_a_ = (float)( - (image_sum_a.ptr(y + h)[x + w] - image_sum_a.ptr(y)[x + w]) - - (image_sum_a.ptr(y + h)[x] - image_sum_a.ptr(y)[x])); - float image_sqsum_a_ = (float)( - (image_sqsum_a.ptr(y + h)[x + w] - image_sqsum_a.ptr(y)[x + w]) - - (image_sqsum_a.ptr(y + h)[x] - image_sqsum_a.ptr(y)[x])); - - float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r - image_sum_g_ * templ_sum_scale_g - - image_sum_b_ * templ_sum_scale_b - image_sum_a_ * templ_sum_scale_a; - float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ - + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ - + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_ - + image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_)); - result.ptr(y)[x] = normAcc(num, denum); - } - } - - void 
matchTemplatePrepared_CCOFF_NORMED_8UC4( - int w, int h, - const PtrStepSz image_sum_r, const PtrStepSz image_sqsum_r, - const PtrStepSz image_sum_g, const PtrStepSz image_sqsum_g, - const PtrStepSz image_sum_b, const PtrStepSz image_sqsum_b, - const PtrStepSz image_sum_a, const PtrStepSz image_sqsum_a, - unsigned int templ_sum_r, unsigned long long templ_sqsum_r, - unsigned int templ_sum_g, unsigned long long templ_sqsum_g, - unsigned int templ_sum_b, unsigned long long templ_sqsum_b, - unsigned int templ_sum_a, unsigned long long templ_sqsum_a, - PtrStepSzf result, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - float weight = 1.f / (w * h); - float templ_sum_scale_r = templ_sum_r * weight; - float templ_sum_scale_g = templ_sum_g * weight; - float templ_sum_scale_b = templ_sum_b * weight; - float templ_sum_scale_a = templ_sum_a * weight; - float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r - + templ_sqsum_g - weight * templ_sum_g * templ_sum_g - + templ_sqsum_b - weight * templ_sum_b * templ_sum_b - + templ_sqsum_a - weight * templ_sum_a * templ_sum_a; - - matchTemplatePreparedKernel_CCOFF_NORMED_8UC4<<>>( - w, h, weight, - templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b, templ_sum_scale_a, - templ_sqsum_scale, - image_sum_r, image_sqsum_r, - image_sum_g, image_sqsum_g, - image_sum_b, image_sqsum_b, - image_sum_a, image_sqsum_a, - result); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - ////////////////////////////////////////////////////////////////////// - // normalize - - template - __global__ void normalizeKernel_8U( - int w, int h, const PtrStep image_sqsum, - unsigned long long templ_sqsum, PtrStepSzf result) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - float 
image_sqsum_ = (float)( - (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) - - (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn])); - result.ptr(y)[x] = normAcc(result.ptr(y)[x], sqrtf(image_sqsum_ * templ_sqsum)); - } - } - - void normalize_8U(int w, int h, const PtrStepSz image_sqsum, - unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - switch (cn) - { - case 1: - normalizeKernel_8U<1><<>>(w, h, image_sqsum, templ_sqsum, result); - break; - case 2: - normalizeKernel_8U<2><<>>(w, h, image_sqsum, templ_sqsum, result); - break; - case 3: - normalizeKernel_8U<3><<>>(w, h, image_sqsum, templ_sqsum, result); - break; - case 4: - normalizeKernel_8U<4><<>>(w, h, image_sqsum, templ_sqsum, result); - break; - } - - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - ////////////////////////////////////////////////////////////////////// - // extractFirstChannel - - template - __global__ void extractFirstChannel_32F(const PtrStepb image, PtrStepSzf result) - { - typedef typename TypeVec::vec_type Typef; - - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < result.cols && y < result.rows) - { - Typef val = ((const Typef*)image.ptr(y))[x]; - result.ptr(y)[x] = first(val); - } - } - - void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream) - { - dim3 threads(32, 8); - dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); - - switch (cn) - { - case 1: - extractFirstChannel_32F<1><<>>(image, result); - break; - case 2: - extractFirstChannel_32F<2><<>>(image, result); - break; - case 3: - extractFirstChannel_32F<3><<>>(image, result); - break; - case 4: - extractFirstChannel_32F<4><<>>(image, result); - break; - } - cudaSafeCall( 
cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - } //namespace match_template -}}} // namespace cv { namespace gpu { namespace cudev - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/nlm.cu b/modules/gpu/src/cuda/nlm.cu deleted file mode 100644 index 92bfccf37c..0000000000 --- a/modules/gpu/src/cuda/nlm.cu +++ /dev/null @@ -1,569 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/reduce.hpp" -#include "opencv2/core/cuda/border_interpolate.hpp" - -using namespace cv::gpu; - -typedef unsigned char uchar; -typedef unsigned short ushort; - -////////////////////////////////////////////////////////////////////////////////// -//// Non Local Means Denosing - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - __device__ __forceinline__ float norm2(const float& v) { return v*v; } - __device__ __forceinline__ float norm2(const float2& v) { return v.x*v.x + v.y*v.y; } - __device__ __forceinline__ float norm2(const float3& v) { return v.x*v.x + v.y*v.y + v.z*v.z; } - __device__ __forceinline__ float norm2(const float4& v) { return v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w; } - - template - __global__ void nlm_kernel(const PtrStep src, PtrStepSz dst, const B b, int search_radius, int block_radius, float noise_mult) - { - typedef typename TypeVec::cn>::vec_type value_type; - - const int i = blockDim.y * blockIdx.y + threadIdx.y; - const int j = blockDim.x * blockIdx.x + threadIdx.x; - - if (j >= dst.cols || i >= dst.rows) - return; - - int bsize = search_radius + block_radius; - int search_window = 2 * search_radius + 1; - float minus_search_window2_inv = 
-1.f/(search_window * search_window); - - value_type sum1 = VecTraits::all(0); - float sum2 = 0.f; - - if (j - bsize >= 0 && j + bsize < dst.cols && i - bsize >= 0 && i + bsize < dst.rows) - { - for(float y = -search_radius; y <= search_radius; ++y) - for(float x = -search_radius; x <= search_radius; ++x) - { - float dist2 = 0; - for(float ty = -block_radius; ty <= block_radius; ++ty) - for(float tx = -block_radius; tx <= block_radius; ++tx) - { - value_type bv = saturate_cast(src(i + y + ty, j + x + tx)); - value_type av = saturate_cast(src(i + ty, j + tx)); - - dist2 += norm2(av - bv); - } - - float w = __expf(dist2 * noise_mult + (x * x + y * y) * minus_search_window2_inv); - - /*if (i == 255 && j == 255) - printf("%f %f\n", w, dist2 * minus_h2_inv + (x * x + y * y) * minus_search_window2_inv);*/ - - sum1 = sum1 + w * saturate_cast(src(i + y, j + x)); - sum2 += w; - } - } - else - { - for(float y = -search_radius; y <= search_radius; ++y) - for(float x = -search_radius; x <= search_radius; ++x) - { - float dist2 = 0; - for(float ty = -block_radius; ty <= block_radius; ++ty) - for(float tx = -block_radius; tx <= block_radius; ++tx) - { - value_type bv = saturate_cast(b.at(i + y + ty, j + x + tx, src)); - value_type av = saturate_cast(b.at(i + ty, j + tx, src)); - dist2 += norm2(av - bv); - } - - float w = __expf(dist2 * noise_mult + (x * x + y * y) * minus_search_window2_inv); - - sum1 = sum1 + w * saturate_cast(b.at(i + y, j + x, src)); - sum2 += w; - } - - } - - dst(i, j) = saturate_cast(sum1 / sum2); - - } - - template class B> - void nlm_caller(const PtrStepSzb src, PtrStepSzb dst, int search_radius, int block_radius, float h, cudaStream_t stream) - { - dim3 block (32, 8); - dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y)); - - B b(src.rows, src.cols); - - int block_window = 2 * block_radius + 1; - float minus_h2_inv = -1.f/(h * h * VecTraits::cn); - float noise_mult = minus_h2_inv/(block_window * block_window); - - cudaSafeCall( 
cudaFuncSetCacheConfig (nlm_kernel >, cudaFuncCachePreferL1) ); - nlm_kernel<<>>((PtrStepSz)src, (PtrStepSz)dst, b, search_radius, block_radius, noise_mult); - cudaSafeCall ( cudaGetLastError () ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template - void nlm_bruteforce_gpu(const PtrStepSzb& src, PtrStepSzb dst, int search_radius, int block_radius, float h, int borderMode, cudaStream_t stream) - { - typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb dst, int search_radius, int block_radius, float h, cudaStream_t stream); - - static func_t funcs[] = - { - nlm_caller, - nlm_caller, - nlm_caller, - nlm_caller, - nlm_caller, - }; - funcs[borderMode](src, dst, search_radius, block_radius, h, stream); - } - - template void nlm_bruteforce_gpu(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t); - template void nlm_bruteforce_gpu(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t); - template void nlm_bruteforce_gpu(const PtrStepSzb&, PtrStepSzb, int, int, float, int, cudaStream_t); - } -}}} - -////////////////////////////////////////////////////////////////////////////////// -//// Non Local Means Denosing (fast approximate version) - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - - template struct Unroll; - template <> struct Unroll<1> - { - template - static __device__ __forceinline__ thrust::tuple smem_tuple(float* smem) - { - return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE); - } - - static __device__ __forceinline__ thrust::tuple tie(float& val1, float& val2) - { - return thrust::tie(val1, val2); - } - - static __device__ __forceinline__ const thrust::tuple, plus > op() - { - plus op; - return thrust::make_tuple(op, op); - } - }; - template <> struct Unroll<2> - { - template - static __device__ __forceinline__ thrust::tuple smem_tuple(float* smem) - { - return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE); - } - - static __device__ 
__forceinline__ thrust::tuple tie(float& val1, float2& val2) - { - return thrust::tie(val1, val2.x, val2.y); - } - - static __device__ __forceinline__ const thrust::tuple, plus, plus > op() - { - plus op; - return thrust::make_tuple(op, op, op); - } - }; - template <> struct Unroll<3> - { - template - static __device__ __forceinline__ thrust::tuple smem_tuple(float* smem) - { - return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE); - } - - static __device__ __forceinline__ thrust::tuple tie(float& val1, float3& val2) - { - return thrust::tie(val1, val2.x, val2.y, val2.z); - } - - static __device__ __forceinline__ const thrust::tuple, plus, plus, plus > op() - { - plus op; - return thrust::make_tuple(op, op, op, op); - } - }; - template <> struct Unroll<4> - { - template - static __device__ __forceinline__ thrust::tuple smem_tuple(float* smem) - { - return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE, smem + 4 * BLOCK_SIZE); - } - - static __device__ __forceinline__ thrust::tuple tie(float& val1, float4& val2) - { - return thrust::tie(val1, val2.x, val2.y, val2.z, val2.w); - } - - static __device__ __forceinline__ const thrust::tuple, plus, plus, plus, plus > op() - { - plus op; - return thrust::make_tuple(op, op, op, op, op); - } - }; - - __device__ __forceinline__ int calcDist(const uchar& a, const uchar& b) { return (a-b)*(a-b); } - __device__ __forceinline__ int calcDist(const uchar2& a, const uchar2& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y); } - __device__ __forceinline__ int calcDist(const uchar3& a, const uchar3& b) { return (a.x-b.x)*(a.x-b.x) + (a.y-b.y)*(a.y-b.y) + (a.z-b.z)*(a.z-b.z); } - - template struct FastNonLocalMenas - { - enum - { - CTA_SIZE = 128, - - TILE_COLS = 128, - TILE_ROWS = 32, - - STRIDE = CTA_SIZE - }; - - struct plus - { - __device__ __forceinline__ float operator()(float v1, float v2) const { return v1 + v2; } - }; - - 
int search_radius; - int block_radius; - - int search_window; - int block_window; - float minus_h2_inv; - - FastNonLocalMenas(int search_window_, int block_window_, float h) : search_radius(search_window_/2), block_radius(block_window_/2), - search_window(search_window_), block_window(block_window_), minus_h2_inv(-1.f/(h * h * VecTraits::cn)) {} - - PtrStep src; - mutable PtrStepi buffer; - - __device__ __forceinline__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const - { - for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE) - { - dist_sums[index] = 0; - - for(int tx = 0; tx < block_window; ++tx) - col_sums(tx, index) = 0; - - int y = index / search_window; - int x = index - y * search_window; - - int ay = i; - int ax = j; - - int by = i + y - search_radius; - int bx = j + x - search_radius; - -#if 1 - for (int tx = -block_radius; tx <= block_radius; ++tx) - { - int col_sum = 0; - for (int ty = -block_radius; ty <= block_radius; ++ty) - { - int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx)); - - dist_sums[index] += dist; - col_sum += dist; - } - col_sums(tx + block_radius, index) = col_sum; - } -#else - for (int ty = -block_radius; ty <= block_radius; ++ty) - for (int tx = -block_radius; tx <= block_radius; ++tx) - { - int dist = calcDist(src(ay + ty, ax + tx), src(by + ty, bx + tx)); - - dist_sums[index] += dist; - col_sums(tx + block_radius, index) += dist; - } -#endif - - up_col_sums(j, index) = col_sums(block_window - 1, index); - } - } - - __device__ __forceinline__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const - { - for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE) - { - int y = index / search_window; - int x = index - y * search_window; - - int ay = i; - int ax = j + block_radius; - - int by = i + y - search_radius; - int bx = j + x - search_radius + 
block_radius; - - int col_sum = 0; - - for (int ty = -block_radius; ty <= block_radius; ++ty) - col_sum += calcDist(src(ay + ty, ax), src(by + ty, bx)); - - dist_sums[index] += col_sum - col_sums(first, index); - - col_sums(first, index) = col_sum; - up_col_sums(j, index) = col_sum; - } - } - - __device__ __forceinline__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const - { - int ay = i; - int ax = j + block_radius; - - T a_up = src(ay - block_radius - 1, ax); - T a_down = src(ay + block_radius, ax); - - for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE) - { - int y = index / search_window; - int x = index - y * search_window; - - int by = i + y - search_radius; - int bx = j + x - search_radius + block_radius; - - T b_up = src(by - block_radius - 1, bx); - T b_down = src(by + block_radius, bx); - - int col_sum = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up); - - dist_sums[index] += col_sum - col_sums(first, index); - col_sums(first, index) = col_sum; - up_col_sums(j, index) = col_sum; - } - } - - __device__ __forceinline__ void convolve_window(int i, int j, const int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums, T& dst) const - { - typedef typename TypeVec::cn>::vec_type sum_type; - - float weights_sum = 0; - sum_type sum = VecTraits::all(0); - - float bw2_inv = 1.f/(block_window * block_window); - - int sx = j - search_radius; - int sy = i - search_radius; - - for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE) - { - int y = index / search_window; - int x = index - y * search_window; - - float avg_dist = dist_sums[index] * bw2_inv; - float weight = __expf(avg_dist * minus_h2_inv); - weights_sum += weight; - - sum = sum + weight * saturate_cast(src(sy + y, sx + x)); - } - - __shared__ float cta_buffer[CTA_SIZE * (VecTraits::cn + 1)]; - - reduce(Unroll::cn>::template smem_tuple(cta_buffer), - 
Unroll::cn>::tie(weights_sum, sum), - threadIdx.x, - Unroll::cn>::op()); - - if (threadIdx.x == 0) - dst = saturate_cast(sum / weights_sum); - } - - __device__ __forceinline__ void operator()(PtrStepSz& dst) const - { - int tbx = blockIdx.x * TILE_COLS; - int tby = blockIdx.y * TILE_ROWS; - - int tex = ::min(tbx + TILE_COLS, dst.cols); - int tey = ::min(tby + TILE_ROWS, dst.rows); - - PtrStepi col_sums; - col_sums.data = buffer.ptr(dst.cols + blockIdx.x * block_window) + blockIdx.y * search_window * search_window; - col_sums.step = buffer.step; - - PtrStepi up_col_sums; - up_col_sums.data = buffer.data + blockIdx.y * search_window * search_window; - up_col_sums.step = buffer.step; - - extern __shared__ int dist_sums[]; //search_window * search_window - - int first = 0; - - for (int i = tby; i < tey; ++i) - for (int j = tbx; j < tex; ++j) - { - __syncthreads(); - - if (j == tbx) - { - initSums_BruteForce(i, j, dist_sums, col_sums, up_col_sums); - first = 0; - } - else - { - if (i == tby) - shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums); - else - shiftRight_UpSums(i, j, first, dist_sums, col_sums, up_col_sums); - - first = (first + 1) % block_window; - } - - __syncthreads(); - - convolve_window(i, j, dist_sums, col_sums, up_col_sums, dst(i, j)); - } - } - - }; - - template - __global__ void fast_nlm_kernel(const FastNonLocalMenas fnlm, PtrStepSz dst) { fnlm(dst); } - - void nln_fast_get_buffer_size(const PtrStepSzb& src, int search_window, int block_window, int& buffer_cols, int& buffer_rows) - { - typedef FastNonLocalMenas FNLM; - dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS)); - - buffer_cols = search_window * search_window * grid.y; - buffer_rows = src.cols + block_window * grid.x; - } - - template - void nlm_fast_gpu(const PtrStepSzb& src, PtrStepSzb dst, PtrStepi buffer, - int search_window, int block_window, float h, cudaStream_t stream) - { - typedef FastNonLocalMenas FNLM; - FNLM fnlm(search_window, 
block_window, h); - - fnlm.src = (PtrStepSz)src; - fnlm.buffer = buffer; - - dim3 block(FNLM::CTA_SIZE, 1); - dim3 grid(divUp(src.cols, FNLM::TILE_COLS), divUp(src.rows, FNLM::TILE_ROWS)); - int smem = search_window * search_window * sizeof(int); - - - fast_nlm_kernel<<>>(fnlm, (PtrStepSz)dst); - cudaSafeCall ( cudaGetLastError () ); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void nlm_fast_gpu(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t); - template void nlm_fast_gpu(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t); - template void nlm_fast_gpu(const PtrStepSzb&, PtrStepSzb, PtrStepi, int, int, float, cudaStream_t); - - - - __global__ void fnlm_split_kernel(const PtrStepSz lab, PtrStepb l, PtrStep ab) - { - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - - if (x < lab.cols && y < lab.rows) - { - uchar3 p = lab(y, x); - ab(y,x) = make_uchar2(p.y, p.z); - l(y,x) = p.x; - } - } - - void fnlm_split_channels(const PtrStepSz& lab, PtrStepb l, PtrStep ab, cudaStream_t stream) - { - dim3 b(32, 8); - dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y)); - - fnlm_split_kernel<<>>(lab, l, ab); - cudaSafeCall ( cudaGetLastError () ); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - __global__ void fnlm_merge_kernel(const PtrStepb l, const PtrStep ab, PtrStepSz lab) - { - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - - if (x < lab.cols && y < lab.rows) - { - uchar2 p = ab(y, x); - lab(y, x) = make_uchar3(l(y, x), p.x, p.y); - } - } - - void fnlm_merge_channels(const PtrStepb& l, const PtrStep& ab, PtrStepSz lab, cudaStream_t stream) - { - dim3 b(32, 8); - dim3 g(divUp(lab.cols, b.x), divUp(lab.rows, b.y)); - - fnlm_merge_kernel<<>>(l, ab, lab); - cudaSafeCall ( cudaGetLastError () ); - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - } -}}} - - -#endif /* 
CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/pyr_down.cu b/modules/gpu/src/cuda/pyr_down.cu deleted file mode 100644 index 904f549bad..0000000000 --- a/modules/gpu/src/cuda/pyr_down.cu +++ /dev/null @@ -1,228 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/border_interpolate.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - template __global__ void pyrDown(const PtrStepSz src, PtrStep dst, const B b, int dst_cols) - { - typedef typename TypeVec::cn>::vec_type work_t; - - __shared__ work_t smem[256 + 4]; - - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y; - - const int src_y = 2 * y; - - if (src_y >= 2 && src_y < src.rows - 2 && x >= 2 && x < src.cols - 2) - { - { - work_t sum; - - sum = 0.0625f * src(src_y - 2, x); - sum = sum + 0.25f * src(src_y - 1, x); - sum = sum + 0.375f * src(src_y , x); - sum = sum + 0.25f * src(src_y + 1, x); - sum = sum + 0.0625f * src(src_y + 2, x); - - smem[2 + threadIdx.x] = sum; - } - - if (threadIdx.x < 2) - { - const int left_x = x - 2; - - work_t sum; - - sum = 0.0625f * src(src_y - 2, left_x); - sum = sum + 0.25f * src(src_y - 1, left_x); - sum = sum + 0.375f * src(src_y , left_x); - sum = sum + 0.25f * src(src_y + 1, left_x); - sum = sum + 0.0625f * src(src_y + 2, left_x); - - smem[threadIdx.x] = sum; - } - - if (threadIdx.x > 253) - { - const int right_x = x + 2; - - work_t sum; - - sum = 0.0625f * src(src_y - 2, right_x); - sum = sum + 
0.25f * src(src_y - 1, right_x); - sum = sum + 0.375f * src(src_y , right_x); - sum = sum + 0.25f * src(src_y + 1, right_x); - sum = sum + 0.0625f * src(src_y + 2, right_x); - - smem[4 + threadIdx.x] = sum; - } - } - else - { - { - work_t sum; - - sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(x)); - sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(x)); - sum = sum + 0.375f * src(src_y , b.idx_col_high(x)); - sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(x)); - sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(x)); - - smem[2 + threadIdx.x] = sum; - } - - if (threadIdx.x < 2) - { - const int left_x = x - 2; - - work_t sum; - - sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col(left_x)); - sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col(left_x)); - sum = sum + 0.375f * src(src_y , b.idx_col(left_x)); - sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col(left_x)); - sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col(left_x)); - - smem[threadIdx.x] = sum; - } - - if (threadIdx.x > 253) - { - const int right_x = x + 2; - - work_t sum; - - sum = 0.0625f * src(b.idx_row_low (src_y - 2), b.idx_col_high(right_x)); - sum = sum + 0.25f * src(b.idx_row_low (src_y - 1), b.idx_col_high(right_x)); - sum = sum + 0.375f * src(src_y , b.idx_col_high(right_x)); - sum = sum + 0.25f * src(b.idx_row_high(src_y + 1), b.idx_col_high(right_x)); - sum = sum + 0.0625f * src(b.idx_row_high(src_y + 2), b.idx_col_high(right_x)); - - smem[4 + threadIdx.x] = sum; - } - } - - __syncthreads(); - - if (threadIdx.x < 128) - { - const int tid2 = threadIdx.x * 2; - - work_t sum; - - sum = 0.0625f * smem[2 + tid2 - 2]; - sum = sum + 0.25f * smem[2 + tid2 - 1]; - sum = sum + 0.375f * smem[2 + tid2 ]; - sum = sum + 0.25f * smem[2 + tid2 + 1]; - sum = sum + 0.0625f * smem[2 + tid2 + 2]; - - const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2; - - if (dst_x < dst_cols) - dst.ptr(y)[dst_x] = 
saturate_cast(sum); - } - } - - template class B> void pyrDown_caller(PtrStepSz src, PtrStepSz dst, cudaStream_t stream) - { - const dim3 block(256); - const dim3 grid(divUp(src.cols, block.x), dst.rows); - - B b(src.rows, src.cols); - - pyrDown<<>>(src, dst, b, dst.cols); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - pyrDown_caller(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(dst), stream); - } - - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void 
pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrDown_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - } // namespace imgproc -}}} // namespace cv { namespace gpu { namespace cudev - - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/pyr_up.cu b/modules/gpu/src/cuda/pyr_up.cu deleted file mode 100644 index 36a72274cf..0000000000 --- a/modules/gpu/src/cuda/pyr_up.cu +++ /dev/null @@ -1,196 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/border_interpolate.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - template __global__ void pyrUp(const PtrStepSz src, PtrStepSz dst) - { - typedef typename TypeVec::cn>::vec_type sum_t; - - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - __shared__ sum_t s_srcPatch[10][10]; - __shared__ sum_t s_dstPatch[20][16]; - - if (threadIdx.x < 10 && threadIdx.y < 10) - { - int srcx = static_cast((blockIdx.x * blockDim.x) / 2 + threadIdx.x) - 1; - int srcy = static_cast((blockIdx.y * blockDim.y) / 2 + threadIdx.y) - 1; - - srcx = ::abs(srcx); - srcx = ::min(src.cols - 1, srcx); - - srcy = ::abs(srcy); - srcy = ::min(src.rows - 1, srcy); - - s_srcPatch[threadIdx.y][threadIdx.x] = saturate_cast(src(srcy, srcx)); - } - - __syncthreads(); - - sum_t sum = VecTraits::all(0); - - const int evenFlag = static_cast((threadIdx.x & 1) == 0); - const int oddFlag = static_cast((threadIdx.x & 1) != 0); - const bool eveny = ((threadIdx.y & 1) == 0); - const int tidx = threadIdx.x; - - if (eveny) - { - sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 2) >> 1)]; - sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx - 1) >> 1)]; - sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx ) >> 1)]; - sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 1) >> 1)]; - sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (threadIdx.y >> 1)][1 + ((tidx + 2) >> 1)]; - } - - s_dstPatch[2 + threadIdx.y][threadIdx.x] = sum; - - if (threadIdx.y < 2) - { - sum = VecTraits::all(0); - - if (eveny) - { - sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + 
((tidx - 2) >> 1)]; - sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; - sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; - sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; - sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; - } - - s_dstPatch[threadIdx.y][threadIdx.x] = sum; - } - - if (threadIdx.y > 13) - { - sum = VecTraits::all(0); - - if (eveny) - { - sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; - sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; - sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; - sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; - sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; - } - - s_dstPatch[4 + threadIdx.y][threadIdx.x] = sum; - } - - __syncthreads(); - - sum = VecTraits::all(0); - - const int tidy = threadIdx.y; - - sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][threadIdx.x]; - sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][threadIdx.x]; - sum = sum + 0.375f * s_dstPatch[2 + tidy ][threadIdx.x]; - sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][threadIdx.x]; - sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][threadIdx.x]; - - if (x < dst.cols && y < dst.rows) - dst(y, x) = saturate_cast(4.0f * sum); - } - - template void pyrUp_caller(PtrStepSz src, PtrStepSz dst, cudaStream_t stream) - { - const dim3 block(16, 16); - const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); - - pyrUp<<>>(src, dst); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) - { - pyrUp_caller(static_cast< PtrStepSz >(src), static_cast< PtrStepSz >(dst), stream); - } - - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t 
stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - //template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - template void pyrUp_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - } // namespace imgproc -}}} // namespace cv { namespace gpu { namespace cudev - -#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/remap.cu b/modules/gpu/src/cuda/remap.cu deleted file 
mode 100644 index dd2c669159..0000000000 --- a/modules/gpu/src/cuda/remap.cu +++ /dev/null @@ -1,274 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/border_interpolate.hpp" -#include "opencv2/core/cuda/vec_traits.hpp" -#include "opencv2/core/cuda/vec_math.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/filters.hpp" - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - template __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, PtrStepSz dst) - { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < dst.cols && y < dst.rows) - { - const float xcoo = mapx.ptr(y)[x]; - const float ycoo = mapy.ptr(y)[x]; - - dst.ptr(y)[x] = saturate_cast(src(ycoo, xcoo)); - } - } - - template