From: Anatoly Baksheev Date: Fri, 17 Dec 2010 15:41:26 +0000 (+0000) Subject: temporary added NPP_staging, functionality from the library will be moved to NPP... X-Git-Tag: accepted/2.0/20130307.220821~3796 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9dd4a22a5e3ad48a865562b9142dade6e4260d34;p=profile%2Fivi%2Fopencv.git temporary added NPP_staging, functionality from the library will be moved to NPP with next release. --- diff --git a/3rdparty/NPP_staging/NPP_staging_static_Windows_32_v1.lib b/3rdparty/NPP_staging/NPP_staging_static_Windows_32_v1.lib new file mode 100644 index 0000000..39d29b8 Binary files /dev/null and b/3rdparty/NPP_staging/NPP_staging_static_Windows_32_v1.lib differ diff --git a/3rdparty/NPP_staging/NPP_staging_static_Windows_64_v1.lib b/3rdparty/NPP_staging/NPP_staging_static_Windows_64_v1.lib new file mode 100644 index 0000000..f8372bf Binary files /dev/null and b/3rdparty/NPP_staging/NPP_staging_static_Windows_64_v1.lib differ diff --git a/3rdparty/NPP_staging/libNPP_staging_static_Darwin_64_v1.a b/3rdparty/NPP_staging/libNPP_staging_static_Darwin_64_v1.a new file mode 100644 index 0000000..ab0150e Binary files /dev/null and b/3rdparty/NPP_staging/libNPP_staging_static_Darwin_64_v1.a differ diff --git a/3rdparty/NPP_staging/libNPP_staging_static_Linux_32_v1.a b/3rdparty/NPP_staging/libNPP_staging_static_Linux_32_v1.a new file mode 100644 index 0000000..6c16959 Binary files /dev/null and b/3rdparty/NPP_staging/libNPP_staging_static_Linux_32_v1.a differ diff --git a/3rdparty/NPP_staging/libNPP_staging_static_Linux_64_v1.a b/3rdparty/NPP_staging/libNPP_staging_static_Linux_64_v1.a new file mode 100644 index 0000000..56c7bd5 Binary files /dev/null and b/3rdparty/NPP_staging/libNPP_staging_static_Linux_64_v1.a differ diff --git a/3rdparty/NPP_staging/npp_staging.h b/3rdparty/NPP_staging/npp_staging.h new file mode 100644 index 0000000..c54af5c --- /dev/null +++ b/3rdparty/NPP_staging/npp_staging.h @@ -0,0 +1,760 @@ +/* +* Copyright 
1993-2010 NVIDIA Corporation. All rights reserved. +* +* NOTICE TO USER: +* +* This source code is subject to NVIDIA ownership rights under U.S. and +* international Copyright laws. +* +* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +* OR PERFORMANCE OF THIS SOURCE CODE. +* +* U.S. Government End Users. This source code is a "commercial item" as +* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +* "commercial computer software" and "commercial computer software +* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +* and is provided to the U.S. Government only as a commercial end item. +* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +* source code with only those rights set forth herein. 
+*/ +#ifndef _npp_staging_h_ +#define _npp_staging_h_ + + +/** +* \file npp_staging.h +* NPP Staging Library (will become part of NPP next release) +*/ + + +#ifdef __cplusplus + + +/** \defgroup ctassert Compile-time assert functionality +* @{ +*/ + + + /** + * Compile-time assert namespace + */ + namespace NppStCTprep + { + template <bool x> + struct CT_ASSERT_FAILURE; + + template <> + struct CT_ASSERT_FAILURE<true> {}; + + template <int x> + struct assertTest{}; + } + + + #define NPPST_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro + #define NPPST_CT_PREP_PASTE(a,b) NPPST_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro + + + /** + * Performs compile-time assertion of a condition on the file scope + */ + #define NPPST_CT_ASSERT(X) \ + typedef NppStCTprep::assertTest<sizeof(NppStCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \ + NPPST_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__) + + +/*@}*/ + + +#endif + + +/** \defgroup typedefs NPP Integral and compound types of guaranteed size + * @{ + */ + + +typedef bool NppStBool; ///< Bool of size less than integer +typedef long long NppSt64s; ///< 64-bit signed integer +typedef unsigned long long NppSt64u; ///< 64-bit unsigned integer +typedef int NppSt32s; ///< 32-bit signed integer +typedef unsigned int NppSt32u; ///< 32-bit unsigned integer +typedef short NppSt16s; ///< 16-bit signed short +typedef unsigned short NppSt16u; ///< 16-bit unsigned short +typedef char NppSt8s; ///< 8-bit signed char +typedef unsigned char NppSt8u; ///< 8-bit unsigned char +typedef float NppSt32f; ///< 32-bit IEEE-754 (single precision) float +typedef double NppSt64f; ///< 64-bit IEEE-754 (double precision) float + + +/** + * 2D Rectangle, 8-bit unsigned fields + * This struct contains position and size information of a rectangle in two space + */ +struct NppStRect8u +{ + NppSt8u x; ///< x-coordinate of upper left corner + NppSt8u y; ///< y-coordinate of upper left corner + NppSt8u width; ///< Rectangle width + NppSt8u height; ///< Rectangle height +#ifdef __cplusplus + NppStRect8u() : x(0), y(0), 
width(0), height(0) {}; + NppStRect8u(NppSt8u x, NppSt8u y, NppSt8u width, NppSt8u height) : x(x), y(y), width(width), height(height) {} +#endif +}; + + +/** + * 2D Rectangle, 32-bit signed fields + * This struct contains position and size information of a rectangle in two space + */ +struct NppStRect32s +{ + NppSt32s x; ///< x-coordinate of upper left corner + NppSt32s y; ///< y-coordinate of upper left corner + NppSt32s width; ///< Rectangle width + NppSt32s height; ///< Rectangle height +#ifdef __cplusplus + NppStRect32s() : x(0), y(0), width(0), height(0) {}; + NppStRect32s(NppSt32s x, NppSt32s y, NppSt32s width, NppSt32s height) : x(x), y(y), width(width), height(height) {} +#endif +}; + + +/** + * 2D Rectangle, 32-bit unsigned fields + * This struct contains position and size information of a rectangle in two space + */ +struct NppStRect32u +{ + NppSt32u x; ///< x-coordinate of upper left corner + NppSt32u y; ///< y-coordinate of upper left corner + NppSt32u width; ///< Rectangle width + NppSt32u height; ///< Rectangle height +#ifdef __cplusplus + NppStRect32u() : x(0), y(0), width(0), height(0) {}; + NppStRect32u(NppSt32u x, NppSt32u y, NppSt32u width, NppSt32u height) : x(x), y(y), width(width), height(height) {} +#endif +}; + + +/** + * 2D Size, 32-bit signed fields + * This struct typically represents the size of a rectangular region in two space + */ +struct NppStSize32s +{ + NppSt32s width; ///< Rectangle width + NppSt32s height; ///< Rectangle height +#ifdef __cplusplus + NppStSize32s() : width(0), height(0) {}; + NppStSize32s(NppSt32s width, NppSt32s height) : width(width), height(height) {} +#endif +}; + + +/** + * 2D Size, 32-bit unsigned fields + * This struct typically represents the size of a rectangular region in two space + */ +struct NppStSize32u +{ + NppSt32u width; ///< Rectangle width + NppSt32u height; ///< Rectangle height +#ifdef __cplusplus + NppStSize32u() : width(0), height(0) {}; + NppStSize32u(NppSt32u width, NppSt32u height) : 
width(width), height(height) {} +#endif +}; + + +/** + * Error Status Codes + * + * Almost all NPP functions return error-status information using + * these return codes. + * Negative return codes indicate errors, positive return codes indicate + * warnings, a return code of 0 indicates success. + */ +enum NppStStatus +{ + //already present in NPP + /* NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR) + NPP_ERROR = -1, ///< Unknown error + NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error + NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error + NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned + NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error + NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error + NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error*/ + + //to be added + NPP_INVALID_ROI, ///< Invalid region of interest argument + NPP_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width) + NPP_INVALID_SCALE, ///< Invalid scale parameter passed + NPP_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer + NPP_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned) + NPP_MEM_INTERNAL_ERROR, ///< Internal memory management error +}; + + +/*@}*/ + + +#ifdef __cplusplus + + +/** \defgroup ct_typesize_checks Client-side sizeof types compile-time check +* @{ +*/ + NPPST_CT_ASSERT(sizeof(NppStBool) <= 4); + NPPST_CT_ASSERT(sizeof(NppSt64s) == 8); + NPPST_CT_ASSERT(sizeof(NppSt64u) == 8); + NPPST_CT_ASSERT(sizeof(NppSt32s) == 4); + NPPST_CT_ASSERT(sizeof(NppSt32u) == 4); + NPPST_CT_ASSERT(sizeof(NppSt16s) == 2); + NPPST_CT_ASSERT(sizeof(NppSt16u) == 2); + NPPST_CT_ASSERT(sizeof(NppSt8s) == 1); + NPPST_CT_ASSERT(sizeof(NppSt8u) == 1); + NPPST_CT_ASSERT(sizeof(NppSt32f) == 4); + NPPST_CT_ASSERT(sizeof(NppSt64f) == 8); + NPPST_CT_ASSERT(sizeof(NppStRect8u) == 
sizeof(NppSt32u)); + NPPST_CT_ASSERT(sizeof(NppStRect32s) == 4 * sizeof(NppSt32s)); + NPPST_CT_ASSERT(sizeof(NppStRect32u) == 4 * sizeof(NppSt32u)); + NPPST_CT_ASSERT(sizeof(NppStSize32u) == 2 * sizeof(NppSt32u)); +/*@}*/ + + +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** \defgroup core_npp NPP Core + * Basic functions for CUDA streams management. + * WARNING: These functions couldn't be exported from NPP_staging library, so they can't be used + * @{ + */ + + +/** + * Gets an active CUDA stream used by NPP (Not an API yet!) + * \return Current CUDA stream + */ +cudaStream_t nppStGetActiveCUDAstream(); + + +/** + * Sets an active CUDA stream used by NPP (Not an API yet!) + * \param cudaStream [IN] cudaStream CUDA stream to become current + * \return CUDA stream used before + */ +cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream); + + +/*@}*/ + + +/** \defgroup nppi NPP Image Processing +* @{ +*/ + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. + * + * \param d_src [IN] Source image pointer (CUDA device memory) + * \param srcStep [IN] Source image line step + * \param d_dst [OUT] Destination image pointer (CUDA device memory) + * \param dstStep [IN] Destination image line step + * \param srcRoi [IN] Region of interest in the source image + * \param scale [IN] Downsampling scale factor (positive integer) + * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false) + * + * \return NPP status code + */ +NppStStatus nppiStDownsampleNearest_32u_C1R(NppSt32u *d_src, NppSt32u srcStep, + NppSt32u *d_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale, + NppStBool readThruTexture); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. 
+ * \see nppiStDownsampleNearest_32u_C1R + */ +NppStStatus nppiStDownsampleNearest_32s_C1R(NppSt32s *d_src, NppSt32u srcStep, + NppSt32s *d_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale, + NppStBool readThruTexture); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. + * \see nppiStDownsampleNearest_32u_C1R + */ +NppStStatus nppiStDownsampleNearest_32f_C1R(NppSt32f *d_src, NppSt32u srcStep, + NppSt32f *d_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale, + NppStBool readThruTexture); + + +/** +* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. +* \see nppiStDownsampleNearest_32u_C1R +*/ +NppStStatus nppiStDownsampleNearest_64u_C1R(NppSt64u *d_src, NppSt32u srcStep, + NppSt64u *d_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale, + NppStBool readThruTexture); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. + * \see nppiStDownsampleNearest_32u_C1R + */ +NppStStatus nppiStDownsampleNearest_64s_C1R(NppSt64s *d_src, NppSt32u srcStep, + NppSt64s *d_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale, + NppStBool readThruTexture); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. + * \see nppiStDownsampleNearest_32u_C1R + */ +NppStStatus nppiStDownsampleNearest_64f_C1R(NppSt64f *d_src, NppSt32u srcStep, + NppSt64f *d_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale, + NppStBool readThruTexture); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation. 
+ * + * \param h_src [IN] Source image pointer (Host or pinned memory) + * \param srcStep [IN] Source image line step + * \param h_dst [OUT] Destination image pointer (Host or pinned memory) + * \param dstStep [IN] Destination image line step + * \param srcRoi [IN] Region of interest in the source image + * \param scale [IN] Downsampling scale factor (positive integer) + * + * \return NPP status code + */ +NppStStatus nppiStDownsampleNearest_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStep, + NppSt32u *h_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation. + * \see nppiStDownsampleNearest_32u_C1R_host + */ +NppStStatus nppiStDownsampleNearest_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStep, + NppSt32s *h_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation. + * \see nppiStDownsampleNearest_32u_C1R_host + */ +NppStStatus nppiStDownsampleNearest_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep, + NppSt32f *h_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation. + * \see nppiStDownsampleNearest_32u_C1R_host + */ +NppStStatus nppiStDownsampleNearest_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStep, + NppSt64u *h_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation. 
+ * \see nppiStDownsampleNearest_32u_C1R_host + */ +NppStStatus nppiStDownsampleNearest_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStep, + NppSt64s *h_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale); + + +/** + * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation. + * \see nppiStDownsampleNearest_32u_C1R_host + */ +NppStStatus nppiStDownsampleNearest_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStep, + NppSt64f *h_dst, NppSt32u dstStep, + NppStSize32u srcRoi, NppSt32u scale); + + +/** + * Computes standard deviation for each rectangular region of the input image using integral images. + * + * \param d_sum [IN] Integral image pointer (CUDA device memory) + * \param sumStep [IN] Integral image line step + * \param d_sqsum [IN] Squared integral image pointer (CUDA device memory) + * \param sqsumStep [IN] Squared integral image line step + * \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image + * \param normStep [IN] Stddev image line step + * \param roi [IN] Region of interest in the source image + * \param rect [IN] Rectangular region to calculate stddev over + * \param scaleArea [IN] Multiplication factor to account decimated scale + * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false) + * + * \return NPP status code + */ +NppStStatus nppiStRectStdDev_32f_C1R(NppSt32u *d_sum, NppSt32u sumStep, + NppSt64u *d_sqsum, NppSt32u sqsumStep, + NppSt32f *d_norm, NppSt32u normStep, + NppStSize32u roi, NppStRect32u rect, + NppSt32f scaleArea, NppStBool readThruTexture); + + +/** + * Computes standard deviation for each rectangular region of the input image using integral images. 
Host implementation + * + * \param h_sum [IN] Integral image pointer (Host or pinned memory) + * \param sumStep [IN] Integral image line step + * \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory) + * \param sqsumStep [IN] Squared integral image line step + * \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image + * \param normStep [IN] Stddev image line step + * \param roi [IN] Region of interest in the source image + * \param rect [IN] Rectangular region to calculate stddev over + * \param scaleArea [IN] Multiplication factor to account decimated scale + * + * \return NPP status code + */ +NppStStatus nppiStRectStdDev_32f_C1R_host(NppSt32u *h_sum, NppSt32u sumStep, + NppSt64u *h_sqsum, NppSt32u sqsumStep, + NppSt32f *h_norm, NppSt32u normStep, + NppStSize32u roi, NppStRect32u rect, + NppSt32f scaleArea); + + +/** + * Transposes an image. 32-bit unsigned pixels, single channel + * + * \param d_src [IN] Source image pointer (CUDA device memory) + * \param srcStride [IN] Source image line step + * \param d_dst [OUT] Destination image pointer (CUDA device memory) + * \param dstStride [IN] Destination image line step + * \param srcRoi [IN] Region of interest of the source image + * + * \return NPP status code + */ +NppStStatus nppiStTranspose_32u_C1R(NppSt32u *d_src, NppSt32u srcStride, + NppSt32u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 32-bit signed pixels, single channel + * \see nppiStTranspose_32u_C1R + */ +NppStStatus nppiStTranspose_32s_C1R(NppSt32s *d_src, NppSt32u srcStride, + NppSt32s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 
32-bit float pixels, single channel + * \see nppiStTranspose_32u_C1R + */ +NppStStatus nppiStTranspose_32f_C1R(NppSt32f *d_src, NppSt32u srcStride, + NppSt32f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 64-bit unsigned pixels, single channel + * \see nppiStTranspose_32u_C1R + */ +NppStStatus nppiStTranspose_64u_C1R(NppSt64u *d_src, NppSt32u srcStride, + NppSt64u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 64-bit signed pixels, single channel + * \see nppiStTranspose_32u_C1R + */ +NppStStatus nppiStTranspose_64s_C1R(NppSt64s *d_src, NppSt32u srcStride, + NppSt64s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 64-bit float pixels, single channel + * \see nppiStTranspose_32u_C1R + */ +NppStStatus nppiStTranspose_64f_C1R(NppSt64f *d_src, NppSt32u srcStride, + NppSt64f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 32-bit unsigned pixels, single channel. Host implementation + * + * \param h_src [IN] Source image pointer (Host or pinned memory) + * \param srcStride [IN] Source image line step + * \param h_dst [OUT] Destination image pointer (Host or pinned memory) + * \param dstStride [IN] Destination image line step + * \param srcRoi [IN] Region of interest of the source image + * + * \return NPP status code + */ +NppStStatus nppiStTranspose_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStride, + NppSt32u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 32-bit signed pixels, single channel. Host implementation + * \see nppiStTranspose_32u_C1R_host + */ +NppStStatus nppiStTranspose_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStride, + NppSt32s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 32-bit float pixels, single channel. 
Host implementation + * \see nppiStTranspose_32u_C1R_host + */ +NppStStatus nppiStTranspose_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStride, + NppSt32f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 64-bit unsigned pixels, single channel. Host implementation + * \see nppiStTranspose_32u_C1R_host + */ +NppStStatus nppiStTranspose_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStride, + NppSt64u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 64-bit signed pixels, single channel. Host implementation + * \see nppiStTranspose_32u_C1R_host + */ +NppStStatus nppiStTranspose_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStride, + NppSt64s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Transposes an image. 64-bit float pixels, single channel. Host implementation + * \see nppiStTranspose_32u_C1R_host + */ +NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride, + NppSt64f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi); + + +/** + * Calculates the size of the temporary buffer for integral image creation + * + * \param roiSize [IN] Size of the input image + * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes) + * + * \return NPP status code + */ +NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize); + + +/** + * Creates an integral image representation for the input image + * + * \param d_src [IN] Source image pointer (CUDA device memory) + * \param srcStep [IN] Source image line step + * \param d_dst [OUT] Destination integral image pointer (CUDA device memory) + * \param dstStep [IN] Destination image line step + * \param roiSize [IN] Region of interest of the source image + * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory) + * \param bufSize [IN] Size of the pBuffer in bytes + * + * \return NPP status code + */ +NppStStatus 
nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep, + NppSt32u *d_dst, NppSt32u dstStep, NppStSize32u roiSize, + NppSt8u *pBuffer, NppSt32u bufSize); + + +/** + * Creates an integral image representation for the input image. Host implementation + * + * \param h_src [IN] Source image pointer (Host or pinned memory) + * \param srcStep [IN] Source image line step + * \param h_dst [OUT] Destination integral image pointer (Host or pinned memory) + * \param dstStep [IN] Destination image line step + * \param roiSize [IN] Region of interest of the source image + * + * \return NPP status code + */ +NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep, + NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize); + + +/** + * Calculates the size of the temporary buffer for squared integral image creation + * + * \param roiSize [IN] Size of the input image + * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes) + * + * \return NPP status code + */ +NppStStatus nppiStSqrIntegralGetSize_8u64u(NppStSize32u roiSize, NppSt32u *pBufsize); + + +/** + * Creates a squared integral image representation for the input image + * + * \param d_src [IN] Source image pointer (CUDA device memory) + * \param srcStep [IN] Source image line step + * \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory) + * \param dstStep [IN] Destination image line step + * \param roiSize [IN] Region of interest of the source image + * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory) + * \param bufSize [IN] Size of the pBuffer in bytes + * + * \return NPP status code + */ +NppStStatus nppiStSqrIntegral_8u64u_C1R(NppSt8u *d_src, NppSt32u srcStep, + NppSt64u *d_dst, NppSt32u dstStep, NppStSize32u roiSize, + NppSt8u *pBuffer, NppSt32u bufSize); + + +/** + * Creates a squared integral image representation for the input image. 
Host implementation + * + * \param h_src [IN] Source image pointer (Host or pinned memory) + * \param srcStep [IN] Source image line step + * \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory) + * \param dstStep [IN] Destination image line step + * \param roiSize [IN] Region of interest of the source image + * + * \return NPP status code + */ +NppStStatus nppiStSqrIntegral_8u64u_C1R_host(NppSt8u *h_src, NppSt32u srcStep, + NppSt64u *h_dst, NppSt32u dstStep, NppStSize32u roiSize); + + +/*@}*/ + + +/** \defgroup npps NPP Signal Processing +* @{ +*/ + + +/** + * Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values + * + * \param srcLen [IN] Length of the input vector in elements + * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes) + * + * \return NPP status code + */ +NppStStatus nppsStCompactGetSize_32u(NppSt32u srcLen, NppSt32u *pBufsize); + + +/** + * Calculates the size of the temporary buffer for vector compaction. 32-bit signed values + * \see nppsStCompactGetSize_32u + */ +NppStStatus nppsStCompactGetSize_32s(NppSt32u srcLen, NppSt32u *pBufsize); + + +/** + * Calculates the size of the temporary buffer for vector compaction. 32-bit float values + * \see nppsStCompactGetSize_32u + */ +NppStStatus nppsStCompactGetSize_32f(NppSt32u srcLen, NppSt32u *pBufsize); + + +/** + * Compacts the input vector by removing elements of specified value. 
32-bit unsigned values + * + * \param d_src [IN] Source vector pointer (CUDA device memory) + * \param srcLen [IN] Source vector length + * \param d_dst [OUT] Destination vector pointer (CUDA device memory) + * \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL) + * \param elemRemove [IN] The value to be removed + * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory) + * \param bufSize [IN] Size of the pBuffer in bytes + * + * \return NPP status code + */ +NppStStatus nppsStCompact_32u(NppSt32u *d_src, NppSt32u srcLen, + NppSt32u *d_dst, NppSt32u *p_dstLen, + NppSt32u elemRemove, + NppSt8u *pBuffer, NppSt32u bufSize); + + +/** + * Compacts the input vector by removing elements of specified value. 32-bit signed values + * \see nppsStCompact_32u + */ +NppStStatus nppsStCompact_32s(NppSt32s *d_src, NppSt32u srcLen, + NppSt32s *d_dst, NppSt32u *p_dstLen, + NppSt32s elemRemove, + NppSt8u *pBuffer, NppSt32u bufSize); + + +/** + * Compacts the input vector by removing elements of specified value. 32-bit float values + * \see nppsStCompact_32u + */ +NppStStatus nppsStCompact_32f(NppSt32f *d_src, NppSt32u srcLen, + NppSt32f *d_dst, NppSt32u *p_dstLen, + NppSt32f elemRemove, + NppSt8u *pBuffer, NppSt32u bufSize); + + +/** + * Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation + * + * \param h_src [IN] Source vector pointer (Host or pinned memory) + * \param srcLen [IN] Source vector length + * \param h_dst [OUT] Destination vector pointer (Host or pinned memory) + * \param dstLen [OUT] Pointer to the destination vector length (can be NULL) + * \param elemRemove [IN] The value to be removed + * + * \return NPP status code + */ +NppStStatus nppsStCompact_32u_host(NppSt32u *h_src, NppSt32u srcLen, + NppSt32u *h_dst, NppSt32u *dstLen, NppSt32u elemRemove); + + +/** + * Compacts the input vector by removing elements of specified value. 32-bit signed values. 
Host implementation + * \see nppsStCompact_32u_host + */ +NppStStatus nppsStCompact_32s_host(NppSt32s *h_src, NppSt32u srcLen, + NppSt32s *h_dst, NppSt32u *dstLen, NppSt32s elemRemove); + + +/** + * Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation + * \see nppsStCompact_32u_host + */ +NppStStatus nppsStCompact_32f_host(NppSt32f *h_src, NppSt32u srcLen, + NppSt32f *h_dst, NppSt32u *dstLen, NppSt32f elemRemove); + + +/*@}*/ + + +#ifdef __cplusplus +} +#endif + + +#endif // _npp_staging_h_ diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 77228e5..3c10b0e 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -1,15 +1,12 @@ - set(name "gpu") -#"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed -set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann") - -set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu) - set(the_target "opencv_${name}") - project(${the_target}) + +set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann") #"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed +set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu) + add_definitions(-DCVAPI_EXPORTS) include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" @@ -43,7 +40,7 @@ if (HAVE_CUDA) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${_path_to_findnpp}) find_package(NPP 3.2.16 REQUIRED) message(STATUS "NPP detected: " ${NPP_VERSION}) - + include_directories(${CUDA_INCLUDE_DIRS} ${CUDA_NPP_INCLUDES}) if (UNIX OR APPLE) @@ -79,6 +76,11 @@ endif() add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${cuda_objs}) +IF (HAVE_CUDA) + include(FindNPP_staging.cmake) + include_directories(${NPPST_INC}) + target_link_libraries(${the_target} ${NPPST_LIB}) +endif() if(PCHSupport_FOUND) set(pch_header 
${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp) @@ -108,7 +110,7 @@ set_target_properties(${the_target} PROPERTIES ) # Add the required libraries for linking: -target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS}) +target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} ) if (HAVE_CUDA) target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES}) diff --git a/modules/gpu/FindNPP_staging.cmake b/modules/gpu/FindNPP_staging.cmake new file mode 100644 index 0000000..e478695 --- /dev/null +++ b/modules/gpu/FindNPP_staging.cmake @@ -0,0 +1,24 @@ +if(CMAKE_SIZEOF_VOID_P EQUAL 4) + set(BIT_SUFF 32) +else() + set(BIT_SUFF 64) +endif() + +if (APPLE) + set(PLATFORM_SUFF Darwin) +elseif (UNIX) + set(PLATFORM_SUFF Linux) +else() + set(PLATFORM_SUFF Windows) +endif() + +set(LIB_FILE NPP_staging_static_${PLATFORM_SUFF}_${BIT_SUFF}_v1) + +find_library(NPPST_LIB + NAMES "${LIB_FILE}" "lib${LIB_FILE}" + PATHS "${CMAKE_SOURCE_DIR}/3rdparty/NPP_staging" + DOC "NPP staging library" + ) + +SET(NPPST_INC "${CMAKE_SOURCE_DIR}//3rdparty/NPP_staging") + \ No newline at end of file diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index b30ca48..a8329da 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1305,8 +1305,67 @@ namespace cv explicit BruteForceMatcher_GPU() : BruteForceMatcher_GPU_base(L2Dist) {} explicit BruteForceMatcher_GPU(L2 /*d*/) : BruteForceMatcher_GPU_base(L2Dist) {} }; - } + ////////////////////////////////// CascadeClassifier ////////////////////////////////////////// + // The cascade classifier class for object detection. 
+ class CV_EXPORTS CascadeClassifier + { + public: + struct CV_EXPORTS DTreeNode + { + int featureIdx; + float threshold; // for ordered features only + int left; + int right; + }; + + struct CV_EXPORTS DTree + { + int nodeCount; + }; + + struct CV_EXPORTS Stage + { + int first; + int ntrees; + float threshold; + }; + + enum { BOOST = 0 }; + enum { DO_CANNY_PRUNING = 1, SCALE_IMAGE = 2,FIND_BIGGEST_OBJECT = 4, DO_ROUGH_SEARCH = 8 }; + + CascadeClassifier(); + CascadeClassifier(const string& filename); + ~CascadeClassifier(); + + bool empty() const; + bool load(const string& filename); + bool read(const FileNode& node); + + void detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor=1.1, + int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()); + + bool setImage( Ptr<FeatureEvaluator>&, const Mat& ); + int runAt( Ptr<FeatureEvaluator>&, Point ); + + bool isStumpBased; + + int stageType; + int featureType; + int ncategories; + Size origWinSize; + + vector<Stage> stages; + vector<DTree> classifiers; + vector<DTreeNode> nodes; + vector<float> leaves; + vector<int> subsets; + + Ptr<FeatureEvaluator> feval; + Ptr<CvHaarClassifierCascade> oldCascade; + }; + + } //! Speckle filtering - filters small connected components on diparity image. //! It sets pixel (x,y) to newVal if it coresponds to small CC with size < maxSpeckleSize. diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp new file mode 100644 index 0000000..e6a4e72 --- /dev/null +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -0,0 +1,110 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + + + + +using namespace cv; +using namespace cv::gpu; +using namespace std; + +#if !defined (HAVE_CUDA) + +cv::gpu::CascadeClassifier::CascadeClassifier() { throw_nogpu(); } +cv::gpu::CascadeClassifier::CascadeClassifier(const string&) { throw_nogpu(); } +cv::gpu::CascadeClassifier::~CascadeClassifier() { throw_nogpu(); } + +bool cv::gpu::CascadeClassifier::empty() const { throw_nogpu(); return true; } +bool cv::gpu::CascadeClassifier::load(const string& filename) { throw_nogpu(); return true; } +bool cv::gpu::CascadeClassifier::read(const FileNode& node) { throw_nogpu(); return true; } + +void cv::gpu::CascadeClassifier::detectMultiScale( const Mat&, vector&, double, int, int, Size, Size) { throw_nogpu(); } + + + + + +#else + + +cv::gpu::CascadeClassifier::CascadeClassifier() +{ + +} + +cv::gpu::CascadeClassifier::CascadeClassifier(const string& filename) +{ + +} + +cv::gpu::CascadeClassifier::~CascadeClassifier() +{ + +} + +bool cv::gpu::CascadeClassifier::empty() const +{ + int *a = (int*)&nppiStTranspose_32u_C1R; + return *a == 0xFFFFF; + return true; +} + +bool cv::gpu::CascadeClassifier::load(const string& filename) +{ + return true; +} + +bool cv::gpu::CascadeClassifier::read(const FileNode& node) +{ + return true; +} + +void cv::gpu::CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, double scaleFactor, + int minNeighbors, int flags, Size minSize, Size maxSize) + +{ + +} + +#endif \ No newline at end of file diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index ab6c42d..faf9a18 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -66,6 +66,7 @@ #include "cuda_runtime_api.h" #include "opencv2/gpu/stream_accessor.hpp" #include "npp.h" + #include "npp_staging.h" #define CUDART_MINIMUM_REQUIRED_VERSION 3020 #define NPP_MINIMUM_REQUIRED_VERSION 3216 @@ -78,6 +79,7 @@ #error "Insufficient NPP version, please update it." 
#endif + static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); } #else /* defined(HAVE_CUDA) */ diff --git a/tests/gpu/src/stereo_bm.cpp b/tests/gpu/src/stereo_bm.cpp index c01912f..5b6062b 100644 --- a/tests/gpu/src/stereo_bm.cpp +++ b/tests/gpu/src/stereo_bm.cpp @@ -55,12 +55,7 @@ struct CV_GpuStereoBMTest : public CvTest void run_stress() - { - //cv::setBreakOnError(true); - int winsz[] = { 13, 15, 17, 19 }; - int disps[] = { 128, 160, 192, 256}; - - Size res[] = { Size(1027, 768), Size(1280, 1024), Size(1600, 1152), Size(1920, 1080) }; + { RNG rng; for(int i = 0; i < 10; ++i)