+++ /dev/null
-/*\r
-* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
-*\r
-* NOTICE TO USER:\r
-*\r
-* This source code is subject to NVIDIA ownership rights under U.S. and\r
-* international Copyright laws.\r
-*\r
-* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE\r
-* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR\r
-* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH\r
-* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF\r
-* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.\r
-* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,\r
-* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS\r
-* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE\r
-* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE\r
-* OR PERFORMANCE OF THIS SOURCE CODE.\r
-*\r
-* U.S. Government End Users. This source code is a "commercial item" as\r
-* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of\r
-* "commercial computer software" and "commercial computer software\r
-* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)\r
-* and is provided to the U.S. Government only as a commercial end item.\r
-* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through\r
-* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the\r
-* source code with only those rights set forth herein.\r
-*/\r
-#ifndef _npp_staging_h_\r
-#define _npp_staging_h_\r
-\r
-\r
-/**\r
-* \file npp_staging.h\r
-* NPP Staging Library (will become part of NPP in the next release)\r
-*/\r
-\r
-\r
-#ifdef __cplusplus\r
-\r
-\r
-/** \defgroup ctassert Compile-time assert functionality\r
-* @{\r
-*/\r
-\r
-\r
- /**\r
- * Compile-time assert namespace\r
- */\r
- namespace NppStCTprep\r
- {\r
- template <bool x>\r
- struct CT_ASSERT_FAILURE;\r
-\r
- template <>\r
- struct CT_ASSERT_FAILURE<true> {};\r
-\r
- template <int x>\r
- struct assertTest{};\r
- }\r
-\r
-\r
- #define NPPST_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro\r
- #define NPPST_CT_PREP_PASTE(a,b) NPPST_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro\r
-\r
-\r
- /**\r
- * Performs compile-time assertion of a condition on the file scope\r
- */\r
- #define NPPST_CT_ASSERT(X) \\r
- typedef NppStCTprep::assertTest<sizeof(NppStCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \\r
- NPPST_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)\r
-\r
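// A minimal illustrative use: the macro expands to a typedef whose sizeof() argument
// only compiles when the condition holds, because only the 'true' specialization of
// CT_ASSERT_FAILURE is defined (the primary template stays incomplete).
NPPST_CT_ASSERT(sizeof(short) >= 2);      // compiles: instantiates CT_ASSERT_FAILURE<true>
//NPPST_CT_ASSERT(sizeof(char) == 2);     // would not compile: CT_ASSERT_FAILURE<false> is incomplete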
-\r
-/*@}*/\r
-\r
-\r
-#endif\r
-\r
-\r
-/** \defgroup typedefs NPP Integral and compound types of guaranteed size\r
- * @{\r
- */\r
-\r
-\r
-typedef bool NppStBool; ///< Boolean of size less than or equal to that of an integer\r
-typedef long long NppSt64s; ///< 64-bit signed integer\r
-typedef unsigned long long NppSt64u; ///< 64-bit unsigned integer\r
-typedef int NppSt32s; ///< 32-bit signed integer\r
-typedef unsigned int NppSt32u; ///< 32-bit unsigned integer\r
-typedef short NppSt16s; ///< 16-bit signed short\r
-typedef unsigned short NppSt16u; ///< 16-bit unsigned short\r
-typedef char NppSt8s; ///< 8-bit signed char\r
-typedef unsigned char NppSt8u; ///< 8-bit unsigned char\r
-typedef float NppSt32f; ///< 32-bit IEEE-754 (single precision) float\r
-typedef double NppSt64f; ///< 64-bit IEEE-754 (double precision) float\r
-\r
-\r
-/**\r
- * 2D Rectangle, 8-bit unsigned fields\r
- * This struct contains the position and size of a rectangle in 2D space\r
- */\r
-struct NppStRect8u\r
-{\r
- NppSt8u x; ///< x-coordinate of upper left corner\r
- NppSt8u y; ///< y-coordinate of upper left corner\r
- NppSt8u width; ///< Rectangle width\r
- NppSt8u height; ///< Rectangle height\r
-#ifdef __cplusplus\r
- NppStRect8u() : x(0), y(0), width(0), height(0) {};\r
- NppStRect8u(NppSt8u x, NppSt8u y, NppSt8u width, NppSt8u height) : x(x), y(y), width(width), height(height) {}\r
-#endif\r
-};\r
-\r
-\r
-/**\r
- * 2D Rectangle, 32-bit signed fields\r
- * This struct contains the position and size of a rectangle in 2D space\r
- */\r
-struct NppStRect32s\r
-{\r
- NppSt32s x; ///< x-coordinate of upper left corner\r
- NppSt32s y; ///< y-coordinate of upper left corner\r
- NppSt32s width; ///< Rectangle width\r
- NppSt32s height; ///< Rectangle height\r
-#ifdef __cplusplus\r
- NppStRect32s() : x(0), y(0), width(0), height(0) {};\r
- NppStRect32s(NppSt32s x, NppSt32s y, NppSt32s width, NppSt32s height) : x(x), y(y), width(width), height(height) {}\r
-#endif\r
-};\r
-\r
-\r
-/**\r
- * 2D Rectangle, 32-bit unsigned fields\r
- * This struct contains the position and size of a rectangle in 2D space\r
- */\r
-struct NppStRect32u\r
-{\r
- NppSt32u x; ///< x-coordinate of upper left corner\r
- NppSt32u y; ///< y-coordinate of upper left corner\r
- NppSt32u width; ///< Rectangle width\r
- NppSt32u height; ///< Rectangle height\r
-#ifdef __cplusplus\r
- NppStRect32u() : x(0), y(0), width(0), height(0) {};\r
- NppStRect32u(NppSt32u x, NppSt32u y, NppSt32u width, NppSt32u height) : x(x), y(y), width(width), height(height) {}\r
-#endif\r
-};\r
-\r
-\r
-/**\r
- * 2D Size, 32-bit signed fields\r
- * This struct typically represents the size of a rectangular region in 2D space\r
- */\r
-struct NppStSize32s\r
-{\r
- NppSt32s width; ///< Rectangle width\r
- NppSt32s height; ///< Rectangle height\r
-#ifdef __cplusplus\r
- NppStSize32s() : width(0), height(0) {};\r
- NppStSize32s(NppSt32s width, NppSt32s height) : width(width), height(height) {}\r
-#endif\r
-};\r
-\r
-\r
-/**\r
- * 2D Size, 32-bit unsigned fields\r
- * This struct typically represents the size of a rectangular region in 2D space\r
- */\r
-struct NppStSize32u\r
-{\r
- NppSt32u width; ///< Rectangle width\r
- NppSt32u height; ///< Rectangle height\r
-#ifdef __cplusplus\r
- NppStSize32u() : width(0), height(0) {};\r
- NppStSize32u(NppSt32u width, NppSt32u height) : width(width), height(height) {}\r
-#endif\r
-};\r
-\r
-\r
-/**\r
- * Error Status Codes\r
- *\r
- * Almost all NPP functions return error-status information using\r
- * these return codes.\r
- * Negative return codes indicate errors, positive return codes indicate\r
- * warnings, and a return code of 0 indicates success.\r
- */\r
-enum NppStStatus\r
-{\r
- //already present in NPP\r
- __NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)\r
- __NPP_ERROR = -1, ///< Unknown error\r
- __NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error\r
- __NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error\r
- __NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned\r
- __NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error\r
- __NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error\r
- __NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error\r
-\r
- //to be added\r
- NPP_INVALID_ROI, ///< Invalid region of interest argument\r
-    NPP_INVALID_STEP,           ///< Invalid image line step argument (check sign, alignment, relation to image width)\r
- NPP_INVALID_SCALE, ///< Invalid scale parameter passed\r
- NPP_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer\r
- NPP_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)\r
- NPP_MEM_INTERNAL_ERROR, ///< Internal memory management error\r
-};\r
-\r
-\r
-/*@}*/\r
-\r
-\r
-#ifdef __cplusplus\r
-\r
-\r
-/** \defgroup ct_typesize_checks Client-side sizeof types compile-time check\r
-* @{\r
-*/\r
- NPPST_CT_ASSERT(sizeof(NppStBool) <= 4);\r
- NPPST_CT_ASSERT(sizeof(NppSt64s) == 8);\r
- NPPST_CT_ASSERT(sizeof(NppSt64u) == 8);\r
- NPPST_CT_ASSERT(sizeof(NppSt32s) == 4);\r
- NPPST_CT_ASSERT(sizeof(NppSt32u) == 4);\r
- NPPST_CT_ASSERT(sizeof(NppSt16s) == 2);\r
- NPPST_CT_ASSERT(sizeof(NppSt16u) == 2);\r
- NPPST_CT_ASSERT(sizeof(NppSt8s) == 1);\r
- NPPST_CT_ASSERT(sizeof(NppSt8u) == 1);\r
- NPPST_CT_ASSERT(sizeof(NppSt32f) == 4);\r
- NPPST_CT_ASSERT(sizeof(NppSt64f) == 8);\r
- NPPST_CT_ASSERT(sizeof(NppStRect8u) == sizeof(NppSt32u));\r
- NPPST_CT_ASSERT(sizeof(NppStRect32s) == 4 * sizeof(NppSt32s));\r
- NPPST_CT_ASSERT(sizeof(NppStRect32u) == 4 * sizeof(NppSt32u));\r
- NPPST_CT_ASSERT(sizeof(NppStSize32u) == 2 * sizeof(NppSt32u));\r
-/*@}*/\r
-\r
-\r
-#endif\r
-\r
-\r
-#ifdef __cplusplus\r
-extern "C" {\r
-#endif\r
-\r
-\r
-/** \defgroup core_npp NPP Core\r
- * Basic functions for CUDA streams management.\r
- * WARNING: These functions cannot be exported into a DLL, so they can only be used with the static version of NPP_staging\r
- * @{\r
- */\r
-\r
-\r
-/**\r
- * Gets the active CUDA stream used by NPP (Not an API yet!)\r
- * \return Current CUDA stream\r
- */\r
-cudaStream_t nppStGetActiveCUDAstream();\r
-\r
-\r
-/**\r
- * Sets the active CUDA stream used by NPP (Not an API yet!)\r
- * \param cudaStream [IN] CUDA stream to become current\r
- * \return CUDA stream used before\r
- */\r
-cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);\r
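// A brief usage sketch (assumes a caller-created stream; not a complete program): run a
// batch of NPP_staging calls on a user stream, then restore whatever stream was active.
static void exampleRunOnUserStream(cudaStream_t userStream)
{
    cudaStream_t previous = nppStSetActiveCUDAstream(userStream); // returns the stream used before
    /* ... nppiSt* / nppsSt* calls issued here run on userStream ... */
    nppStSetActiveCUDAstream(previous);
}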
-\r
-\r
-/*@}*/\r
-\r
-\r
-/** \defgroup nppi NPP Image Processing\r
-* @{\r
-*/\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.\r
- *\r
- * \param d_src [IN] Source image pointer (CUDA device memory)\r
- * \param srcStep [IN] Source image line step\r
- * \param d_dst [OUT] Destination image pointer (CUDA device memory)\r
- * \param dstStep [IN] Destination image line step\r
- * \param srcRoi [IN] Region of interest in the source image\r
- * \param scale [IN] Downsampling scale factor (positive integer)\r
- * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStDownsampleNearest_32u_C1R(NppSt32u *d_src, NppSt32u srcStep,\r
- NppSt32u *d_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale,\r
- NppStBool readThruTexture);\r
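// A hedged call sketch (d_in, d_out, inStep and outStep are caller-supplied placeholders,
// not declared in this header): decimate a 1024x768 region by a factor of 2.
static NppStStatus exampleDownsampleHalf(NppSt32u *d_in, NppSt32u inStep,
                                         NppSt32u *d_out, NppSt32u outStep)
{
    NppStSize32u roi(1024, 768);
    return nppiStDownsampleNearest_32u_C1R(d_in, inStep, d_out, outStep,
                                           roi, 2 /*scale*/, true /*readThruTexture*/);
}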
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.\r
- * \see nppiStDownsampleNearest_32u_C1R\r
- */\r
-NppStStatus nppiStDownsampleNearest_32s_C1R(NppSt32s *d_src, NppSt32u srcStep,\r
- NppSt32s *d_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale,\r
- NppStBool readThruTexture);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.\r
- * \see nppiStDownsampleNearest_32u_C1R\r
- */\r
-NppStStatus nppiStDownsampleNearest_32f_C1R(NppSt32f *d_src, NppSt32u srcStep,\r
- NppSt32f *d_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale,\r
- NppStBool readThruTexture);\r
-\r
-\r
-/**\r
-* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.\r
-* \see nppiStDownsampleNearest_32u_C1R\r
-*/\r
-NppStStatus nppiStDownsampleNearest_64u_C1R(NppSt64u *d_src, NppSt32u srcStep,\r
- NppSt64u *d_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale,\r
- NppStBool readThruTexture);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.\r
- * \see nppiStDownsampleNearest_32u_C1R\r
- */\r
-NppStStatus nppiStDownsampleNearest_64s_C1R(NppSt64s *d_src, NppSt32u srcStep,\r
- NppSt64s *d_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale,\r
- NppStBool readThruTexture);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.\r
- * \see nppiStDownsampleNearest_32u_C1R\r
- */\r
-NppStStatus nppiStDownsampleNearest_64f_C1R(NppSt64f *d_src, NppSt32u srcStep,\r
- NppSt64f *d_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale,\r
- NppStBool readThruTexture);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.\r
- *\r
- * \param h_src [IN] Source image pointer (Host or pinned memory)\r
- * \param srcStep [IN] Source image line step\r
- * \param h_dst [OUT] Destination image pointer (Host or pinned memory)\r
- * \param dstStep [IN] Destination image line step\r
- * \param srcRoi [IN] Region of interest in the source image\r
- * \param scale [IN] Downsampling scale factor (positive integer)\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStDownsampleNearest_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStep,\r
- NppSt32u *h_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.\r
- * \see nppiStDownsampleNearest_32u_C1R_host\r
- */\r
-NppStStatus nppiStDownsampleNearest_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStep,\r
- NppSt32s *h_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.\r
- * \see nppiStDownsampleNearest_32u_C1R_host\r
- */\r
-NppStStatus nppiStDownsampleNearest_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,\r
- NppSt32f *h_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.\r
- * \see nppiStDownsampleNearest_32u_C1R_host\r
- */\r
-NppStStatus nppiStDownsampleNearest_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStep,\r
- NppSt64u *h_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.\r
- * \see nppiStDownsampleNearest_32u_C1R_host\r
- */\r
-NppStStatus nppiStDownsampleNearest_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStep,\r
- NppSt64s *h_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale);\r
-\r
-\r
-/**\r
- * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.\r
- * \see nppiStDownsampleNearest_32u_C1R_host\r
- */\r
-NppStStatus nppiStDownsampleNearest_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStep,\r
- NppSt64f *h_dst, NppSt32u dstStep,\r
- NppStSize32u srcRoi, NppSt32u scale);\r
-\r
-\r
-/**\r
- * Computes standard deviation for each rectangular region of the input image using integral images.\r
- *\r
- * \param d_sum [IN] Integral image pointer (CUDA device memory)\r
- * \param sumStep [IN] Integral image line step\r
- * \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)\r
- * \param sqsumStep [IN] Squared integral image line step\r
- * \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image\r
- * \param normStep [IN] Stddev image line step\r
- * \param roi [IN] Region of interest in the source image\r
- * \param rect [IN] Rectangular region to calculate stddev over\r
- * \param scaleArea [IN] Multiplication factor to account for decimated scale\r
- * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStRectStdDev_32f_C1R(NppSt32u *d_sum, NppSt32u sumStep,\r
- NppSt64u *d_sqsum, NppSt32u sqsumStep,\r
- NppSt32f *d_norm, NppSt32u normStep,\r
- NppStSize32u roi, NppStRect32u rect,\r
- NppSt32f scaleArea, NppStBool readThruTexture);\r
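// A hedged sketch (all pointer and step arguments are caller-supplied placeholders): compute
// per-pixel standard deviation over an 8x8 window, assuming d_sum and d_sqsum were built by
// the integral-image functions declared further below over the same source image.
static NppStStatus exampleRectStdDev8x8(NppSt32u *d_sum, NppSt32u sumStep,
                                        NppSt64u *d_sqsum, NppSt32u sqsumStep,
                                        NppSt32f *d_stddev, NppSt32u stddevStep,
                                        NppStSize32u roi)
{
    NppStRect32u window(0, 0, 8, 8);    // rectangle anchored at each output pixel
    return nppiStRectStdDev_32f_C1R(d_sum, sumStep, d_sqsum, sqsumStep,
                                    d_stddev, stddevStep, roi, window,
                                    1.0f /*scaleArea*/, false /*readThruTexture*/);
}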
-\r
-\r
-/**\r
- * Computes standard deviation for each rectangular region of the input image using integral images. Host implementation\r
- *\r
- * \param h_sum [IN] Integral image pointer (Host or pinned memory)\r
- * \param sumStep [IN] Integral image line step\r
- * \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)\r
- * \param sqsumStep [IN] Squared integral image line step\r
- * \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image\r
- * \param normStep [IN] Stddev image line step\r
- * \param roi [IN] Region of interest in the source image\r
- * \param rect [IN] Rectangular region to calculate stddev over\r
- * \param scaleArea [IN] Multiplication factor to account for decimated scale\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStRectStdDev_32f_C1R_host(NppSt32u *h_sum, NppSt32u sumStep,\r
- NppSt64u *h_sqsum, NppSt32u sqsumStep,\r
- NppSt32f *h_norm, NppSt32u normStep,\r
- NppStSize32u roi, NppStRect32u rect,\r
- NppSt32f scaleArea);\r
-\r
-\r
-/**\r
- * Transposes an image. 32-bit unsigned pixels, single channel\r
- *\r
- * \param d_src [IN] Source image pointer (CUDA device memory)\r
- * \param srcStride [IN] Source image line step\r
- * \param d_dst [OUT] Destination image pointer (CUDA device memory)\r
- * \param dstStride [IN] Destination image line step\r
- * \param srcRoi [IN] Region of interest of the source image\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStTranspose_32u_C1R(NppSt32u *d_src, NppSt32u srcStride,\r
- NppSt32u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 32-bit signed pixels, single channel\r
- * \see nppiStTranspose_32u_C1R\r
- */\r
-NppStStatus nppiStTranspose_32s_C1R(NppSt32s *d_src, NppSt32u srcStride,\r
- NppSt32s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 32-bit float pixels, single channel\r
- * \see nppiStTranspose_32u_C1R\r
- */\r
-NppStStatus nppiStTranspose_32f_C1R(NppSt32f *d_src, NppSt32u srcStride,\r
- NppSt32f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 64-bit unsigned pixels, single channel\r
- * \see nppiStTranspose_32u_C1R\r
- */\r
-NppStStatus nppiStTranspose_64u_C1R(NppSt64u *d_src, NppSt32u srcStride,\r
- NppSt64u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 64-bit signed pixels, single channel\r
- * \see nppiStTranspose_32u_C1R\r
- */\r
-NppStStatus nppiStTranspose_64s_C1R(NppSt64s *d_src, NppSt32u srcStride,\r
- NppSt64s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 64-bit float pixels, single channel\r
- * \see nppiStTranspose_32u_C1R\r
- */\r
-NppStStatus nppiStTranspose_64f_C1R(NppSt64f *d_src, NppSt32u srcStride,\r
- NppSt64f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 32-bit unsigned pixels, single channel. Host implementation\r
- *\r
- * \param h_src [IN] Source image pointer (Host or pinned memory)\r
- * \param srcStride [IN] Source image line step\r
- * \param h_dst [OUT] Destination image pointer (Host or pinned memory)\r
- * \param dstStride [IN] Destination image line step\r
- * \param srcRoi [IN] Region of interest of the source image\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStTranspose_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStride,\r
- NppSt32u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 32-bit signed pixels, single channel. Host implementation\r
- * \see nppiStTranspose_32u_C1R_host\r
- */\r
-NppStStatus nppiStTranspose_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStride,\r
- NppSt32s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 32-bit float pixels, single channel. Host implementation\r
- * \see nppiStTranspose_32u_C1R_host\r
- */\r
-NppStStatus nppiStTranspose_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStride,\r
- NppSt32f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 64-bit unsigned pixels, single channel. Host implementation\r
- * \see nppiStTranspose_32u_C1R_host\r
- */\r
-NppStStatus nppiStTranspose_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStride,\r
- NppSt64u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 64-bit signed pixels, single channel. Host implementation\r
- * \see nppiStTranspose_32u_C1R_host\r
- */\r
-NppStStatus nppiStTranspose_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStride,\r
- NppSt64s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Transposes an image. 64-bit float pixels, single channel. Host implementation\r
- * \see nppiStTranspose_32u_C1R_host\r
- */\r
-NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride,\r
- NppSt64f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
-\r
-\r
-/**\r
- * Calculates the size of the temporary buffer for integral image creation\r
- *\r
- * \param roiSize [IN] Size of the input image\r
- * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);\r
-\r
-\r
-/**\r
- * Calculates the size of the temporary buffer for integral image creation\r
- * \see nppiStIntegralGetSize_8u32u\r
- */\r
-NppStStatus nppiStIntegralGetSize_32f32f(NppStSize32u roiSize, NppSt32u *pBufsize);\r
-\r
-\r
-/**\r
- * Creates an integral image representation for the input image\r
- *\r
- * \param d_src [IN] Source image pointer (CUDA device memory)\r
- * \param srcStep [IN] Source image line step\r
- * \param d_dst [OUT] Destination integral image pointer (CUDA device memory)\r
- * \param dstStep [IN] Destination image line step\r
- * \param roiSize [IN] Region of interest of the source image\r
- * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
- * \param bufSize [IN] Size of the pBuffer in bytes\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep,\r
- NppSt32u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,\r
- NppSt8u *pBuffer, NppSt32u bufSize);\r
-\r
-\r
-/**\r
- * Creates an integral image representation for the input image\r
- * \see nppiStIntegral_8u32u_C1R\r
- */\r
-NppStStatus nppiStIntegral_32f32f_C1R(NppSt32f *d_src, NppSt32u srcStep,\r
- NppSt32f *d_dst, NppSt32u dstStep, NppStSize32u roiSize,\r
- NppSt8u *pBuffer, NppSt32u bufSize);\r
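// A hedged sketch of the two-step pattern (query the scratch size, allocate it in device
// memory, then compute the integral image). CUDA error checking is omitted; note that the
// OpenCV wrapper further down in this patch sizes the destination as (rows + 1) x (cols + 1).
static NppStStatus exampleIntegral(NppSt8u *d_src, NppSt32u srcStep,
                                   NppSt32u *d_sum, NppSt32u sumStep,
                                   NppStSize32u roi)
{
    NppSt32u bufSize = 0;
    NppStStatus st = nppiStIntegralGetSize_8u32u(roi, &bufSize);
    if (st != __NPP_SUCCESS) return st;
    NppSt8u *d_buf = NULL;
    cudaMalloc((void **)&d_buf, bufSize);     // temporary scratch buffer in device memory
    st = nppiStIntegral_8u32u_C1R(d_src, srcStep, d_sum, sumStep, roi, d_buf, bufSize);
    cudaFree(d_buf);
    return st;
}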
-\r
-\r
-/**\r
- * Creates an integral image representation for the input image. Host implementation\r
- *\r
- * \param h_src [IN] Source image pointer (Host or pinned memory)\r
- * \param srcStep [IN] Source image line step\r
- * \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)\r
- * \param dstStep [IN] Destination image line step\r
- * \param roiSize [IN] Region of interest of the source image\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,\r
- NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);\r
-\r
-\r
-/**\r
- * Creates an integral image representation for the input image. Host implementation\r
- * \see nppiStIntegral_8u32u_C1R_host\r
- */\r
-NppStStatus nppiStIntegral_32f32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,\r
- NppSt32f *h_dst, NppSt32u dstStep, NppStSize32u roiSize);\r
-\r
-\r
-/**\r
- * Calculates the size of the temporary buffer for squared integral image creation\r
- *\r
- * \param roiSize [IN] Size of the input image\r
- * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStSqrIntegralGetSize_8u64u(NppStSize32u roiSize, NppSt32u *pBufsize);\r
-\r
-\r
-/**\r
- * Creates a squared integral image representation for the input image\r
- *\r
- * \param d_src [IN] Source image pointer (CUDA device memory)\r
- * \param srcStep [IN] Source image line step\r
- * \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)\r
- * \param dstStep [IN] Destination image line step\r
- * \param roiSize [IN] Region of interest of the source image\r
- * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
- * \param bufSize [IN] Size of the pBuffer in bytes\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStSqrIntegral_8u64u_C1R(NppSt8u *d_src, NppSt32u srcStep,\r
- NppSt64u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,\r
- NppSt8u *pBuffer, NppSt32u bufSize);\r
-\r
-\r
-/**\r
- * Creates a squared integral image representation for the input image. Host implementation\r
- *\r
- * \param h_src [IN] Source image pointer (Host or pinned memory)\r
- * \param srcStep [IN] Source image line step\r
- * \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)\r
- * \param dstStep [IN] Destination image line step\r
- * \param roiSize [IN] Region of interest of the source image\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppiStSqrIntegral_8u64u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,\r
- NppSt64u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);\r
-\r
-\r
-/*@}*/\r
-\r
-\r
-/** \defgroup npps NPP Signal Processing\r
-* @{\r
-*/\r
-\r
-\r
-/**\r
- * Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values\r
- *\r
- * \param srcLen [IN] Length of the input vector in elements\r
- * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppsStCompactGetSize_32u(NppSt32u srcLen, NppSt32u *pBufsize);\r
-\r
-\r
-/**\r
- * Calculates the size of the temporary buffer for vector compaction. 32-bit signed values\r
- * \see nppsStCompactGetSize_32u\r
- */\r
-NppStStatus nppsStCompactGetSize_32s(NppSt32u srcLen, NppSt32u *pBufsize);\r
-\r
-\r
-/**\r
- * Calculates the size of the temporary buffer for vector compaction. 32-bit float values\r
- * \see nppsStCompactGetSize_32u\r
- */\r
-NppStStatus nppsStCompactGetSize_32f(NppSt32u srcLen, NppSt32u *pBufsize);\r
-\r
-\r
-/**\r
- * Compacts the input vector by removing elements of specified value. 32-bit unsigned values\r
- *\r
- * \param d_src [IN] Source vector pointer (CUDA device memory)\r
- * \param srcLen [IN] Source vector length\r
- * \param d_dst [OUT] Destination vector pointer (CUDA device memory)\r
- * \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)\r
- * \param elemRemove [IN] The value to be removed\r
- * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
- * \param bufSize [IN] Size of the pBuffer in bytes\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppsStCompact_32u(NppSt32u *d_src, NppSt32u srcLen,\r
- NppSt32u *d_dst, NppSt32u *p_dstLen,\r
- NppSt32u elemRemove,\r
- NppSt8u *pBuffer, NppSt32u bufSize);\r
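// A hedged sketch of the compaction pattern (d_in, d_out and p_dstLenPinned are
// caller-supplied placeholders; per the documentation above, the destination length
// pointer must reside in pinned host memory or be NULL).
static NppStStatus exampleCompact(NppSt32u *d_in, NppSt32u srcLen,
                                  NppSt32u *d_out, NppSt32u *p_dstLenPinned,
                                  NppSt32u elemRemove)
{
    NppSt32u bufSize = 0;
    NppStStatus st = nppsStCompactGetSize_32u(srcLen, &bufSize);
    if (st != __NPP_SUCCESS) return st;
    NppSt8u *d_buf = NULL;
    cudaMalloc((void **)&d_buf, bufSize);     // temporary scratch buffer in device memory
    st = nppsStCompact_32u(d_in, srcLen, d_out, p_dstLenPinned, elemRemove, d_buf, bufSize);
    cudaFree(d_buf);
    return st;
}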
-\r
-\r
-/**\r
- * Compacts the input vector by removing elements of specified value. 32-bit signed values\r
- * \see nppsStCompact_32u\r
- */\r
-NppStStatus nppsStCompact_32s(NppSt32s *d_src, NppSt32u srcLen,\r
- NppSt32s *d_dst, NppSt32u *p_dstLen,\r
- NppSt32s elemRemove,\r
- NppSt8u *pBuffer, NppSt32u bufSize);\r
-\r
-\r
-/**\r
- * Compacts the input vector by removing elements of specified value. 32-bit float values\r
- * \see nppsStCompact_32u\r
- */\r
-NppStStatus nppsStCompact_32f(NppSt32f *d_src, NppSt32u srcLen,\r
- NppSt32f *d_dst, NppSt32u *p_dstLen,\r
- NppSt32f elemRemove,\r
- NppSt8u *pBuffer, NppSt32u bufSize);\r
-\r
-\r
-/**\r
- * Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation\r
- *\r
- * \param h_src [IN] Source vector pointer (Host or pinned memory)\r
- * \param srcLen [IN] Source vector length\r
- * \param h_dst [OUT] Destination vector pointer (Host or pinned memory)\r
- * \param dstLen [OUT] Pointer to the destination vector length (can be NULL)\r
- * \param elemRemove [IN] The value to be removed\r
- *\r
- * \return NPP status code\r
- */\r
-NppStStatus nppsStCompact_32u_host(NppSt32u *h_src, NppSt32u srcLen,\r
- NppSt32u *h_dst, NppSt32u *dstLen, NppSt32u elemRemove);\r
-\r
-\r
-/**\r
- * Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation\r
- * \see nppsStCompact_32u_host\r
- */\r
-NppStStatus nppsStCompact_32s_host(NppSt32s *h_src, NppSt32u srcLen,\r
- NppSt32s *h_dst, NppSt32u *dstLen, NppSt32s elemRemove);\r
-\r
-\r
-/**\r
- * Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation\r
- * \see nppsStCompact_32u_host\r
- */\r
-NppStStatus nppsStCompact_32f_host(NppSt32f *h_src, NppSt32u srcLen,\r
- NppSt32f *h_dst, NppSt32u *dstLen, NppSt32f elemRemove);\r
-\r
-\r
-/*@}*/\r
-\r
-\r
-#ifdef __cplusplus\r
-}\r
-#endif\r
-\r
-\r
-#endif // _npp_staging_h_\r
source_group("Device" FILES ${lib_device_hdrs})
if (HAVE_CUDA AND MSVC)
- file(GLOB ncv_srcs "src/nvidia/*.cpp")
- file(GLOB ncv_hdrs "src/nvidia/*.h*")
- file(GLOB ncv_cuda "src/nvidia/*.cu")
- source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda})
+ file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
+ file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
+ file(GLOB_RECURSE ncv_hdr1 "src/nvidia/*.hpp")
+ file(GLOB_RECURSE ncv_hdr2 "src/nvidia/*.h")
+ source_group("Src\\NVidia" FILES ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda})
+ include_directories("src/nvidia/core" "src/nvidia/NPP_staging")
endif()
if (HAVE_CUDA)
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
string(REPLACE "/EHsc-" "/EHs" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
- endif()
-
-
- include(FindNPP_staging.cmake)
- include_directories(${NPPST_INC})
+ endif()
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
#CUDA_BUILD_CLEAN_TARGET()
endif()
-add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
+add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdr1} ${ncv_hdr2} ${ncv_cuda} ${cuda_objs})
if(PCHSupport_FOUND)
set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} )
if (HAVE_CUDA)
- target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
- target_link_libraries(${the_target} ${NPPST_LIB})
+ target_link_libraries(${the_target} ${CUDA_LIBRARIES} ${CUDA_NPP_LIBRARIES})
CUDA_ADD_CUFFT_TO_TARGET(${the_target})
endif()
+++ /dev/null
-if(CMAKE_SIZEOF_VOID_P EQUAL 4) \r
- set(BIT_SUFF 32)\r
-else()\r
- set(BIT_SUFF 64)\r
-endif()\r
-\r
-if (APPLE)\r
- set(PLATFORM_SUFF Darwin)\r
-elseif (UNIX)\r
- set(PLATFORM_SUFF Linux)\r
-else()\r
- set(PLATFORM_SUFF Windows)\r
-endif()\r
-\r
-set(LIB_FILE NPP_staging_static_${PLATFORM_SUFF}_${BIT_SUFF}_v1)\r
-\r
-find_library(NPPST_LIB \r
- NAMES "${LIB_FILE}" "lib${LIB_FILE}" \r
- PATHS "${CMAKE_SOURCE_DIR}/3rdparty/NPP_staging" \r
- DOC "NPP staging library"\r
- ) \r
-\r
-SET(NPPST_INC "${CMAKE_SOURCE_DIR}//3rdparty/NPP_staging")\r
-
\ No newline at end of file
sz.width = src.cols;\r
sz.height = src.rows;\r
\r
- nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) );\r
+ nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz) ); \r
}\r
else if (src.elemSize() == 4)\r
{\r
- NppStSize32u sz;\r
+ NcvSize32u sz;\r
sz.width = src.cols;\r
sz.height = src.rows;\r
\r
- nppSafeCall( nppiStTranspose_32u_C1R(const_cast<NppSt32u*>(src.ptr<NppSt32u>()), src.step, \r
- dst.ptr<NppSt32u>(), dst.step, sz) );\r
+ nppSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), src.step, \r
+ dst.ptr<Ncv32u>(), dst.step, sz) );\r
}\r
else // if (src.elemSize() == 8)\r
{\r
- NppStSize32u sz;\r
+ NcvSize32u sz;\r
sz.width = src.cols;\r
sz.height = src.rows;\r
\r
- nppSafeCall( nppiStTranspose_64u_C1R(const_cast<NppSt64u*>(src.ptr<NppSt64u>()), src.step, \r
- dst.ptr<NppSt64u>(), dst.step, sz) );\r
+ nppSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), src.step, \r
+ dst.ptr<Ncv64u>(), dst.step, sz) ); \r
}\r
\r
cudaSafeCall( cudaThreadSynchronize() );\r
minNeighbors,\r
scaleStep, 1,\r
flags,\r
- *gpuAllocator, *cpuAllocator, devProp.major, devProp.minor, 0);\r
+ *gpuAllocator, *cpuAllocator, devProp, 0);\r
ncvAssertReturnNcvStat(ncvStat);\r
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
\r
ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);\r
\r
// Load the classifier from file (assuming its size is about 1 mb) using a simple allocator\r
- gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice); \r
- cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned);\r
+ gpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment); \r
+ cpuCascadeAllocator = new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);\r
\r
ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);\r
ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);\r
roi.height = d_src.height();\r
Ncv32u numDetections;\r
ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,\r
- *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp.major, devProp.minor, 0);\r
+ *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);\r
\r
ncvAssertReturnNcvStat(ncvStat);\r
ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
\r
sum.create(src.rows + 1, src.cols + 1, CV_32S);\r
\r
- NppStSize32u roiSize;\r
+ NcvSize32u roiSize;\r
roiSize.width = src.cols;\r
roiSize.height = src.rows;\r
\r
- NppSt32u bufSize;\r
- nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize) );\r
+ cudaDeviceProp prop;\r
+ cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
+\r
+ Ncv32u bufSize;\r
+ nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );\r
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);\r
\r
- nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<NppSt8u*>(src.ptr<NppSt8u>()), src.step, \r
- sum.ptr<NppSt32u>(), sum.step, roiSize, buffer.ptr<NppSt8u>(), bufSize) );\r
+ nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), src.step, \r
+ sum.ptr<Ncv32u>(), sum.step, roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );\r
\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
{\r
CV_Assert(src.type() == CV_8U);\r
\r
- NppStSize32u roiSize;\r
+ NcvSize32u roiSize;\r
roiSize.width = src.cols;\r
roiSize.height = src.rows;\r
\r
- NppSt32u bufSize;\r
- nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize));\r
+ cudaDeviceProp prop;\r
+ cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) );\r
+\r
+ Ncv32u bufSize;\r
+ nppSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop)); \r
GpuMat buf(1, bufSize, CV_8U);\r
\r
sqsum.create(src.rows + 1, src.cols + 1, CV_64F);\r
- nppSafeCall(nppiStSqrIntegral_8u64u_C1R(\r
- const_cast<NppSt8u*>(src.ptr<NppSt8u>(0)), src.step, \r
- sqsum.ptr<NppSt64u>(0), sqsum.step, roiSize, \r
- buf.ptr<NppSt8u>(0), bufSize));\r
+ nppSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), src.step, \r
+ sqsum.ptr<Ncv64u>(0), sqsum.step, roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));\r
\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
-// \r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#include <cstdio>\r
-#include <cuda_runtime.h>\r
-\r
-#define CV_NO_BACKWARD_COMPATIBILITY\r
-\r
-#include "opencv2/opencv.hpp"\r
-\r
-#include "NCVHaarObjectDetection.hpp"\r
-\r
-using namespace cv;\r
-using namespace std;\r
-\r
-const Size preferredVideoFrameSize(640, 480);\r
-\r
-string preferredClassifier = "haarcascade_frontalface_alt.xml";\r
-string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";\r
-\r
-\r
-void printSyntax(void)\r
-{\r
- printf("Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n");\r
-}\r
-\r
-\r
-void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)\r
-{ \r
- int fontFace = CV_FONT_HERSHEY_PLAIN;\r
- double fontScale = 1; \r
- \r
- int baseline;\r
- Size textSize = cv::getTextSize("T", fontFace, fontScale, 1, &baseline);\r
-\r
- va_list arg_ptr;\r
- va_start(arg_ptr, format);\r
- int len = _vscprintf(format, arg_ptr) + 1;\r
- \r
- vector<char> strBuf(len); \r
- vsprintf_s(&strBuf[0], len, format, arg_ptr);\r
-\r
- Point org(1, 3 * textSize.height * (lineOffsY + 1) / 2); \r
- putText(img, &strBuf[0], org, fontFace, fontScale, color);\r
- va_end(arg_ptr); \r
-}\r
-\r
-\r
-NCVStatus process(Mat *srcdst,\r
- Ncv32u width, Ncv32u height,\r
- NcvBool bShowAllHypotheses, NcvBool bLargestFace,\r
- HaarClassifierCascadeDescriptor &haar,\r
- NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes,\r
- NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages,\r
- INCVMemAllocator &gpuAllocator,\r
- INCVMemAllocator &cpuAllocator,\r
- cudaDeviceProp &devProp)\r
-{\r
- ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR);\r
-\r
- NCVStatus ncvStat;\r
-\r
- NCV_SET_SKIP_COND(gpuAllocator.isCounting());\r
-\r
- NCVMatrixAlloc<Ncv8u> d_src(gpuAllocator, width, height);\r
- ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
- NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);\r
- ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
- NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100); \r
- ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
-\r
- Mat h_src_hdr(Size(width, height), CV_8U, h_src.ptr(), h_src.stride());\r
-\r
- NCV_SKIP_COND_BEGIN \r
- \r
- (*srcdst).copyTo(h_src_hdr);\r
- \r
- ncvStat = h_src.copySolid(d_src, 0);\r
- ncvAssertReturnNcvStat(ncvStat);\r
- ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
-\r
- NCV_SKIP_COND_END\r
-\r
- NcvSize32u roi;\r
- roi.width = d_src.width();\r
- roi.height = d_src.height();\r
-\r
- Ncv32u numDetections;\r
- ncvStat = ncvDetectObjectsMultiScale_device(\r
- d_src, roi, d_rects, numDetections, haar, h_haarStages,\r
- d_haarStages, d_haarNodes, d_haarFeatures,\r
- haar.ClassifierSize,\r
- bShowAllHypotheses ? 0 : 4,\r
- 1.2f, 1,\r
- (bLargestFace ? NCVPipeObjDet_FindLargestObject : 0) | NCVPipeObjDet_VisualizeInPlace,\r
- gpuAllocator, cpuAllocator, devProp.major, devProp.minor, 0);\r
- ncvAssertReturnNcvStat(ncvStat);\r
- ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
-\r
- NCV_SKIP_COND_BEGIN\r
-\r
- ncvStat = d_src.copySolid(h_src, 0);\r
- ncvAssertReturnNcvStat(ncvStat);\r
- ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
-\r
- h_src_hdr.copyTo(*srcdst);\r
- \r
- NCV_SKIP_COND_END\r
-\r
- return NCV_SUCCESS;\r
-}\r
-\r
-\r
-int main( int argc, const char** argv )\r
-{\r
- NCVStatus ncvStat;\r
-\r
- printf("NVIDIA Computer Vision SDK\n");\r
- printf("Face Detection in video and live feed\n");\r
- printf("=========================================\n");\r
- printf(" Esc - Quit\n");\r
- printf(" Space - Switch between NCV and OpenCV\n");\r
- printf(" L - Switch between FullSearch and LargestFace modes\n");\r
- printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n");\r
- \r
- if (argc != 4 && argc != 1)\r
- return printSyntax(), -1;\r
-\r
- VideoCapture capture; \r
- Size frameSize;\r
-\r
- if (argc == 1 || strcmp(argv[1], "-c") == 0)\r
- {\r
- // Camera input is specified\r
- int camIdx = (argc == 3) ? atoi(argv[2]) : 0;\r
- if(!capture.open(camIdx)) \r
- return printf("Error opening camera\n"), -1; \r
- \r
- capture.set(CV_CAP_PROP_FRAME_WIDTH, preferredVideoFrameSize.width);\r
- capture.set(CV_CAP_PROP_FRAME_HEIGHT, preferredVideoFrameSize.height);\r
- capture.set(CV_CAP_PROP_FPS, 25);\r
- frameSize = preferredVideoFrameSize;\r
- }\r
- else if (strcmp(argv[1], "-v") == 0)\r
- {\r
- // Video file input (avi)\r
- if(!capture.open(argv[2]))\r
- return printf("Error opening video file\n"), -1;\r
-\r
- frameSize.width = (int)capture.get(CV_CAP_PROP_FRAME_WIDTH);\r
- frameSize.height = (int)capture.get(CV_CAP_PROP_FRAME_HEIGHT);\r
- }\r
- else\r
- return printSyntax(), -1;\r
-\r
- NcvBool bUseOpenCV = true;\r
- NcvBool bLargestFace = true;\r
- NcvBool bShowAllHypotheses = false; \r
-\r
- string classifierFile = (argc == 1) ? preferredClassifier : argv[3];\r
- \r
- CascadeClassifier classifierOpenCV;\r
- if (!classifierOpenCV.load(classifierFile))\r
- return printf("Error (in OpenCV) opening classifier\n"), printSyntax(), -1;\r
-\r
- int devId;\r
- ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);\r
- cudaDeviceProp devProp;\r
- ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);\r
- printf("Using GPU %d %s, arch=%d.%d\n", devId, devProp.name, devProp.major, devProp.minor);\r
-\r
- //==============================================================================\r
- //\r
- // Load the classifier from file (assuming its size is about 1 mb)\r
- // using a simple allocator\r
- //\r
- //==============================================================================\r
-\r
- NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice);\r
- ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);\r
- NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned);\r
- ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);\r
-\r
- Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;\r
- ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1);\r
-\r
- NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages);\r
- ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1);\r
- NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes);\r
- ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1);\r
- NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures);\r
- ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1);\r
-\r
- HaarClassifierCascadeDescriptor haar;\r
- ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, h_haarStages, h_haarNodes, h_haarFeatures);\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1);\r
-\r
- NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages);\r
- ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1);\r
- NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes);\r
- ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1);\r
- NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures);\r
- ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1);\r
-\r
- ncvStat = h_haarStages.copySolid(d_haarStages, 0);\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);\r
- ncvStat = h_haarNodes.copySolid(d_haarNodes, 0);\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);\r
- ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0);\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);\r
-\r
- //==============================================================================\r
- //\r
- // Calculate memory requirements and create real allocators\r
- //\r
- //==============================================================================\r
-\r
- NCVMemStackAllocator gpuCounter(devProp.textureAlignment);\r
- ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1);\r
- NCVMemStackAllocator cpuCounter(devProp.textureAlignment);\r
- ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1);\r
-\r
- ncvStat = process(NULL, frameSize.width, frameSize.height,\r
- false, false, haar,\r
- d_haarStages, d_haarNodes,\r
- d_haarFeatures, h_haarStages,\r
- gpuCounter, cpuCounter, devProp);\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);\r
-\r
- NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);\r
- ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1);\r
- NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);\r
- ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1);\r
-\r
- printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height);\r
-\r
- //==============================================================================\r
- //\r
- // Main processing loop\r
- //\r
- //==============================================================================\r
- \r
- namedWindow(wndTitle, 1);\r
-\r
- Mat frame, gray, frameDisp;\r
-\r
- for(;;)\r
- {\r
- // For camera and video file, capture the next image \r
- capture >> frame;\r
- if (frame.empty())\r
- break;\r
- \r
- cvtColor(frame, gray, CV_BGR2GRAY);\r
-\r
- // process\r
- NcvSize32u minSize = haar.ClassifierSize;\r
- if (bLargestFace)\r
- {\r
- Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;\r
- Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;\r
- Ncv32u ratioSmallest = std::min(ratioX, ratioY);\r
- ratioSmallest = (Ncv32u)std::max(ratioSmallest / 2.5f, 1.f);\r
- minSize.width *= ratioSmallest;\r
- minSize.height *= ratioSmallest;\r
- }\r
- \r
- NcvTimer timer = ncvStartTimer();\r
-\r
- if (!bUseOpenCV)\r
- {\r
- ncvStat = process(&gray, frameSize.width, frameSize.height,\r
- bShowAllHypotheses, bLargestFace, haar,\r
- d_haarStages, d_haarNodes,\r
- d_haarFeatures, h_haarStages,\r
- gpuAllocator, cpuAllocator, devProp);\r
-            ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in NCV detection pass", -1);\r
- }\r
- else\r
- {\r
- vector<Rect> rectsOpenCV;\r
-\r
- classifierOpenCV.detectMultiScale(\r
- gray,\r
- rectsOpenCV,\r
- 1.2f,\r
- bShowAllHypotheses && !bLargestFace ? 0 : 4,\r
- (bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0) | CV_HAAR_SCALE_IMAGE,\r
- Size(minSize.width, minSize.height));\r
-\r
- for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)\r
- rectangle(gray, rectsOpenCV[rt], Scalar(255));\r
- }\r
-\r
- Ncv32f avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);\r
- \r
- cvtColor(gray, frameDisp, CV_GRAY2BGR);\r
-\r
- imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":"");\r
- imagePrintf(frameDisp, 1, CV_RGB(255, 0,0), "L - Switch FullSearch%s / LargestFace%s modes", bLargestFace?"":" (ON)", bLargestFace?" (ON)":"");\r
- imagePrintf(frameDisp, 2, CV_RGB(255, 0,0), "U - Toggle unfiltered hypotheses visualization in FullSearch %s", bShowAllHypotheses?"(ON)":"(OFF)");\r
- imagePrintf(frameDisp, 3, CV_RGB(118,185,0), " Running at %f FPS on %s", 1000.0f / avgTime, bUseOpenCV?"CPU":"GPU");\r
-\r
- cv::imshow(wndTitle, frameDisp);\r
-\r
- switch (cvWaitKey(1))\r
- {\r
- case ' ':\r
- bUseOpenCV = !bUseOpenCV;\r
- break;\r
- case 'L':case 'l':\r
- bLargestFace = !bLargestFace;\r
- break;\r
- case 'U':case 'u':\r
- bShowAllHypotheses = !bShowAllHypotheses;\r
- break;\r
- case 27:\r
- return 0; \r
- }\r
- }\r
- \r
- return 0;\r
-}\r
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
-// \r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-\r
-#include <precomp.hpp>\r
-\r
-\r
-#if !defined (HAVE_CUDA)\r
-\r
-\r
-#else /* !defined (HAVE_CUDA) */\r
-\r
-\r
-#include <stdarg.h>\r
-#include "NCV.hpp"\r
-\r
-\r
-//==============================================================================\r
-//\r
-// Error handling helpers\r
-//\r
-//==============================================================================\r
-\r
-\r
-static void stdioDebugOutput(const char *msg)\r
-{\r
- printf("%s", msg);\r
-}\r
-\r
-\r
-static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput;\r
-\r
-\r
-void ncvDebugOutput(const char *msg, ...)\r
-{\r
- const int K_DEBUG_STRING_MAXLEN = 1024;\r
- char buffer[K_DEBUG_STRING_MAXLEN];\r
- va_list args;\r
- va_start(args, msg);\r
- vsnprintf_s(buffer, K_DEBUG_STRING_MAXLEN, K_DEBUG_STRING_MAXLEN-1, msg, args);\r
- va_end (args);\r
- debugOutputHandler(buffer);\r
-}\r
-\r
-\r
-void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)\r
-{\r
- debugOutputHandler = func;\r
-}\r
-\r
-\r
-//==============================================================================\r
-//\r
-// Memory wrappers and helpers\r
-//\r
-//==============================================================================\r
-\r
-\r
-NCVStatus GPUAlignmentValue(Ncv32u &alignment)\r
-{\r
- int curDev;\r
- cudaDeviceProp curProp;\r
- ncvAssertCUDAReturn(cudaGetDevice(&curDev), NCV_CUDA_ERROR);\r
- ncvAssertCUDAReturn(cudaGetDeviceProperties(&curProp, curDev), NCV_CUDA_ERROR);\r
- alignment = curProp.textureAlignment; //GPUAlignmentValue(curProp.major);\r
- return NCV_SUCCESS;\r
-}\r
-\r
-\r
-Ncv32u alignUp(Ncv32u what, Ncv32u alignment)\r
-{\r
- Ncv32u alignMask = alignment-1;\r
- Ncv32u inverseAlignMask = ~alignMask;\r
- Ncv32u res = (what + alignMask) & inverseAlignMask;\r
- return res;\r
-}\r
-\r
-\r
-void NCVMemPtr::clear()\r
-{\r
- ptr = NULL;\r
- memtype = NCVMemoryTypeNone;\r
-}\r
-\r
-\r
-void NCVMemSegment::clear()\r
-{\r
- begin.clear();\r
- size = 0;\r
-}\r
-\r
-\r
-NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NCVMemoryType srcType, size_t sz, cudaStream_t cuStream)\r
-{\r
- NCVStatus ncvStat;\r
- switch (dstType)\r
- {\r
- case NCVMemoryTypeHostPageable:\r
- case NCVMemoryTypeHostPinned:\r
- switch (srcType)\r
- {\r
- case NCVMemoryTypeHostPageable:\r
- case NCVMemoryTypeHostPinned:\r
- memcpy(dst, src, sz);\r
- ncvStat = NCV_SUCCESS;\r
- break;\r
- case NCVMemoryTypeDevice:\r
- if (cuStream != 0)\r
- {\r
- ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToHost, cuStream), NCV_CUDA_ERROR);\r
- }\r
- else\r
- {\r
- ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);\r
- }\r
- ncvStat = NCV_SUCCESS;\r
- break;\r
- default:\r
- ncvStat = NCV_MEM_RESIDENCE_ERROR;\r
- }\r
- break;\r
- case NCVMemoryTypeDevice:\r
- switch (srcType)\r
- {\r
- case NCVMemoryTypeHostPageable:\r
- case NCVMemoryTypeHostPinned:\r
- if (cuStream != 0)\r
- {\r
- ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyHostToDevice, cuStream), NCV_CUDA_ERROR);\r
- }\r
- else\r
- {\r
- ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);\r
- }\r
- ncvStat = NCV_SUCCESS;\r
- break;\r
- case NCVMemoryTypeDevice:\r
- if (cuStream != 0)\r
- {\r
- ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToDevice, cuStream), NCV_CUDA_ERROR);\r
- }\r
- else\r
- {\r
- ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToDevice), NCV_CUDA_ERROR);\r
- }\r
- ncvStat = NCV_SUCCESS;\r
- break;\r
- default:\r
- ncvStat = NCV_MEM_RESIDENCE_ERROR;\r
- }\r
- break;\r
- default:\r
- ncvStat = NCV_MEM_RESIDENCE_ERROR;\r
- }\r
-\r
- return ncvStat;\r
-}\r
-\r
-\r
-//===================================================================\r
-//\r
-// NCVMemStackAllocator class members implementation\r
-//\r
-//===================================================================\r
-\r
-\r
-NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)\r
- :\r
- currentSize(0),\r
- _maxSize(0),\r
- allocBegin(NULL),\r
- begin(NULL),\r
- _memType(NCVMemoryTypeNone),\r
- _alignment(alignment)\r
-{\r
- NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;\r
- ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2");\r
-}\r
-\r
-\r
-NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment)\r
- :\r
- currentSize(0),\r
- _maxSize(0),\r
- allocBegin(NULL),\r
- _memType(memT),\r
- _alignment(alignment)\r
-{\r
- NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;\r
- ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");\r
-\r
- allocBegin = NULL;\r
-\r
- switch (memT)\r
- {\r
- case NCVMemoryTypeDevice:\r
- ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );\r
- break;\r
- case NCVMemoryTypeHostPinned:\r
- ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );\r
- break;\r
- case NCVMemoryTypeHostPageable:\r
- allocBegin = (Ncv8u *)malloc(capacity);\r
- break;\r
- }\r
-\r
- if (capacity == 0)\r
- {\r
- allocBegin = (Ncv8u *)(0x1);\r
- }\r
-\r
- if (!isCounting())\r
- {\r
- begin = allocBegin;\r
- end = begin + capacity;\r
- }\r
-}\r
-\r
-\r
-NCVMemStackAllocator::~NCVMemStackAllocator()\r
-{\r
- if (allocBegin != NULL)\r
- {\r
- ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");\r
- switch (_memType)\r
- {\r
- case NCVMemoryTypeDevice:\r
- ncvAssertCUDAReturn(cudaFree(allocBegin), );\r
- break;\r
- case NCVMemoryTypeHostPinned:\r
- ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );\r
- break;\r
- case NCVMemoryTypeHostPageable:\r
- free(allocBegin);\r
- break;\r
- }\r
- allocBegin = NULL;\r
- }\r
-}\r
-\r
-\r
-NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)\r
-{\r
- seg.clear();\r
- ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
-\r
- size = alignUp(size, this->_alignment);\r
- this->currentSize += size;\r
- this->_maxSize = std::max(this->_maxSize, this->currentSize);\r
-\r
- if (!isCounting())\r
- {\r
- size_t availSize = end - begin;\r
- ncvAssertReturn(size <= availSize, NCV_ALLOCATOR_INSUFFICIENT_CAPACITY);\r
- }\r
-\r
- seg.begin.ptr = begin;\r
- seg.begin.memtype = this->_memType;\r
- seg.size = size;\r
- begin += size;\r
-\r
- return NCV_SUCCESS;\r
-}\r
-\r
-\r
-NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg)\r
-{\r
- ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
- ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);\r
- ncvAssertReturn(seg.begin.ptr != NULL || isCounting(), NCV_ALLOCATOR_BAD_DEALLOC);\r
- ncvAssertReturn(seg.begin.ptr == begin - seg.size, NCV_ALLOCATOR_DEALLOC_ORDER);\r
-\r
- currentSize -= seg.size;\r
- begin -= seg.size;\r
-\r
- seg.clear();\r
-\r
- ncvAssertReturn(allocBegin <= begin, NCV_ALLOCATOR_BAD_DEALLOC);\r
-\r
- return NCV_SUCCESS;\r
-}\r
-\r
-\r
-NcvBool NCVMemStackAllocator::isInitialized(void) const\r
-{\r
- return (((this->_alignment & (this->_alignment-1)) == 0) && isCounting()) || this->allocBegin != NULL;\r
-}\r
-\r
-\r
-NcvBool NCVMemStackAllocator::isCounting(void) const\r
-{\r
- return this->_memType == NCVMemoryTypeNone;\r
-}\r
-\r
-\r
-NCVMemoryType NCVMemStackAllocator::memType(void) const\r
-{\r
- return this->_memType;\r
-}\r
-\r
-\r
-Ncv32u NCVMemStackAllocator::alignment(void) const\r
-{\r
- return this->_alignment;\r
-}\r
-\r
-\r
-size_t NCVMemStackAllocator::maxSize(void) const\r
-{\r
- return this->_maxSize;\r
-}\r
-\r
-\r
-//===================================================================\r
-//\r
-// NCVMemNativeAllocator class members implementation\r
-//\r
-//===================================================================\r
-\r
-\r
-NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT)\r
- :\r
- currentSize(0),\r
- _maxSize(0),\r
- _memType(memT)\r
-{\r
- ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );\r
- ncvAssertPrintReturn(NCV_SUCCESS == GPUAlignmentValue(this->_alignment), "NCVMemNativeAllocator ctor:: couldn't get device _alignment", );\r
-}\r
-\r
-\r
-NCVMemNativeAllocator::~NCVMemNativeAllocator()\r
-{\r
- ncvAssertPrintCheck(currentSize == 0, "NCVMemNativeAllocator dtor:: detected memory leak");\r
-}\r
-\r
-\r
-NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)\r
-{\r
- seg.clear();\r
- ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
-\r
- switch (this->_memType)\r
- {\r
- case NCVMemoryTypeDevice:\r
- ncvAssertCUDAReturn(cudaMalloc(&seg.begin.ptr, size), NCV_CUDA_ERROR);\r
- break;\r
- case NCVMemoryTypeHostPinned:\r
- ncvAssertCUDAReturn(cudaMallocHost(&seg.begin.ptr, size), NCV_CUDA_ERROR);\r
- break;\r
- case NCVMemoryTypeHostPageable:\r
- seg.begin.ptr = (Ncv8u *)malloc(size);\r
- break;\r
- }\r
-\r
- this->currentSize += alignUp(size, this->_alignment);\r
- this->_maxSize = std::max(this->_maxSize, this->currentSize);\r
-\r
- seg.begin.memtype = this->_memType;\r
- seg.size = size;\r
-\r
- return NCV_SUCCESS;\r
-}\r
-\r
-\r
-NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)\r
-{\r
- ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
- ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);\r
- ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);\r
-\r
- ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);\r
- currentSize -= alignUp(seg.size, this->_alignment);\r
-\r
- switch (this->_memType)\r
- {\r
- case NCVMemoryTypeDevice:\r
- ncvAssertCUDAReturn(cudaFree(seg.begin.ptr), NCV_CUDA_ERROR);\r
- break;\r
- case NCVMemoryTypeHostPinned:\r
- ncvAssertCUDAReturn(cudaFreeHost(seg.begin.ptr), NCV_CUDA_ERROR);\r
- break;\r
- case NCVMemoryTypeHostPageable:\r
- free(seg.begin.ptr);\r
- break;\r
- }\r
-\r
- seg.clear();\r
-\r
- return NCV_SUCCESS;\r
-}\r
-\r
-\r
-NcvBool NCVMemNativeAllocator::isInitialized(void) const\r
-{\r
- return (this->_alignment != 0);\r
-}\r
-\r
-\r
-NcvBool NCVMemNativeAllocator::isCounting(void) const\r
-{\r
- return false;\r
-}\r
-\r
-\r
-NCVMemoryType NCVMemNativeAllocator::memType(void) const\r
-{\r
- return this->_memType;\r
-}\r
-\r
-\r
-Ncv32u NCVMemNativeAllocator::alignment(void) const\r
-{\r
- return this->_alignment;\r
-}\r
-\r
-\r
-size_t NCVMemNativeAllocator::maxSize(void) const\r
-{\r
- return this->_maxSize;\r
-}\r
-\r
-\r
-//===================================================================\r
-//\r
-// Time and timer routines\r
-//\r
-//===================================================================\r
-\r
-\r
-typedef struct _NcvTimeMoment NcvTimeMoment;\r
-\r
-#if defined(_WIN32) || defined(_WIN64)\r
-\r
- #include <Windows.h>\r
-\r
- typedef struct _NcvTimeMoment\r
- {\r
- LONGLONG moment, freq;\r
- } NcvTimeMoment;\r
-\r
-\r
- static void _ncvQueryMoment(NcvTimeMoment *t)\r
- {\r
- QueryPerformanceFrequency((LARGE_INTEGER *)&(t->freq));\r
- QueryPerformanceCounter((LARGE_INTEGER *)&(t->moment));\r
- }\r
-\r
-\r
- double _ncvMomentToMicroseconds(NcvTimeMoment *t)\r
- {\r
- return 1000000.0 * t->moment / t->freq;\r
- }\r
-\r
-\r
- double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
- {\r
- return 1000000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);\r
- }\r
-\r
-\r
- double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
- {\r
- return 1000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);\r
- }\r
-\r
-#elif defined(__unix__)\r
-\r
- #include <sys/time.h>\r
-\r
- typedef struct _NcvTimeMoment\r
- {\r
- struct timeval tv; \r
- struct timezone tz;\r
- } NcvTimeMoment;\r
-\r
-\r
- void _ncvQueryMoment(NcvTimeMoment *t)\r
- {\r
- gettimeofday(& t->tv, & t->tz);\r
- }\r
-\r
-\r
- double _ncvMomentToMicroseconds(NcvTimeMoment *t)\r
- {\r
- return 1000000.0 * t->tv.tv_sec + (double)t->tv.tv_usec;\r
- }\r
-\r
-\r
- double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
- {\r
- return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000000 + (double)t2->tv.tv_usec - (double)t1->tv.tv_usec);\r
- }\r
-\r
-\r
- //required by ncvEndQueryTimerMs; analogous to the microseconds variant above\r
- double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
- {\r
- return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000 + ((double)t2->tv.tv_usec - (double)t1->tv.tv_usec) / 1000);\r
- }\r
-\r
-\r
-#endif //#if defined(_WIN32) || defined(_WIN64)\r
-\r
-\r
-struct _NcvTimer\r
-{\r
- NcvTimeMoment t1, t2;\r
-};\r
-\r
-\r
-NcvTimer ncvStartTimer(void)\r
-{\r
- struct _NcvTimer *t;\r
- t = (struct _NcvTimer *)malloc(sizeof(struct _NcvTimer));\r
- _ncvQueryMoment(&t->t1);\r
- return t;\r
-}\r
-\r
-\r
-double ncvEndQueryTimerUs(NcvTimer t)\r
-{\r
- double res;\r
- _ncvQueryMoment(&t->t2);\r
- res = _ncvMomentsDiffToMicroseconds(&t->t1, &t->t2);\r
- free(t);\r
- return res;\r
-}\r
-\r
-\r
-double ncvEndQueryTimerMs(NcvTimer t)\r
-{\r
- double res;\r
- _ncvQueryMoment(&t->t2);\r
- res = _ncvMomentsDiffToMilliseconds(&t->t1, &t->t2);\r
- free(t);\r
- return res;\r
-}\r
-\r
-#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
-// \r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other materials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#ifndef _ncv_hpp_\r
-#define _ncv_hpp_\r
-\r
-#include <cuda_runtime.h>\r
-#include "npp_staging.h"\r
-\r
-\r
-//==============================================================================\r
-//\r
-// Alignment macros\r
-//\r
-//==============================================================================\r
-\r
-\r
-#if !defined(__align__) && !defined(__CUDACC__)\r
- #if defined(_WIN32) || defined(_WIN64)\r
- #define __align__(n) __declspec(align(n))\r
- #elif defined(__unix__)\r
- #define __align__(n) __attribute__((__aligned__(n)))\r
- #endif\r
-#endif\r
-\r
-\r
-//==============================================================================\r
-//\r
-// Integral and compound types of guaranteed size\r
-//\r
-//==============================================================================\r
-\r
-\r
-typedef bool NcvBool;\r
-typedef long long Ncv64s;\r
-typedef unsigned long long Ncv64u;\r
-typedef int Ncv32s;\r
-typedef unsigned int Ncv32u;\r
-typedef short Ncv16s;\r
-typedef unsigned short Ncv16u;\r
-typedef char Ncv8s;\r
-typedef unsigned char Ncv8u;\r
-typedef float Ncv32f;\r
-typedef double Ncv64f;\r
-\r
-\r
-typedef struct\r
-{\r
- Ncv8u x;\r
- Ncv8u y;\r
- Ncv8u width;\r
- Ncv8u height;\r
-} NcvRect8u;\r
-\r
-\r
-typedef struct\r
-{\r
- Ncv32s x; ///< x-coordinate of upper left corner.\r
- Ncv32s y; ///< y-coordinate of upper left corner.\r
- Ncv32s width; ///< Rectangle width.\r
- Ncv32s height; ///< Rectangle height.\r
-} NcvRect32s;\r
-\r
-\r
-typedef struct\r
-{\r
- Ncv32u x; ///< x-coordinate of upper left corner.\r
- Ncv32u y; ///< y-coordinate of upper left corner.\r
- Ncv32u width; ///< Rectangle width.\r
- Ncv32u height; ///< Rectangle height.\r
-} NcvRect32u;\r
-\r
-\r
-typedef struct \r
-{\r
- Ncv32s width; ///< Rectangle width.\r
- Ncv32s height; ///< Rectangle height.\r
-} NcvSize32s;\r
-\r
-\r
-typedef struct \r
-{\r
- Ncv32u width; ///< Rectangle width.\r
- Ncv32u height; ///< Rectangle height.\r
-} NcvSize32u;\r
-\r
-\r
-NPPST_CT_ASSERT(sizeof(NcvBool) <= 4);\r
-NPPST_CT_ASSERT(sizeof(Ncv64s) == 8);\r
-NPPST_CT_ASSERT(sizeof(Ncv64u) == 8);\r
-NPPST_CT_ASSERT(sizeof(Ncv32s) == 4);\r
-NPPST_CT_ASSERT(sizeof(Ncv32u) == 4);\r
-NPPST_CT_ASSERT(sizeof(Ncv16s) == 2);\r
-NPPST_CT_ASSERT(sizeof(Ncv16u) == 2);\r
-NPPST_CT_ASSERT(sizeof(Ncv8s) == 1);\r
-NPPST_CT_ASSERT(sizeof(Ncv8u) == 1);\r
-NPPST_CT_ASSERT(sizeof(Ncv32f) == 4);\r
-NPPST_CT_ASSERT(sizeof(Ncv64f) == 8);\r
-NPPST_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));\r
-NPPST_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));\r
-NPPST_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));\r
-NPPST_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));\r
-\r
-\r
-//==============================================================================\r
-//\r
-// Persistent constants\r
-//\r
-//==============================================================================\r
-\r
-\r
-const Ncv32u K_WARP_SIZE = 32;\r
-const Ncv32u K_LOG2_WARP_SIZE = 5;\r
-\r
-\r
-//==============================================================================\r
-//\r
-// Error handling\r
-//\r
-//==============================================================================\r
-\r
-\r
-#define NCV_CT_PREP_STRINGIZE_AUX(x) #x\r
-#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)\r
-\r
-\r
-void ncvDebugOutput(const char *msg, ...);\r
-\r
-\r
-typedef void NCVDebugOutputHandler(const char* msg);\r
-\r
-\r
-void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);\r
-\r
-\r
-#define ncvAssertPrintCheck(pred, msg) \\r
- ((pred) ? true : (ncvDebugOutput("\n%s\n", \\r
- "NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \\r
- ), false))\r
-\r
-\r
-#define ncvAssertPrintReturn(pred, msg, err) \\r
- if (ncvAssertPrintCheck(pred, msg)) ; else return err\r
-\r
-\r
-#define ncvAssertReturn(pred, err) \\r
- do \\r
- { \\r
- if (!(pred)) \\r
- { \\r
- ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \\r
- return err; \\r
- } \\r
- } while (0)\r
-\r
-\r
-#define ncvAssertReturnNcvStat(ncvOp) \\r
- do \\r
- { \\r
- NCVStatus _ncvStat = ncvOp; \\r
- if (NCV_SUCCESS != _ncvStat) \\r
- { \\r
- ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \\r
- return _ncvStat; \\r
- } \\r
- } while (0)\r
-\r
-\r
-#define ncvAssertCUDAReturn(cudacall, errCode) \\r
- do \\r
- { \\r
- cudaError_t resCall = cudacall; \\r
- cudaError_t resGLE = cudaGetLastError(); \\r
- if (cudaSuccess != resCall || cudaSuccess != resGLE) \\r
- { \\r
- ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \\r
- return errCode; \\r
- } \\r
- } while (0)\r
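-\r
-/*\r
-* A minimal usage sketch (illustration only; copyToDevice is a hypothetical caller,\r
-* not part of this header): the assertion macros are intended for functions that\r
-* return NCVStatus, so a failed predicate or CUDA call exits with an error code:\r
-*\r
-*   NCVStatus copyToDevice(void *d_dst, const void *h_src, size_t sz)\r
-*   {\r
-*       ncvAssertReturn(d_dst != NULL && h_src != NULL, NCV_NULL_PTR);\r
-*       ncvAssertCUDAReturn(cudaMemcpy(d_dst, h_src, sz, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);\r
-*       return NCV_SUCCESS;\r
-*   }\r
-*/\r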
-\r
-\r
-/**\r
-* Return-codes for status notification, errors and warnings\r
-*/\r
-enum NCVStatus\r
-{\r
- NCV_SUCCESS,\r
-\r
- NCV_CUDA_ERROR,\r
- NCV_NPP_ERROR,\r
- NCV_FILE_ERROR,\r
-\r
- NCV_NULL_PTR,\r
- NCV_INCONSISTENT_INPUT,\r
- NCV_TEXTURE_BIND_ERROR,\r
- NCV_DIMENSIONS_INVALID,\r
-\r
- NCV_INVALID_ROI,\r
- NCV_INVALID_STEP,\r
- NCV_INVALID_SCALE,\r
-\r
- NCV_ALLOCATOR_NOT_INITIALIZED,\r
- NCV_ALLOCATOR_BAD_ALLOC,\r
- NCV_ALLOCATOR_BAD_DEALLOC,\r
- NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,\r
- NCV_ALLOCATOR_DEALLOC_ORDER,\r
- NCV_ALLOCATOR_BAD_REUSE,\r
-\r
- NCV_MEM_COPY_ERROR,\r
- NCV_MEM_RESIDENCE_ERROR,\r
- NCV_MEM_INSUFFICIENT_CAPACITY,\r
-\r
- NCV_HAAR_INVALID_PIXEL_STEP,\r
- NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,\r
- NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,\r
- NCV_HAAR_TOO_LARGE_FEATURES,\r
- NCV_HAAR_XML_LOADING_EXCEPTION,\r
-\r
- NCV_NOIMPL_HAAR_TILTED_FEATURES,\r
-\r
- NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,\r
-};\r
-\r
-\r
-#define NCV_SET_SKIP_COND(x) \\r
- bool __ncv_skip_cond = x\r
-\r
-\r
-#define NCV_RESET_SKIP_COND(x) \\r
- __ncv_skip_cond = x\r
-\r
-\r
-#define NCV_SKIP_COND_BEGIN \\r
- if (!__ncv_skip_cond) {\r
-\r
-\r
-#define NCV_SKIP_COND_END \\r
- }\r
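-\r
-/*\r
-* A minimal usage sketch (illustration only): the skip-condition macros let the same\r
-* code path run in "counting" (size-estimation) mode without touching any memory:\r
-*\r
-*   NCV_SET_SKIP_COND(gpuAllocator.isCounting());\r
-*   NCV_SKIP_COND_BEGIN\r
-*   //kernels and copies that require real allocations go here\r
-*   NCV_SKIP_COND_END\r
-*/\r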
-\r
-\r
-//==============================================================================\r
-//\r
-// Timer\r
-//\r
-//==============================================================================\r
-\r
-\r
-typedef struct _NcvTimer *NcvTimer;\r
-\r
-NcvTimer ncvStartTimer(void);\r
-\r
-double ncvEndQueryTimerUs(NcvTimer t);\r
-\r
-double ncvEndQueryTimerMs(NcvTimer t);\r
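-\r
-/*\r
-* A minimal usage sketch (illustration only): the timer handle is allocated by\r
-* ncvStartTimer and released by the matching end-query call:\r
-*\r
-*   NcvTimer timer = ncvStartTimer();\r
-*   //... work to be measured ...\r
-*   double elapsedMs = ncvEndQueryTimerMs(timer);\r
-*/\r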
-\r
-\r
-//==============================================================================\r
-//\r
-// Memory management classes template compound types\r
-//\r
-//==============================================================================\r
-\r
-\r
-/**\r
-* Queries the alignment of GPU memory chunks in bytes (the texture alignment of the current device)\r
-*/\r
-NCVStatus GPUAlignmentValue(Ncv32u &alignment);\r
-\r
-\r
-/**\r
-* Rounds a value up to the next multiple of the given alignment (assumes a power-of-two alignment)\r
-*/\r
-Ncv32u alignUp(Ncv32u what, Ncv32u alignment);\r
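-\r
-/*\r
-* A worked example (illustration only, not part of the original header), assuming\r
-* a power-of-two alignment as required by the bitmask implementation:\r
-*   alignUp(10, 8)   == 16\r
-*   alignUp(16, 8)   == 16\r
-*   alignUp(17, 128) == 128\r
-*/\r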
-\r
-\r
-/**\r
-* NCVMemoryType\r
-*/\r
-enum NCVMemoryType\r
-{\r
- NCVMemoryTypeNone,\r
- NCVMemoryTypeHostPageable,\r
- NCVMemoryTypeHostPinned,\r
- NCVMemoryTypeDevice\r
-};\r
-\r
-\r
-/**\r
-* NCVMemPtr\r
-*/\r
-struct NCVMemPtr\r
-{\r
- void *ptr;\r
- NCVMemoryType memtype;\r
- void clear();\r
-};\r
-\r
-\r
-/**\r
-* NCVMemSegment\r
-*/\r
-struct NCVMemSegment\r
-{\r
- NCVMemPtr begin;\r
- size_t size;\r
- void clear();\r
-};\r
-\r
-\r
-/**\r
-* INCVMemAllocator (Interface)\r
-*/\r
-class INCVMemAllocator\r
-{\r
-public:\r
- virtual ~INCVMemAllocator() = 0;\r
-\r
- virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0;\r
- virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;\r
-\r
- virtual NcvBool isInitialized(void) const = 0;\r
- virtual NcvBool isCounting(void) const = 0;\r
- \r
- virtual NCVMemoryType memType(void) const = 0;\r
- virtual Ncv32u alignment(void) const = 0;\r
- virtual size_t maxSize(void) const = 0;\r
-};\r
-\r
-inline INCVMemAllocator::~INCVMemAllocator() {}\r
-\r
-\r
-/**\r
-* NCVMemStackAllocator\r
-*/\r
-class NCVMemStackAllocator : public INCVMemAllocator\r
-{\r
- NCVMemStackAllocator();\r
- NCVMemStackAllocator(const NCVMemStackAllocator &);\r
-\r
-public:\r
-\r
- explicit NCVMemStackAllocator(Ncv32u alignment);\r
- NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment);\r
- virtual ~NCVMemStackAllocator();\r
-\r
- virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);\r
- virtual NCVStatus dealloc(NCVMemSegment &seg);\r
-\r
- virtual NcvBool isInitialized(void) const;\r
- virtual NcvBool isCounting(void) const;\r
-\r
- virtual NCVMemoryType memType(void) const;\r
- virtual Ncv32u alignment(void) const;\r
- virtual size_t maxSize(void) const;\r
-\r
-private:\r
-\r
- NCVMemoryType _memType;\r
- Ncv32u _alignment;\r
- Ncv8u *allocBegin;\r
- Ncv8u *begin;\r
- Ncv8u *end;\r
- size_t currentSize;\r
- size_t _maxSize;\r
-};\r
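-\r
-/*\r
-* A minimal sketch of the intended two-pass usage (illustration only; the exact\r
-* calling code is an assumption): a counting pass measures the peak requirement,\r
-* then a real allocator of that capacity backs the actual processing pass:\r
-*\r
-*   NCVMemStackAllocator counter(alignment);   //counting mode, no device memory touched\r
-*   //... run the processing pipeline once against 'counter' ...\r
-*   NCVMemStackAllocator gpuPool(NCVMemoryTypeDevice, counter.maxSize(), alignment);\r
-*/\r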
-\r
-\r
-/**\r
-* NCVMemNativeAllocator\r
-*/\r
-class NCVMemNativeAllocator : public INCVMemAllocator\r
-{\r
-public:\r
-\r
- NCVMemNativeAllocator(NCVMemoryType memT);\r
- virtual ~NCVMemNativeAllocator();\r
-\r
- virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);\r
- virtual NCVStatus dealloc(NCVMemSegment &seg);\r
-\r
- virtual NcvBool isInitialized(void) const;\r
- virtual NcvBool isCounting(void) const;\r
-\r
- virtual NCVMemoryType memType(void) const;\r
- virtual Ncv32u alignment(void) const;\r
- virtual size_t maxSize(void) const;\r
-\r
-private:\r
-\r
- NCVMemNativeAllocator();\r
- NCVMemNativeAllocator(const NCVMemNativeAllocator &);\r
-\r
- NCVMemoryType _memType;\r
- Ncv32u _alignment;\r
- size_t currentSize;\r
- size_t _maxSize;\r
-};\r
-\r
-\r
-/**\r
-* Copy dispatcher\r
-*/\r
-NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,\r
- const void *src, NCVMemoryType srcType,\r
- size_t sz, cudaStream_t cuStream);\r
-\r
-\r
-/**\r
-* NCVVector (1D)\r
-*/\r
-template <class T>\r
-class NCVVector\r
-{\r
- NCVVector(const NCVVector &);\r
-\r
-public:\r
-\r
- NCVVector()\r
- {\r
- clear();\r
- }\r
-\r
- virtual ~NCVVector() {}\r
-\r
- void clear()\r
- {\r
- _ptr = NULL;\r
- _length = 0;\r
- _memtype = NCVMemoryTypeNone;\r
- }\r
-\r
- NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0)\r
- {\r
- if (howMuch == 0)\r
- {\r
- ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);\r
- howMuch = this->_length * sizeof(T);\r
- }\r
- else\r
- {\r
- ncvAssertReturn(dst._length * sizeof(T) >= howMuch && \r
- this->_length * sizeof(T) >= howMuch &&\r
- howMuch > 0, NCV_MEM_COPY_ERROR);\r
- }\r
- ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) && \r
- (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);\r
-\r
- NCVStatus ncvStat = NCV_SUCCESS;\r
- if (this->_memtype != NCVMemoryTypeNone)\r
- {\r
- ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,\r
- this->_ptr, this->_memtype,\r
- howMuch, cuStream);\r
- }\r
-\r
- return ncvStat;\r
- }\r
-\r
- T *ptr() const {return this->_ptr;}\r
- size_t length() const {return this->_length;}\r
- NCVMemoryType memType() const {return this->_memtype;}\r
-\r
-protected:\r
-\r
- T *_ptr;\r
- size_t _length;\r
- NCVMemoryType _memtype;\r
-};\r
-\r
-\r
-/**\r
-* NCVVectorAlloc\r
-*/\r
-template <class T>\r
-class NCVVectorAlloc : public NCVVector<T>\r
-{\r
- NCVVectorAlloc();\r
- NCVVectorAlloc(const NCVVectorAlloc &);\r
-\r
-public:\r
-\r
- NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)\r
- :\r
- allocator(allocator)\r
- {\r
- NCVStatus ncvStat;\r
-\r
- this->clear();\r
- this->allocatedMem.clear();\r
-\r
- ncvStat = allocator.alloc(this->allocatedMem, length * sizeof(T));\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );\r
-\r
- this->_ptr = (T *)this->allocatedMem.begin.ptr;\r
- this->_length = length;\r
- this->_memtype = this->allocatedMem.begin.memtype;\r
- }\r
-\r
-\r
- ~NCVVectorAlloc()\r
- {\r
- NCVStatus ncvStat;\r
-\r
- ncvStat = allocator.dealloc(this->allocatedMem);\r
- ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");\r
-\r
- this->clear();\r
- }\r
-\r
-\r
- NcvBool isMemAllocated() const\r
- {\r
- return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());\r
- }\r
-\r
-\r
- Ncv32u getAllocatorsAlignment() const\r
- {\r
- return allocator.alignment();\r
- }\r
-\r
-\r
- NCVMemSegment getSegment() const\r
- {\r
- return allocatedMem;\r
- }\r
-\r
-private:\r
-\r
- INCVMemAllocator &allocator;\r
- NCVMemSegment allocatedMem;\r
-};\r
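-\r
-/*\r
-* A minimal allocation sketch (illustration only; gpuAllocator and numElements are\r
-* placeholders): an NCVVectorAlloc draws its storage from the given allocator in\r
-* its constructor and returns it in its destructor, so success is checked via\r
-* isMemAllocated() rather than an exception:\r
-*\r
-*   NCVVectorAlloc<Ncv32u> d_mask(gpuAllocator, numElements);\r
-*   ncvAssertReturn(d_mask.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
-*/\r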
-\r
-\r
-/**\r
-* NCVVectorReuse\r
-*/\r
-template <class T>\r
-class NCVVectorReuse : public NCVVector<T>\r
-{\r
- NCVVectorReuse();\r
- NCVVectorReuse(const NCVVectorReuse &);\r
-\r
-public:\r
-\r
- explicit NCVVectorReuse(const NCVMemSegment &memSegment)\r
- {\r
- this->bReused = false;\r
- this->clear();\r
-\r
- this->_length = memSegment.size / sizeof(T);\r
- this->_ptr = (T *)memSegment.begin.ptr;\r
- this->_memtype = memSegment.begin.memtype;\r
-\r
- this->bReused = true;\r
- }\r
-\r
-\r
- NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)\r
- {\r
- this->bReused = false;\r
- this->clear();\r
-\r
- ncvAssertPrintReturn(length * sizeof(T) <= memSegment.size, \\r
- "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );\r
-\r
- this->_length = length;\r
- this->_ptr = (T *)memSegment.begin.ptr;\r
- this->_memtype = memSegment.begin.memtype;\r
-\r
- this->bReused = true;\r
- }\r
-\r
-\r
- NcvBool isMemReused() const\r
- {\r
- return this->bReused;\r
- }\r
-\r
-private:\r
-\r
- NcvBool bReused;\r
-};\r
-\r
-\r
-/**\r
-* NCVMatrix (2D)\r
-*/\r
-template <class T>\r
-class NCVMatrix\r
-{\r
- NCVMatrix(const NCVMatrix &);\r
-\r
-public:\r
-\r
- NCVMatrix()\r
- {\r
- clear();\r
- }\r
-\r
- virtual ~NCVMatrix() {}\r
-\r
-\r
- void clear()\r
- {\r
- _ptr = NULL;\r
- _pitch = 0;\r
- _width = 0;\r
- _height = 0;\r
- _memtype = NCVMemoryTypeNone;\r
- }\r
-\r
-\r
- Ncv32u stride() const\r
- {\r
- return _pitch / sizeof(T);\r
- }\r
-\r
-\r
- NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0)\r
- {\r
- if (howMuch == 0)\r
- {\r
- ncvAssertReturn(dst._pitch == this->_pitch &&\r
- dst._height == this->_height, NCV_MEM_COPY_ERROR);\r
- howMuch = this->_pitch * this->_height;\r
- }\r
- else\r
- {\r
- ncvAssertReturn(dst._pitch * dst._height >= howMuch && \r
- this->_pitch * this->_height >= howMuch &&\r
- howMuch > 0, NCV_MEM_COPY_ERROR);\r
- }\r
- ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) && \r
- (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);\r
-\r
- NCVStatus ncvStat = NCV_SUCCESS;\r
- if (this->_memtype != NCVMemoryTypeNone)\r
- {\r
- ncvStat = memSegCopyHelper(dst._ptr, dst._memtype, \r
- this->_ptr, this->_memtype, \r
- howMuch, cuStream);\r
- }\r
-\r
- return ncvStat;\r
- }\r
-\r
- T *ptr() const {return this->_ptr;}\r
- Ncv32u width() const {return this->_width;}\r
- Ncv32u height() const {return this->_height;}\r
- Ncv32u pitch() const {return this->_pitch;}\r
- NCVMemoryType memType() const {return this->_memtype;}\r
-\r
-protected:\r
-\r
- T *_ptr;\r
- Ncv32u _width;\r
- Ncv32u _height;\r
- Ncv32u _pitch;\r
- NCVMemoryType _memtype;\r
-};\r
-\r
-\r
-/**\r
-* NCVMatrixAlloc\r
-*/\r
-template <class T>\r
-class NCVMatrixAlloc : public NCVMatrix<T>\r
-{\r
- NCVMatrixAlloc();\r
- NCVMatrixAlloc(const NCVMatrixAlloc &);\r
-\r
-public:\r
-\r
- NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)\r
- :\r
- allocator(allocator)\r
- {\r
- NCVStatus ncvStat;\r
-\r
- this->clear();\r
- this->allocatedMem.clear();\r
-\r
- Ncv32u widthBytes = width * sizeof(T);\r
- Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());\r
-\r
- if (pitch != 0)\r
- {\r
- ncvAssertPrintReturn(pitch >= pitchBytes &&\r
- (pitch & (allocator.alignment() - 1)) == 0,\r
- "NCVMatrixAlloc ctor:: incorrect pitch passed", );\r
- pitchBytes = pitch;\r
- }\r
-\r
- Ncv32u requiredAllocSize = pitchBytes * height;\r
-\r
- ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);\r
- ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );\r
-\r
- this->_ptr = (T *)this->allocatedMem.begin.ptr;\r
- this->_width = width;\r
- this->_height = height;\r
- this->_pitch = pitchBytes;\r
- this->_memtype = this->allocatedMem.begin.memtype;\r
- }\r
-\r
- ~NCVMatrixAlloc()\r
- {\r
- NCVStatus ncvStat;\r
-\r
- ncvStat = allocator.dealloc(this->allocatedMem);\r
- ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");\r
-\r
- this->clear();\r
- }\r
-\r
-\r
- NcvBool isMemAllocated() const\r
- {\r
- return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());\r
- }\r
-\r
-\r
- Ncv32u getAllocatorsAlignment() const\r
- {\r
- return allocator.alignment();\r
- }\r
-\r
-\r
- NCVMemSegment getSegment() const\r
- {\r
- return allocatedMem;\r
- }\r
-\r
-private:\r
-\r
- INCVMemAllocator &allocator;\r
- NCVMemSegment allocatedMem;\r
-};\r
-\r
-\r
-/**\r
-* NCVMatrixReuse\r
-*/\r
-template <class T>\r
-class NCVMatrixReuse : public NCVMatrix<T>\r
-{\r
- NCVMatrixReuse();\r
- NCVMatrixReuse(const NCVMatrixReuse &);\r
-\r
-public:\r
-\r
- NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)\r
- {\r
- this->bReused = false;\r
- this->clear();\r
-\r
- Ncv32u widthBytes = width * sizeof(T);\r
- Ncv32u pitchBytes = alignUp(widthBytes, alignment);\r
-\r
- if (pitch != 0)\r
- {\r
- if (!bSkipPitchCheck)\r
- {\r
- ncvAssertPrintReturn(pitch >= pitchBytes &&\r
- (pitch & (alignment - 1)) == 0,\r
- "NCVMatrixReuse ctor:: incorrect pitch passed", );\r
- }\r
- else\r
- {\r
- ncvAssertPrintReturn(pitch >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );\r
- }\r
- pitchBytes = pitch;\r
- }\r
-\r
- ncvAssertPrintReturn(pitchBytes * height <= memSegment.size, \\r
- "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );\r
-\r
- this->_width = width;\r
- this->_height = height;\r
- this->_pitch = pitchBytes;\r
- this->_ptr = (T *)memSegment.begin.ptr;\r
- this->_memtype = memSegment.begin.memtype;\r
-\r
- this->bReused = true;\r
- }\r
-\r
-\r
- NcvBool isMemReused() const\r
- {\r
- return this->bReused;\r
- }\r
-\r
-private:\r
-\r
- NcvBool bReused;\r
-};\r
-\r
-#endif // _ncv_hpp_\r
\r
#include <algorithm>\r
\r
-#include "npp.h"\r
#include "NCV.hpp"\r
+#include "NPP_staging/NPP_staging.hpp"\r
#include "NCVRuntimeTemplates.hpp"\r
#include "NCVHaarObjectDetection.hpp"\r
\r
Ncv32f scaleArea,\r
INCVMemAllocator &gpuAllocator,\r
INCVMemAllocator &cpuAllocator,\r
- Ncv32u devPropMajor,\r
- Ncv32u devPropMinor,\r
+ cudaDeviceProp &devProp,\r
cudaStream_t cuStream)\r
{\r
ncvAssertReturn(d_integralImage.memType() == d_weights.memType() &&\r
Ncv32f scaleAreaPixels = scaleArea * ((haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER) *\r
(haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER));\r
\r
- NcvBool bTexCacheCascade = devPropMajor < 2;\r
+ NcvBool bTexCacheCascade = devProp.major < 2;\r
NcvBool bTexCacheIImg = true; //this works better even on Fermi so far\r
- NcvBool bDoAtomicCompaction = devPropMajor >= 2 || (devPropMajor == 1 && devPropMinor >= 3);\r
+ NcvBool bDoAtomicCompaction = devProp.major >= 2 || (devProp.major == 1 && devProp.minor >= 3);\r
\r
NCVVector<Ncv32u> *d_ptrNowData = &d_vecPixelMask;\r
NCVVector<Ncv32u> *d_ptrNowTmp = &d_vecPixelMaskTmp;\r
\r
Ncv32u szNppCompactTmpBuf;\r
- nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf);\r
+ nppsStCompactGetSize_32u(d_vecPixelMask.length(), &szNppCompactTmpBuf, devProp);\r
if (bDoAtomicCompaction)\r
{\r
szNppCompactTmpBuf = 0;\r
}\r
else\r
{\r
- NppStStatus nppSt;\r
+ NCVStatus nppSt;\r
nppSt = nppsStCompact_32u(d_ptrNowTmp->ptr(), d_vecPixelMask.length(),\r
d_ptrNowData->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,\r
- d_tmpBufCompact.ptr(), szNppCompactTmpBuf);\r
- ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);\r
+ ncvAssertReturn(nppSt == NPPST_SUCCESS, NCV_NPP_ERROR);\r
}\r
numDetections = *hp_numDet;\r
}\r
}\r
else\r
{\r
- NppStStatus nppSt;\r
+ NCVStatus nppSt;\r
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), d_vecPixelMask.length(),\r
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,\r
- d_tmpBufCompact.ptr(), szNppCompactTmpBuf);\r
- ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);\r
+ ncvAssertReturnNcvStat(nppSt);\r
}\r
\r
swap(d_ptrNowData, d_ptrNowTmp);\r
}\r
else\r
{\r
- NppStStatus nppSt;\r
+ NCVStatus nppSt;\r
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,\r
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,\r
- d_tmpBufCompact.ptr(), szNppCompactTmpBuf);\r
- ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);\r
+ ncvAssertReturnNcvStat(nppSt);\r
}\r
\r
swap(d_ptrNowData, d_ptrNowTmp);\r
}\r
else\r
{\r
- NppStStatus nppSt;\r
+ NCVStatus nppSt;\r
nppSt = nppsStCompact_32u(d_ptrNowData->ptr(), numDetections,\r
d_ptrNowTmp->ptr(), hp_numDet, OBJDET_MASK_ELEMENT_INVALID_32U,\r
- d_tmpBufCompact.ptr(), szNppCompactTmpBuf);\r
- ncvAssertReturn(nppSt == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ d_tmpBufCompact.ptr(), szNppCompactTmpBuf, devProp);\r
+ ncvAssertReturnNcvStat(nppSt);\r
}\r
\r
swap(d_ptrNowData, d_ptrNowTmp);\r
\r
INCVMemAllocator &gpuAllocator,\r
INCVMemAllocator &cpuAllocator,\r
- Ncv32u devPropMajor,\r
- Ncv32u devPropMinor,\r
+ cudaDeviceProp &devProp,\r
cudaStream_t cuStream)\r
{\r
ncvAssertReturn(d_srcImg.memType() == d_dstRects.memType() &&\r
NCVVectorAlloc<NcvRect32u> h_hypothesesIntermediate(cpuAllocator, d_srcImg.width() * d_srcImg.height());\r
ncvAssertReturn(h_hypothesesIntermediate.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
\r
- NppStStatus nppStat;\r
+ NCVStatus nppStat;\r
Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;\r
- nppStat = nppiStIntegralGetSize_8u32u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral);\r
- ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);\r
- nppStat = nppiStSqrIntegralGetSize_8u64u(NppStSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral);\r
- ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufIntegral, devProp);\r
+ ncvAssertReturnNcvStat(nppStat);\r
+ nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(d_srcImg.width(), d_srcImg.height()), &szTmpBufSqIntegral, devProp);\r
+ ncvAssertReturnNcvStat(nppStat);\r
NCVVectorAlloc<Ncv8u> d_tmpIIbuf(gpuAllocator, std::max(szTmpBufIntegral, szTmpBufSqIntegral));\r
ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
\r
\r
nppStat = nppiStIntegral_8u32u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),\r
d_integralImage.ptr(), d_integralImage.pitch(),\r
- NppStSize32u(d_srcImg.width(), d_srcImg.height()),\r
- d_tmpIIbuf.ptr(), szTmpBufIntegral);\r
- ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ NcvSize32u(d_srcImg.width(), d_srcImg.height()),\r
+ d_tmpIIbuf.ptr(), szTmpBufIntegral, devProp);\r
+ ncvAssertReturnNcvStat(nppStat);\r
\r
nppStat = nppiStSqrIntegral_8u64u_C1R(d_srcImg.ptr(), d_srcImg.pitch(),\r
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),\r
- NppStSize32u(d_srcImg.width(), d_srcImg.height()),\r
- d_tmpIIbuf.ptr(), szTmpBufSqIntegral);\r
- ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ NcvSize32u(d_srcImg.width(), d_srcImg.height()),\r
+ d_tmpIIbuf.ptr(), szTmpBufSqIntegral, devProp);\r
+ ncvAssertReturnNcvStat(nppStat);\r
\r
NCV_SKIP_COND_END\r
\r
Ncv32u scale = scalesVector[i];\r
\r
NcvSize32u srcRoi, scaledIIRoi, searchRoi;\r
- NppStSize32u srcIIRoi;\r
+ NcvSize32u srcIIRoi;\r
srcRoi.width = d_srcImg.width();\r
srcRoi.height = d_srcImg.height();\r
srcIIRoi.width = srcRoi.width + 1;\r
d_integralImage.ptr(), d_integralImage.pitch(),\r
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),\r
srcIIRoi, scale, true);\r
- ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ ncvAssertReturnNcvStat(nppStat);\r
\r
nppStat = nppiStDownsampleNearest_64u_C1R(\r
d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),\r
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),\r
srcIIRoi, scale, true);\r
- ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ ncvAssertReturnNcvStat(nppStat);\r
\r
- const NppStRect32u rect(\r
+ const NcvRect32u rect(\r
HAAR_STDDEV_BORDER,\r
HAAR_STDDEV_BORDER,\r
haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,\r
d_scaledIntegralImage.ptr(), d_scaledIntegralImage.pitch(),\r
d_scaledSqIntegralImage.ptr(), d_scaledSqIntegralImage.pitch(),\r
d_rectStdDev.ptr(), d_rectStdDev.pitch(),\r
- NppStSize32u(searchRoi.width, searchRoi.height), rect,\r
+ NcvSize32u(searchRoi.width, searchRoi.height), rect,\r
(Ncv32f)scale*scale, true);\r
- ncvAssertReturn(nppStat == NPP_SUCCESS, NCV_NPP_ERROR);\r
+ ncvAssertReturnNcvStat(nppStat);\r
\r
NCV_SKIP_COND_END\r
\r
detectionsOnThisScale,\r
haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,\r
searchRoi, pixelStep, (Ncv32f)scale*scale,\r
- gpuAllocator, cpuAllocator, devPropMajor, devPropMinor, cuStream);\r
- ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);\r
+ gpuAllocator, cpuAllocator, devProp, cuStream);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
\r
NCV_SKIP_COND_BEGIN\r
\r
return ncvStat;\r
}\r
\r
+\r
+\r
+\r
+\r
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,\r
Ncv32u &numHypotheses,\r
Ncv32u minNeighbors,\r
}\r
\r
\r
-NCVStatus ncvHaarStoreNVBIN_host(std::string &filename,\r
+NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,\r
HaarClassifierCascadeDescriptor haar,\r
NCVVector<HaarStage64> &h_HaarStages,\r
NCVVector<HaarClassifierNode128> &h_HaarNodes,\r
\r
#define HaarFeature64_CreateCheck_MaxRectField 0xFF\r
\r
- __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u clsWidth, Ncv32u clsHeight)\r
+ __host__ NCVStatus setRect(Ncv32u rectX, Ncv32u rectY, Ncv32u rectWidth, Ncv32u rectHeight, Ncv32u /*clsWidth*/, Ncv32u /*clsHeight*/)\r
{\r
ncvAssertReturn(rectWidth <= HaarFeature64_CreateCheck_MaxRectField && rectHeight <= HaarFeature64_CreateCheck_MaxRectField, NCV_HAAR_TOO_LARGE_FEATURES);\r
- ((NcvRect8u*)&(this->_ui2.x))->x = rectX;\r
- ((NcvRect8u*)&(this->_ui2.x))->y = rectY;\r
- ((NcvRect8u*)&(this->_ui2.x))->width = rectWidth;\r
- ((NcvRect8u*)&(this->_ui2.x))->height = rectHeight;\r
+ ((NcvRect8u*)&(this->_ui2.x))->x = (Ncv8u)rectX;\r
+ ((NcvRect8u*)&(this->_ui2.x))->y = (Ncv8u)rectY;\r
+ ((NcvRect8u*)&(this->_ui2.x))->width = (Ncv8u)rectWidth;\r
+ ((NcvRect8u*)&(this->_ui2.x))->height = (Ncv8u)rectHeight;\r
return NCV_SUCCESS;\r
}\r
\r
};\r
\r
\r
-NPPST_CT_ASSERT(sizeof(HaarFeature64) == 8);\r
-NPPST_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);\r
-NPPST_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);\r
-NPPST_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);\r
-NPPST_CT_ASSERT(sizeof(HaarStage64) == 8);\r
+NCV_CT_ASSERT(sizeof(HaarFeature64) == 8);\r
+NCV_CT_ASSERT(sizeof(HaarFeatureDescriptor32) == 4);\r
+NCV_CT_ASSERT(sizeof(HaarClassifierNodeDescriptor32) == 4);\r
+NCV_CT_ASSERT(sizeof(HaarClassifierNode128) == 16);\r
+NCV_CT_ASSERT(sizeof(HaarStage64) == 8);\r
\r
\r
//==============================================================================\r
NCVPipeObjDet_VisualizeInPlace = 0x004,\r
};\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvDetectObjectsMultiScale_device(NCVMatrix<Ncv8u> &d_srcImg,\r
NcvSize32u srcRoi,\r
NCVVector<NcvRect32u> &d_dstRects,\r
\r
INCVMemAllocator &gpuAllocator,\r
INCVMemAllocator &cpuAllocator,\r
- Ncv32u devPropMajor,\r
- Ncv32u devPropMinor,\r
+ cudaDeviceProp &devProp,\r
cudaStream_t cuStream);\r
\r
\r
#define OBJDET_MASK_ELEMENT_INVALID_32U 0xFFFFFFFF\r
#define HAAR_STDDEV_BORDER 1\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvApplyHaarClassifierCascade_device(NCVMatrix<Ncv32u> &d_integralImage,\r
NCVMatrix<Ncv32f> &d_weights,\r
NCVMatrixAlloc<Ncv32u> &d_pixelMask,\r
Ncv32f scaleArea,\r
INCVMemAllocator &gpuAllocator,\r
INCVMemAllocator &cpuAllocator,\r
- Ncv32u devPropMajor,\r
- Ncv32u devPropMinor,\r
+ cudaDeviceProp &devProp,\r
cudaStream_t cuStream);\r
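\r
/*\r
* A minimal calling sketch for the new signature (illustration only; the calling\r
* code is an assumption): the full cudaDeviceProp of the current device is passed\r
* instead of the separate major/minor numbers:\r
*\r
*   int devId;\r
*   cudaDeviceProp devProp;\r
*   ncvAssertCUDAReturn(cudaGetDevice(&devId), NCV_CUDA_ERROR);\r
*   ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);\r
*   //... then pass devProp to ncvApplyHaarClassifierCascade_device(...)\r
*/\r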
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvApplyHaarClassifierCascade_host(NCVMatrix<Ncv32u> &h_integralImage,\r
NCVMatrix<Ncv32f> &h_weights,\r
NCVMatrixAlloc<Ncv32u> &h_pixelMask,\r
Ncv32u pixelStep,\r
Ncv32f scaleArea);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvDrawRects_8u_device(Ncv8u *d_dst,\r
Ncv32u dstStride,\r
Ncv32u dstWidth,\r
Ncv8u color,\r
cudaStream_t cuStream);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvDrawRects_32u_device(Ncv32u *d_dst,\r
Ncv32u dstStride,\r
Ncv32u dstWidth,\r
Ncv32u color,\r
cudaStream_t cuStream);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvDrawRects_8u_host(Ncv8u *h_dst,\r
Ncv32u dstStride,\r
Ncv32u dstWidth,\r
Ncv32u numRects,\r
Ncv8u color);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvDrawRects_32u_host(Ncv32u *h_dst,\r
Ncv32u dstStride,\r
Ncv32u dstWidth,\r
\r
#define RECT_SIMILARITY_PROPORTION 0.2f\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvGrowDetectionsVector_device(NCVVector<Ncv32u> &pixelMask,\r
Ncv32u numPixelMaskDetections,\r
NCVVector<NcvRect32u> &hypotheses,\r
Ncv32f curScale,\r
cudaStream_t cuStream);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvGrowDetectionsVector_host(NCVVector<Ncv32u> &pixelMask,\r
Ncv32u numPixelMaskDetections,\r
NCVVector<NcvRect32u> &hypotheses,\r
Ncv32u rectHeight,\r
Ncv32f curScale);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvFilterHypotheses_host(NCVVector<NcvRect32u> &hypotheses,\r
Ncv32u &numHypotheses,\r
Ncv32u minNeighbors,\r
Ncv32f intersectEps,\r
NCVVector<Ncv32u> *hypothesesWeights);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvHaarGetClassifierSize(const std::string &filename, Ncv32u &numStages,\r
Ncv32u &numNodes, Ncv32u &numFeatures);\r
\r
-\r
+NCV_EXPORTS\r
NCVStatus ncvHaarLoadFromFile_host(const std::string &filename,\r
HaarClassifierCascadeDescriptor &haar,\r
NCVVector<HaarStage64> &h_HaarStages,\r
NCVVector<HaarFeature64> &h_HaarFeatures);\r
\r
\r
+NCV_EXPORTS\r
NCVStatus ncvHaarStoreNVBIN_host(const std::string &filename,\r
HaarClassifierCascadeDescriptor haar,\r
NCVVector<HaarStage64> &h_HaarStages,\r
+++ /dev/null
-////////////////////////////////////////////////////////////////////////////////\r
-// The Loki Library\r
-// Copyright (c) 2001 by Andrei Alexandrescu\r
-// This code accompanies the book:\r
-// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design \r
-// Patterns Applied". Copyright (c) 2001. Addison-Wesley.\r
-// Permission to use, copy, modify, distribute and sell this software for any \r
-// purpose is hereby granted without fee, provided that the above copyright \r
-// notice appear in all copies and that both that copyright notice and this \r
-// permission notice appear in supporting documentation.\r
-// The author or Addison-Welsey Longman make no representations about the \r
-// suitability of this software for any purpose. It is provided "as is" \r
-// without express or implied warranty.\r
-// http://loki-lib.sourceforge.net/index.php?n=Main.License\r
-////////////////////////////////////////////////////////////////////////////////\r
-\r
-#ifndef _ncvruntimetemplates_hpp_\r
-#define _ncvruntimetemplates_hpp_\r
-\r
-#include <stdarg.h>\r
-#include <vector>\r
-\r
-\r
-namespace Loki\r
-{\r
- //==============================================================================\r
- // class NullType\r
- // Used as a placeholder for "no type here"\r
- // Useful as an end marker in typelists \r
- //==============================================================================\r
-\r
- class NullType {};\r
-\r
- //==============================================================================\r
- // class template Typelist\r
- // The building block of typelists of any length\r
- // Use it through the LOKI_TYPELIST_NN macros\r
- // Defines nested types:\r
- // Head (first element, a non-typelist type by convention)\r
- // Tail (second element, can be another typelist)\r
- //==============================================================================\r
-\r
- template <class T, class U>\r
- struct Typelist\r
- {\r
- typedef T Head;\r
- typedef U Tail;\r
- };\r
-\r
- //==============================================================================\r
- // class template Int2Type\r
- // Converts each integral constant into a unique type\r
- // Invocation: Int2Type<v> where v is a compile-time constant integral\r
- // Defines 'value', an enum that evaluates to v\r
- //==============================================================================\r
-\r
- template <int v>\r
- struct Int2Type\r
- {\r
- enum { value = v };\r
- };\r
-\r
- namespace TL\r
- {\r
- //==============================================================================\r
- // class template TypeAt\r
- // Finds the type at a given index in a typelist\r
- // Invocation (TList is a typelist and index is a compile-time integral \r
- // constant):\r
- // TypeAt<TList, index>::Result\r
- // returns the type in position 'index' in TList\r
- // If you pass an out-of-bounds index, the result is a compile-time error\r
- //==============================================================================\r
-\r
- template <class TList, unsigned int index> struct TypeAt;\r
-\r
- template <class Head, class Tail>\r
- struct TypeAt<Typelist<Head, Tail>, 0>\r
- {\r
- typedef Head Result;\r
- };\r
-\r
- template <class Head, class Tail, unsigned int i>\r
- struct TypeAt<Typelist<Head, Tail>, i>\r
- {\r
- typedef typename TypeAt<Tail, i - 1>::Result Result;\r
- };\r
- }\r
-}\r
-\r
-\r
-////////////////////////////////////////////////////////////////////////////////\r
-// Runtime boolean template instance dispatcher\r
-// Cyril Crassin <cyril.crassin@icare3d.org>\r
-// NVIDIA, 2010\r
-////////////////////////////////////////////////////////////////////////////////\r
-\r
-namespace NCVRuntimeTemplateBool\r
-{\r
- //This struct is used to transform a list of parameters into template arguments\r
- //The idea is to build a typelist containing the arguments\r
- //and to pass this typelist to a user defined functor\r
- template<typename TList, int NumArguments, class Func>\r
- struct KernelCaller\r
- {\r
- //Convenience function used by the user\r
- //Takes a variable argument list, transforms it into a list\r
- static void call(Func &functor, int dummy, ...)\r
- {\r
- //Vector used to collect arguments\r
- std::vector<int> templateParamList;\r
-\r
- //Variable argument list manipulation\r
- va_list listPointer;\r
- va_start(listPointer, dummy);\r
- //Collect parameters into the list\r
- for(int i=0; i<NumArguments; i++)\r
- {\r
- int val = va_arg(listPointer, int);\r
- templateParamList.push_back(val);\r
- }\r
- va_end(listPointer);\r
-\r
- //Call the actual typelist building function\r
- call(functor, templateParamList);\r
- }\r
-\r
- //Actual function called recursively to build a typelist based\r
- //on a list of values\r
- static void call( Func &functor, std::vector<int> &templateParamList)\r
- {\r
- //Get current parameter value in the list\r
- int val = templateParamList[templateParamList.size() - 1];\r
- templateParamList.pop_back();\r
-\r
- //Select the compile time value to add into the typelist\r
- //depending on the runtime variable and make recursive call. \r
- //Both versions are really instantiated\r
- if(val)\r
- {\r
- KernelCaller<\r
- Loki::Typelist<typename Loki::Int2Type<true>, TList >,\r
- NumArguments-1, Func >\r
- ::call(functor, templateParamList);\r
- }\r
- else\r
- {\r
- KernelCaller< \r
- Loki::Typelist<typename Loki::Int2Type<false>, TList >,\r
- NumArguments-1, Func >\r
- ::call(functor, templateParamList);\r
- }\r
- }\r
- };\r
-\r
- //Specialization for 0 value left in the list\r
- //-> actual kernel functor call\r
- template<class TList, class Func>\r
- struct KernelCaller<TList, 0, Func>\r
- {\r
- static void call(Func &functor)\r
- {\r
- //Call to the functor's kernel call method\r
- functor.call(TList()); //TList instantiated to get the method template parameter resolved\r
- }\r
-\r
- static void call(Func &functor, std::vector<int> &templateParams)\r
- {\r
- functor.call(TList());\r
- }\r
- };\r
-}\r
-\r
-#endif //_ncvruntimetemplates_hpp_\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
+// \r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+\r
+#include <vector>\r
+#include <cuda_runtime.h>\r
+#include "NPP_staging.hpp"\r
+\r
+#if defined _SELF_TEST_\r
+#include <stdio.h>\r
+#endif\r
+\r
+\r
+texture<Ncv8u, 1, cudaReadModeElementType> tex8u;\r
+texture<Ncv32u, 1, cudaReadModeElementType> tex32u;\r
+texture<uint2, 1, cudaReadModeElementType> tex64u;\r
+\r
+\r
+//==============================================================================\r
+//\r
+// CUDA streams handling\r
+//\r
+//==============================================================================\r
+\r
+\r
+static cudaStream_t nppStream = 0;\r
+\r
+\r
+cudaStream_t nppStGetActiveCUDAstream(void)\r
+{\r
+ return nppStream;\r
+}\r
+\r
+\r
+\r
+cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream)\r
+{\r
+ cudaStream_t tmp = nppStream;\r
+ nppStream = cudaStream;\r
+ return tmp;\r
+}\r
+\r
+\r
+//==============================================================================\r
+//\r
+// BlockScan.cuh\r
+//\r
+//==============================================================================\r
+\r
+\r
+//Almost the same as naive scan1Inclusive, but doesn't need __syncthreads()\r
+//assuming size <= WARP_SIZE and size is a power of 2\r
+template <class T>\r
+inline __device__ T warpScanInclusive(T idata, volatile T *s_Data)\r
+{\r
+ Ncv32u pos = 2 * threadIdx.x - (threadIdx.x & (K_WARP_SIZE - 1));\r
+ s_Data[pos] = 0;\r
+ pos += K_WARP_SIZE;\r
+ s_Data[pos] = idata;\r
+\r
+ for(Ncv32u offset = 1; offset < K_WARP_SIZE; offset <<= 1)\r
+ {\r
+ s_Data[pos] += s_Data[pos - offset];\r
+ }\r
+\r
+ return s_Data[pos];\r
+}\r
+\r
+\r
+template <class T>\r
+inline __device__ T warpScanExclusive(T idata, volatile T *s_Data)\r
+{\r
+ return warpScanInclusive(idata, s_Data) - idata;\r
+}\r
+\r
+\r
+template <class T, Ncv32u tiNumScanThreads>\r
+inline __device__ T blockScanInclusive(T idata, volatile T *s_Data)\r
+{\r
+ if (tiNumScanThreads > K_WARP_SIZE)\r
+ {\r
+ //Bottom-level inclusive warp scan\r
+ T warpResult = warpScanInclusive(idata, s_Data);\r
+\r
+ //Save top elements of each warp for exclusive warp scan\r
+ //sync to wait for warp scans to complete (because s_Data is being overwritten)\r
+ __syncthreads();\r
+ if( (threadIdx.x & (K_WARP_SIZE - 1)) == (K_WARP_SIZE - 1) )\r
+ {\r
+ s_Data[threadIdx.x >> K_LOG2_WARP_SIZE] = warpResult;\r
+ }\r
+\r
+ //wait for warp scans to complete\r
+ __syncthreads();\r
+\r
+ if( threadIdx.x < (tiNumScanThreads / K_WARP_SIZE) )\r
+ {\r
+ //grab top warp elements\r
+ T val = s_Data[threadIdx.x];\r
+ //calculate exclusive scan and write back to shared memory\r
+ s_Data[threadIdx.x] = warpScanExclusive(val, s_Data);\r
+ }\r
+\r
+ //return updated warp scans with exclusive scan results\r
+ __syncthreads();\r
+ return warpResult + s_Data[threadIdx.x >> K_LOG2_WARP_SIZE];\r
+ }\r
+ else\r
+ {\r
+ return warpScanInclusive(idata, s_Data);\r
+ }\r
+}\r
+\r
+\r
+//==============================================================================\r
+//\r
+// IntegralImage.cu\r
+//\r
+//==============================================================================\r
+\r
+\r
+const Ncv32u NUM_SCAN_THREADS = 256;\r
+const Ncv32u LOG2_NUM_SCAN_THREADS = 8;\r
+\r
+\r
+template<class T_in, class T_out>\r
+struct _scanElemOp\r
+{\r
+ template<bool tbDoSqr>\r
+ static inline __host__ __device__ T_out scanElemOp(T_in elem);\r
+\r
+ template<>\r
+ static inline __host__ __device__ T_out scanElemOp<false>(T_in elem)\r
+ {\r
+ return (T_out)elem;\r
+ }\r
+\r
+ template<>\r
+ static inline __host__ __device__ T_out scanElemOp<true>(T_in elem)\r
+ {\r
+ return (T_out)(elem*elem);\r
+ }\r
+};\r
+\r
+\r
+template<class T>\r
+inline __device__ T readElem(T *d_src, Ncv32u srcStride, Ncv32u curElemOffs);\r
+\r
+\r
+template<>\r
+inline __device__ Ncv8u readElem<Ncv8u>(Ncv8u *d_src, Ncv32u srcStride, Ncv32u curElemOffs)\r
+{\r
+ return tex1Dfetch(tex8u, srcStride * blockIdx.x + curElemOffs);\r
+}\r
+\r
+\r
+template<>\r
+inline __device__ Ncv32u readElem<Ncv32u>(Ncv32u *d_src, Ncv32u srcStride, Ncv32u curElemOffs)\r
+{\r
+ return d_src[curElemOffs];\r
+}\r
+\r
+\r
+template<>\r
+inline __device__ Ncv32f readElem<Ncv32f>(Ncv32f *d_src, Ncv32u srcStride, Ncv32u curElemOffs)\r
+{\r
+ return d_src[curElemOffs];\r
+}\r
+\r
+\r
+/**\r
+* \brief Segmented scan kernel\r
+*\r
+* Calculates per-row prefix scans of the input image.\r
+* Out-of-bounds safe: reads 'srcWidth' elements, writes 'srcWidth+1' elements\r
+*\r
+* \tparam T_in Type of input image elements\r
+* \tparam T_out Type of output image elements\r
+* \tparam tbDoSqr Squares each input pixel before accumulation when true (used to build squared integral images)\r
+*\r
+* \param d_src [IN] Source image pointer\r
+* \param srcWidth [IN] Source image width\r
+* \param srcStride [IN] Source image stride\r
+* \param d_II [OUT] Output image pointer\r
+* \param IIstride [IN] Output image stride\r
+*\r
+* \return None\r
+*/\r
+template <class T_in, class T_out, bool tbDoSqr>\r
+__global__ void scanRows(T_in *d_src, Ncv32u srcWidth, Ncv32u srcStride,\r
+ T_out *d_II, Ncv32u IIstride)\r
+{\r
+ //advance pointers to the current line\r
+ if (sizeof(T_in) != 1)\r
+ {\r
+ d_src += srcStride * blockIdx.x;\r
+ }\r
+ //for initial image 8bit source we use texref tex8u\r
+ d_II += IIstride * blockIdx.x;\r
+\r
+ Ncv32u numBuckets = (srcWidth + NUM_SCAN_THREADS - 1) >> LOG2_NUM_SCAN_THREADS;\r
+ Ncv32u offsetX = 0;\r
+\r
+ __shared__ T_out shmem[NUM_SCAN_THREADS * 2];\r
+ __shared__ T_out carryElem;\r
+ carryElem = 0;\r
+ __syncthreads();\r
+\r
+ while (numBuckets--)\r
+ {\r
+ Ncv32u curElemOffs = offsetX + threadIdx.x;\r
+ T_out curScanElem;\r
+\r
+ T_in curElem;\r
+ T_out curElemMod;\r
+\r
+ if (curElemOffs < srcWidth)\r
+ {\r
+ //load elements\r
+ curElem = readElem<T_in>(d_src, srcStride, curElemOffs);\r
+ }\r
+ curElemMod = _scanElemOp<T_in, T_out>::scanElemOp<tbDoSqr>(curElem);\r
+\r
+ //inclusive scan\r
+ curScanElem = blockScanInclusive<T_out, NUM_SCAN_THREADS>(curElemMod, shmem);\r
+\r
+ if (curElemOffs <= srcWidth)\r
+ {\r
+ //make scan exclusive and write the bucket to the output buffer\r
+ d_II[curElemOffs] = carryElem + curScanElem - curElemMod;\r
+ offsetX += NUM_SCAN_THREADS;\r
+ }\r
+\r
+ //remember last element for subsequent buckets adjustment\r
+ __syncthreads();\r
+ if (threadIdx.x == NUM_SCAN_THREADS-1)\r
+ {\r
+ carryElem += curScanElem;\r
+ }\r
+ __syncthreads();\r
+ }\r
+\r
+ if (offsetX == srcWidth && !threadIdx.x)\r
+ {\r
+ d_II[offsetX] = carryElem;\r
+ }\r
+}\r
+\r
+\r
+template <bool tbDoSqr, class T_in, class T_out>\r
+NCVStatus scanRowsWrapperDevice(T_in *d_src, Ncv32u srcStride,\r
+ T_out *d_dst, Ncv32u dstStride, NcvSize32u roi)\r
+{\r
+ cudaChannelFormatDesc cfdTex;\r
+ if (sizeof(T_in) == 1)\r
+ {\r
+ cfdTex = cudaCreateChannelDesc<Ncv8u>();\r
+ size_t alignmentOffset;\r
+ ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, tex8u, d_src, cfdTex, roi.height * srcStride), NPPST_TEXTURE_BIND_ERROR);\r
+ ncvAssertReturn(alignmentOffset==0, NPPST_TEXTURE_BIND_ERROR);\r
+ }\r
+ scanRows\r
+ <T_in, T_out, tbDoSqr>\r
+ <<<roi.height, NUM_SCAN_THREADS, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_src, roi.width, srcStride, d_dst, dstStride);\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+\r
+#if defined _SELF_TEST_\r
+ T_in *h_src;\r
+ T_out *h_dst;\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_src, srcStride * roi.height * sizeof(T_in)), NPPST_MEM_ALLOC_ERR);\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_dst, dstStride * roi.height * sizeof(T_out)), NPPST_MEM_ALLOC_ERR);\r
+ memset(h_src, 0, srcStride * roi.height * sizeof(T_in));\r
+ memset(h_dst, 0, dstStride * roi.height * sizeof(T_out));\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_src, d_src, srcStride * roi.height * sizeof(T_in), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_dst, d_dst, dstStride * roi.height * sizeof(T_out), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+ NcvBool bPass = true;\r
+ for (Ncv32u i=0; i<roi.height && bPass; i++)\r
+ {\r
+ T_out curElem = 0;\r
+ for (Ncv32u j=0; j<roi.width+1 && bPass; j++)\r
+ {\r
+ if (curElem != h_dst[i * dstStride + j])\r
+ {\r
+ printf("CIntegralImage::scanRowsWrapperDevice self test failed: i=%d, j=%d, cpu=%d, gpu=%d\n", i, j, curElem, h_dst[i * dstStride + j]);\r
+ bPass = false;\r
+ }\r
+ if (j < roi.width)\r
+ {\r
+                curElem += _scanElemOp<T_in, T_out>::scanElemOp<tbDoSqr>(h_src[i*srcStride+j]);\r
+ }\r
+ }\r
+ }\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_src), NPPST_MEMFREE_ERR);\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_dst), NPPST_MEMFREE_ERR);\r
+ printf("CIntegralImage::scanRowsWrapperDevice %s\n", bPass?"PASSED":"FAILED");\r
+#endif\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+Ncv32u getPaddedDimension(Ncv32u dim, Ncv32u elemTypeSize, Ncv32u allocatorAlignment)\r
+{\r
+ Ncv32u alignMask = allocatorAlignment-1;\r
+ Ncv32u inverseAlignMask = ~alignMask;\r
+ Ncv32u dimBytes = dim * elemTypeSize;\r
+ Ncv32u pitch = (dimBytes + alignMask) & inverseAlignMask;\r
+ Ncv32u PaddedDim = pitch / elemTypeSize;\r
+ return PaddedDim;\r
+}\r
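+\r
+// Worked example (illustrative only, not exercised by the code): for dim = 1001 elements of\r
+// Ncv32u (4 bytes) and a 128-byte allocator alignment, dimBytes = 4004, the pitch rounds up\r
+// to 4096 bytes, and the padded dimension becomes 4096 / 4 = 1024 elements.\r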
+\r
+\r
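+// Computes the (roi.width + 1) x (roi.height + 1) integral image in four device passes:\r
+// an exclusive prefix scan along each row, a transpose, a second row scan (which now runs\r
+// along the original columns), and a final transpose into d_dst.\r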
+template <class T_in, class T_out>\r
+NCVStatus ncvIntegralImage_device(T_in *d_src, Ncv32u srcStep,\r
+ T_out *d_dst, Ncv32u dstStep, NcvSize32u roi,\r
+ INCVMemAllocator &gpuAllocator)\r
+{\r
+ ncvAssertReturn(sizeof(T_out) == sizeof(Ncv32u), NPPST_MEM_INTERNAL_ERROR);\r
+ ncvAssertReturn(gpuAllocator.memType() == NCVMemoryTypeDevice ||\r
+ gpuAllocator.memType() == NCVMemoryTypeNone, NPPST_MEM_RESIDENCE_ERROR);\r
+ ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+ ncvAssertReturn((d_src != NULL && d_dst != NULL) || gpuAllocator.isCounting(), NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roi.width > 0 && roi.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(srcStep >= roi.width * sizeof(T_in) &&\r
+ dstStep >= (roi.width + 1) * sizeof(T_out) &&\r
+ srcStep % sizeof(T_in) == 0 &&\r
+ dstStep % sizeof(T_out) == 0, NPPST_INVALID_STEP);\r
+ srcStep /= sizeof(T_in);\r
+ dstStep /= sizeof(T_out);\r
+\r
+ Ncv32u WidthII = roi.width + 1;\r
+ Ncv32u HeightII = roi.height + 1;\r
+ Ncv32u PaddedWidthII32 = getPaddedDimension(WidthII, sizeof(Ncv32u), gpuAllocator.alignment());\r
+ Ncv32u PaddedHeightII32 = getPaddedDimension(HeightII, sizeof(Ncv32u), gpuAllocator.alignment());\r
+\r
+ NCVMatrixAlloc<T_out> Tmp32_1(gpuAllocator, PaddedWidthII32, PaddedHeightII32);\r
+ ncvAssertReturn(gpuAllocator.isCounting() || Tmp32_1.isMemAllocated(), NPPST_MEM_INTERNAL_ERROR);\r
+ NCVMatrixAlloc<T_out> Tmp32_2(gpuAllocator, PaddedHeightII32, PaddedWidthII32);\r
+ ncvAssertReturn(gpuAllocator.isCounting() || Tmp32_2.isMemAllocated(), NPPST_MEM_INTERNAL_ERROR);\r
+ ncvAssertReturn(Tmp32_1.pitch() * Tmp32_1.height() == Tmp32_2.pitch() * Tmp32_2.height(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat;\r
+ NCV_SET_SKIP_COND(gpuAllocator.isCounting());\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvStat = scanRowsWrapperDevice\r
+ <false>\r
+ (d_src, srcStep, Tmp32_1.ptr(), PaddedWidthII32, roi);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)Tmp32_1.ptr(), PaddedWidthII32*sizeof(Ncv32u),\r
+ (Ncv32u *)Tmp32_2.ptr(), PaddedHeightII32*sizeof(Ncv32u), NcvSize32u(WidthII, roi.height));\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ ncvStat = scanRowsWrapperDevice\r
+ <false>\r
+ (Tmp32_2.ptr(), PaddedHeightII32, Tmp32_1.ptr(), PaddedHeightII32, NcvSize32u(roi.height, WidthII));\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)Tmp32_1.ptr(), PaddedHeightII32*sizeof(Ncv32u),\r
+ (Ncv32u *)d_dst, dstStep*sizeof(Ncv32u), NcvSize32u(HeightII, WidthII));\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus ncvSquaredIntegralImage_device(Ncv8u *d_src, Ncv32u srcStep,\r
+ Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roi,\r
+ INCVMemAllocator &gpuAllocator)\r
+{\r
+ ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+ ncvAssertReturn(gpuAllocator.memType() == NCVMemoryTypeDevice ||\r
+ gpuAllocator.memType() == NCVMemoryTypeNone, NPPST_MEM_RESIDENCE_ERROR);\r
+ ncvAssertReturn((d_src != NULL && d_dst != NULL) || gpuAllocator.isCounting(), NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roi.width > 0 && roi.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(srcStep >= roi.width &&\r
+ dstStep >= (roi.width + 1) * sizeof(Ncv64u) &&\r
+ dstStep % sizeof(Ncv64u) == 0, NPPST_INVALID_STEP);\r
+ dstStep /= sizeof(Ncv64u);\r
+\r
+ Ncv32u WidthII = roi.width + 1;\r
+ Ncv32u HeightII = roi.height + 1;\r
+ Ncv32u PaddedWidthII32 = getPaddedDimension(WidthII, sizeof(Ncv32u), gpuAllocator.alignment());\r
+ Ncv32u PaddedHeightII32 = getPaddedDimension(HeightII, sizeof(Ncv32u), gpuAllocator.alignment());\r
+ Ncv32u PaddedWidthII64 = getPaddedDimension(WidthII, sizeof(Ncv64u), gpuAllocator.alignment());\r
+ Ncv32u PaddedHeightII64 = getPaddedDimension(HeightII, sizeof(Ncv64u), gpuAllocator.alignment());\r
+ Ncv32u PaddedWidthMax = PaddedWidthII32 > PaddedWidthII64 ? PaddedWidthII32 : PaddedWidthII64;\r
+ Ncv32u PaddedHeightMax = PaddedHeightII32 > PaddedHeightII64 ? PaddedHeightII32 : PaddedHeightII64;\r
+\r
+ NCVMatrixAlloc<Ncv32u> Tmp32_1(gpuAllocator, PaddedWidthII32, PaddedHeightII32);\r
+ ncvAssertReturn(Tmp32_1.isMemAllocated(), NPPST_MEM_INTERNAL_ERROR);\r
+ NCVMatrixAlloc<Ncv64u> Tmp64(gpuAllocator, PaddedWidthMax, PaddedHeightMax);\r
+ ncvAssertReturn(Tmp64.isMemAllocated(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVMatrixReuse<Ncv32u> Tmp32_2(Tmp64.getSegment(), gpuAllocator.alignment(), PaddedWidthII32, PaddedHeightII32);\r
+ ncvAssertReturn(Tmp32_2.isMemReused(), NPPST_MEM_INTERNAL_ERROR);\r
+ NCVMatrixReuse<Ncv64u> Tmp64_2(Tmp64.getSegment(), gpuAllocator.alignment(), PaddedWidthII64, PaddedHeightII64);\r
+ ncvAssertReturn(Tmp64_2.isMemReused(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat;\r
+ NCV_SET_SKIP_COND(gpuAllocator.isCounting());\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvStat = scanRowsWrapperDevice\r
+ <true, Ncv8u, Ncv32u>\r
+ (d_src, srcStep, Tmp32_2.ptr(), PaddedWidthII32, roi);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ ncvStat = nppiStTranspose_32u_C1R(Tmp32_2.ptr(), PaddedWidthII32*sizeof(Ncv32u),\r
+ Tmp32_1.ptr(), PaddedHeightII32*sizeof(Ncv32u), NcvSize32u(WidthII, roi.height));\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ ncvStat = scanRowsWrapperDevice\r
+ <false, Ncv32u, Ncv64u>\r
+ (Tmp32_1.ptr(), PaddedHeightII32, Tmp64_2.ptr(), PaddedHeightII64, NcvSize32u(roi.height, WidthII));\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ ncvStat = nppiStTranspose_64u_C1R(Tmp64_2.ptr(), PaddedHeightII64*sizeof(Ncv64u),\r
+ d_dst, dstStep*sizeof(Ncv64u), NcvSize32u(HeightII, WidthII));\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp)\r
+{\r
+ ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);\r
+\r
+ NCVMemStackAllocator gpuCounter(devProp.textureAlignment);\r
+ ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = ncvIntegralImage_device((Ncv8u*)NULL, roiSize.width,\r
+ (Ncv32u*)NULL, (roiSize.width+1) * sizeof(Ncv32u),\r
+ roiSize, gpuCounter);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ *pBufsize = (Ncv32u)gpuCounter.maxSize();\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp)\r
+{\r
+ ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);\r
+\r
+ NCVMemStackAllocator gpuCounter(devProp.textureAlignment);\r
+ ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = ncvIntegralImage_device((Ncv32f*)NULL, roiSize.width * sizeof(Ncv32f),\r
+ (Ncv32f*)NULL, (roiSize.width+1) * sizeof(Ncv32f),\r
+ roiSize, gpuCounter);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ *pBufsize = (Ncv32u)gpuCounter.maxSize();\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp)\r
+{\r
+ ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);\r
+\r
+ NCVMemStackAllocator gpuCounter(devProp.textureAlignment);\r
+ ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = ncvSquaredIntegralImage_device(NULL, roiSize.width,\r
+ NULL, (roiSize.width+1) * sizeof(Ncv64u),\r
+ roiSize, gpuCounter);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ *pBufsize = (Ncv32u)gpuCounter.maxSize();\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,\r
+ Ncv32u *d_dst, Ncv32u dstStep,\r
+ NcvSize32u roiSize, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp)\r
+{\r
+ NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer);\r
+ ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = ncvIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
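+\r
+/*\r
+ * Typical usage (a sketch under assumed variable names, not part of the library):\r
+ *\r
+ *     cudaDeviceProp devProp;                          // filled via cudaGetDeviceProperties()\r
+ *     Ncv32u bufSize;\r
+ *     nppiStIntegralGetSize_8u32u(roi, &bufSize, devProp);\r
+ *     Ncv8u *pBuffer;                                  // device scratch memory of bufSize bytes\r
+ *     cudaMalloc((void **)&pBuffer, bufSize);\r
+ *     nppiStIntegral_8u32u_C1R(d_src, srcStep, d_dst, dstStep, roi, pBuffer, bufSize, devProp);\r
+ *     cudaFree(pBuffer);\r
+ */\r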
+\r
+\r
+NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,\r
+ Ncv32f *d_dst, Ncv32u dstStep,\r
+ NcvSize32u roiSize, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp)\r
+{\r
+ NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer);\r
+ ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = ncvIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,\r
+ Ncv64u *d_dst, Ncv32u dstStep,\r
+ NcvSize32u roiSize, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp)\r
+{\r
+ NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer);\r
+ ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = ncvSquaredIntegralImage_device(d_src, srcStep, d_dst, dstStep, roiSize, gpuAllocator);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,\r
+ Ncv32u *h_dst, Ncv32u dstStep,\r
+ NcvSize32u roiSize)\r
+{\r
+ ncvAssertReturn(h_src != NULL && h_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(srcStep >= roiSize.width &&\r
+ dstStep >= (roiSize.width + 1) * sizeof(Ncv32u) &&\r
+ dstStep % sizeof(Ncv32u) == 0, NPPST_INVALID_STEP);\r
+ dstStep /= sizeof(Ncv32u);\r
+\r
+ Ncv32u WidthII = roiSize.width + 1;\r
+ Ncv32u HeightII = roiSize.height + 1;\r
+\r
+ memset(h_dst, 0, WidthII * sizeof(Ncv32u));\r
+ for (Ncv32u i=1; i<HeightII; i++)\r
+ {\r
+ h_dst[i * dstStep] = 0;\r
+ for (Ncv32u j=1; j<WidthII; j++)\r
+ {\r
+ Ncv32u top = h_dst[(i-1) * dstStep + j];\r
+ Ncv32u left = h_dst[i * dstStep + (j - 1)];\r
+ Ncv32u topleft = h_dst[(i - 1) * dstStep + (j - 1)];\r
+ Ncv32u elem = h_src[(i - 1) * srcStep + (j - 1)];\r
+ h_dst[i * dstStep + j] = elem + left - topleft + top;\r
+ }\r
+ }\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,\r
+ Ncv32f *h_dst, Ncv32u dstStep,\r
+ NcvSize32u roiSize)\r
+{\r
+ ncvAssertReturn(h_src != NULL && h_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(srcStep >= roiSize.width * sizeof(Ncv32f) &&\r
+ dstStep >= (roiSize.width + 1) * sizeof(Ncv32f) &&\r
+ srcStep % sizeof(Ncv32f) == 0 &&\r
+ dstStep % sizeof(Ncv32f) == 0, NPPST_INVALID_STEP);\r
+ srcStep /= sizeof(Ncv32f);\r
+ dstStep /= sizeof(Ncv32f);\r
+\r
+ Ncv32u WidthII = roiSize.width + 1;\r
+ Ncv32u HeightII = roiSize.height + 1;\r
+\r
+    memset(h_dst, 0, WidthII * sizeof(Ncv32f));\r
+ for (Ncv32u i=1; i<HeightII; i++)\r
+ {\r
+ h_dst[i * dstStep] = 0.0f;\r
+ for (Ncv32u j=1; j<WidthII; j++)\r
+ {\r
+ Ncv32f top = h_dst[(i-1) * dstStep + j];\r
+ Ncv32f left = h_dst[i * dstStep + (j - 1)];\r
+ Ncv32f topleft = h_dst[(i - 1) * dstStep + (j - 1)];\r
+ Ncv32f elem = h_src[(i - 1) * srcStep + (j - 1)];\r
+ h_dst[i * dstStep + j] = elem + left - topleft + top;\r
+ }\r
+ }\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,\r
+ Ncv64u *h_dst, Ncv32u dstStep,\r
+ NcvSize32u roiSize)\r
+{\r
+ ncvAssertReturn(h_src != NULL && h_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roiSize.width > 0 && roiSize.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(srcStep >= roiSize.width &&\r
+ dstStep >= (roiSize.width + 1) * sizeof(Ncv64u) &&\r
+ dstStep % sizeof(Ncv64u) == 0, NPPST_INVALID_STEP);\r
+ dstStep /= sizeof(Ncv64u);\r
+\r
+ Ncv32u WidthII = roiSize.width + 1;\r
+ Ncv32u HeightII = roiSize.height + 1;\r
+\r
+ memset(h_dst, 0, WidthII * sizeof(Ncv64u));\r
+ for (Ncv32u i=1; i<HeightII; i++)\r
+ {\r
+ h_dst[i * dstStep] = 0;\r
+ for (Ncv32u j=1; j<WidthII; j++)\r
+ {\r
+ Ncv64u top = h_dst[(i-1) * dstStep + j];\r
+ Ncv64u left = h_dst[i * dstStep + (j - 1)];\r
+ Ncv64u topleft = h_dst[(i - 1) * dstStep + (j - 1)];\r
+ Ncv64u elem = h_src[(i - 1) * srcStep + (j - 1)];\r
+ h_dst[i * dstStep + j] = elem*elem + left - topleft + top;\r
+ }\r
+ }\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+//==============================================================================\r
+//\r
+// DownsampleNearest.cu\r
+//\r
+//==============================================================================\r
+\r
+\r
+const Ncv32u NUM_DOWNSAMPLE_NEAREST_THREADS_X = 32;\r
+const Ncv32u NUM_DOWNSAMPLE_NEAREST_THREADS_Y = 8;\r
+\r
+\r
+template<class T, NcvBool tbCacheTexture>\r
+__device__ T getElem_DownsampleNearest(Ncv32u x, T *d_src);\r
+\r
+\r
+template<>\r
+__device__ Ncv32u getElem_DownsampleNearest<Ncv32u, true>(Ncv32u x, Ncv32u *d_src)\r
+{\r
+ return tex1Dfetch(tex32u, x);\r
+}\r
+\r
+\r
+template<>\r
+__device__ Ncv32u getElem_DownsampleNearest<Ncv32u, false>(Ncv32u x, Ncv32u *d_src)\r
+{\r
+ return d_src[x];\r
+}\r
+\r
+\r
+template<>\r
+__device__ Ncv64u getElem_DownsampleNearest<Ncv64u, true>(Ncv32u x, Ncv64u *d_src)\r
+{\r
+ uint2 tmp = tex1Dfetch(tex64u, x);\r
+ Ncv64u res = (Ncv64u)tmp.y;\r
+ res <<= 32;\r
+ res |= tmp.x;\r
+ return res;\r
+}\r
+\r
+\r
+template<>\r
+__device__ Ncv64u getElem_DownsampleNearest<Ncv64u, false>(Ncv32u x, Ncv64u *d_src)\r
+{\r
+ return d_src[x];\r
+}\r
+\r
+\r
+template <class T, NcvBool tbCacheTexture>\r
+__global__ void downsampleNearest_C1R(T *d_src, Ncv32u srcStep, T *d_dst, Ncv32u dstStep,\r
+ NcvSize32u dstRoi, Ncv32u scale)\r
+{\r
+ int curX = blockIdx.x * blockDim.x + threadIdx.x;\r
+ int curY = blockIdx.y * blockDim.y + threadIdx.y;\r
+\r
+ if (curX >= dstRoi.width || curY >= dstRoi.height)\r
+ {\r
+ return;\r
+ }\r
+\r
+ d_dst[curY * dstStep + curX] = getElem_DownsampleNearest<T, tbCacheTexture>((curY * srcStep + curX) * scale, d_src);\r
+}\r
+\r
+\r
+template <class T>\r
+static NCVStatus downsampleNearestWrapperDevice(T *d_src, Ncv32u srcStep,\r
+ T *d_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale,\r
+ NcvBool readThruTexture)\r
+{\r
+ ncvAssertReturn(d_src != NULL && d_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(srcRoi.width > 0 && srcRoi.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(scale != 0, NPPST_INVALID_SCALE);\r
+ ncvAssertReturn(srcStep >= (Ncv32u)(srcRoi.width) * sizeof(T) &&\r
+ dstStep >= (Ncv32u)(srcRoi.width * sizeof(T) / scale), NPPST_INVALID_STEP);\r
+ srcStep /= sizeof(T);\r
+ dstStep /= sizeof(T);\r
+\r
+ NcvSize32u dstRoi;\r
+ dstRoi.width = srcRoi.width / scale;\r
+ dstRoi.height = srcRoi.height / scale;\r
+\r
+ dim3 grid((dstRoi.width + NUM_DOWNSAMPLE_NEAREST_THREADS_X - 1) / NUM_DOWNSAMPLE_NEAREST_THREADS_X,\r
+ (dstRoi.height + NUM_DOWNSAMPLE_NEAREST_THREADS_Y - 1) / NUM_DOWNSAMPLE_NEAREST_THREADS_Y);\r
+ dim3 block(NUM_DOWNSAMPLE_NEAREST_THREADS_X, NUM_DOWNSAMPLE_NEAREST_THREADS_Y);\r
+\r
+ if (!readThruTexture)\r
+ {\r
+ downsampleNearest_C1R\r
+ <T, false>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_src, srcStep, d_dst, dstStep, dstRoi, scale);\r
+ }\r
+ else\r
+ {\r
+ cudaChannelFormatDesc cfdTexSrc;\r
+\r
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ cfdTexSrc = cudaCreateChannelDesc<Ncv32u>();\r
+\r
+ size_t alignmentOffset;\r
+ ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, tex32u, d_src, cfdTexSrc, srcRoi.height * srcStep * sizeof(T)), NPPST_TEXTURE_BIND_ERROR);\r
+ ncvAssertReturn(alignmentOffset==0, NPPST_TEXTURE_BIND_ERROR);\r
+ }\r
+ else\r
+ {\r
+ cfdTexSrc = cudaCreateChannelDesc<uint2>();\r
+\r
+ size_t alignmentOffset;\r
+ ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, tex64u, d_src, cfdTexSrc, srcRoi.height * srcStep * sizeof(T)), NPPST_TEXTURE_BIND_ERROR);\r
+ ncvAssertReturn(alignmentOffset==0, NPPST_TEXTURE_BIND_ERROR);\r
+ }\r
+\r
+ downsampleNearest_C1R\r
+ <T, true>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_src, srcStep, d_dst, dstStep, dstRoi, scale);\r
+ }\r
+\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+\r
+#if defined _SELF_TEST_\r
+ T *h_src;\r
+ T *h_dst;\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_src, srcStep * srcRoi.height * sizeof(T)), NPPST_MEM_ALLOC_ERR);\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_dst, dstStep * dstRoi.height * sizeof(T)), NPPST_MEM_ALLOC_ERR);\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_src, d_src, srcStep * srcRoi.height * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_dst, d_dst, dstStep * dstRoi.height * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+\r
+ bool bPass = true;\r
+\r
+ for (Ncv32u i=0; i<dstRoi.height && bPass; i++)\r
+ {\r
+ for (Ncv32u j=0; j<dstRoi.width && bPass; j++)\r
+ {\r
+ if (h_dst[i*dstStep+j] != h_src[i*scale*srcStep + j*scale])\r
+ {\r
+                printf("::downsampleNearestWrapperDevice self test failed: i=%d, j=%d, cpu=%lld, gpu=%lld\n", i, j, (long long)h_src[i*scale*srcStep + j*scale], (long long)h_dst[i*dstStep+j]);\r
+ bPass = false;\r
+ }\r
+ }\r
+ }\r
+\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_src), NPPST_MEMFREE_ERR);\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_dst), NPPST_MEMFREE_ERR);\r
+ printf("::downsampleNearestWrapperDevice %s\n", bPass?"PASSED":"FAILED");\r
+#endif\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+template <class T>\r
+static NCVStatus downsampleNearestWrapperHost(T *h_src, Ncv32u srcStep,\r
+ T *h_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale)\r
+{\r
+ ncvAssertReturn(h_src != NULL && h_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(srcRoi.width != 0 && srcRoi.height != 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(scale != 0, NPPST_INVALID_SCALE);\r
+ ncvAssertReturn(srcStep >= (Ncv32u)(srcRoi.width) * sizeof(T) &&\r
+ dstStep >= (Ncv32u)(srcRoi.width * sizeof(T) / scale) &&\r
+ srcStep % sizeof(T) == 0 && dstStep % sizeof(T) == 0, NPPST_INVALID_STEP);\r
+ srcStep /= sizeof(T);\r
+ dstStep /= sizeof(T);\r
+\r
+ NcvSize32u dstRoi;\r
+ dstRoi.width = srcRoi.width / scale;\r
+ dstRoi.height = srcRoi.height / scale;\r
+\r
+ for (Ncv32u i=0; i<dstRoi.height; i++)\r
+ {\r
+ for (Ncv32u j=0; j<dstRoi.width; j++)\r
+ {\r
+ h_dst[i*dstStep+j] = h_src[i*scale*srcStep + j*scale];\r
+ }\r
+ }\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+#define implementNppDownsampleNearest(bit, typ) \\r
+ NCVStatus nppiStDownsampleNearest_##bit##typ##_C1R(Ncv##bit##typ *d_src, Ncv32u srcStep, \\r
+ Ncv##bit##typ *d_dst, Ncv32u dstStep, \\r
+ NcvSize32u srcRoi, Ncv32u scale, NcvBool readThruTexture) \\r
+ { \\r
+ return downsampleNearestWrapperDevice<Ncv##bit##u>((Ncv##bit##u *)d_src, srcStep, \\r
+ (Ncv##bit##u *)d_dst, dstStep, \\r
+ srcRoi, scale, readThruTexture); \\r
+ }\r
+\r
+\r
+#define implementNppDownsampleNearestHost(bit, typ) \\r
+ NCVStatus nppiStDownsampleNearest_##bit##typ##_C1R_host(Ncv##bit##typ *h_src, Ncv32u srcStep, \\r
+ Ncv##bit##typ *h_dst, Ncv32u dstStep, \\r
+ NcvSize32u srcRoi, Ncv32u scale) \\r
+ { \\r
+ return downsampleNearestWrapperHost<Ncv##bit##u>((Ncv##bit##u *)h_src, srcStep, \\r
+ (Ncv##bit##u *)h_dst, dstStep, \\r
+ srcRoi, scale); \\r
+ }\r
+\r
+\r
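+// Nearest-neighbor downsampling only copies whole pixels, so the signed and float variants\r
+// below can safely reinterpret their buffers as unsigned integers of the same width and\r
+// share a single instantiation per element size.\r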
+implementNppDownsampleNearest(32, u)\r
+implementNppDownsampleNearest(32, s)\r
+implementNppDownsampleNearest(32, f)\r
+implementNppDownsampleNearest(64, u)\r
+implementNppDownsampleNearest(64, s)\r
+implementNppDownsampleNearest(64, f)\r
+implementNppDownsampleNearestHost(32, u)\r
+implementNppDownsampleNearestHost(32, s)\r
+implementNppDownsampleNearestHost(32, f)\r
+implementNppDownsampleNearestHost(64, u)\r
+implementNppDownsampleNearestHost(64, s)\r
+implementNppDownsampleNearestHost(64, f)\r
+\r
+\r
+//==============================================================================\r
+//\r
+// RectStdDev.cu\r
+//\r
+//==============================================================================\r
+\r
+\r
+const Ncv32u NUM_RECTSTDDEV_THREADS = 128;\r
+\r
+\r
+template <NcvBool tbCacheTexture>\r
+__device__ Ncv32u getElemSum(Ncv32u x, Ncv32u *d_sum)\r
+{\r
+ if (tbCacheTexture)\r
+ {\r
+ return tex1Dfetch(tex32u, x);\r
+ }\r
+ else\r
+ {\r
+ return d_sum[x];\r
+ }\r
+}\r
+\r
+\r
+template <NcvBool tbCacheTexture>\r
+__device__ Ncv64u getElemSqSum(Ncv32u x, Ncv64u *d_sqsum)\r
+{\r
+ if (tbCacheTexture)\r
+ {\r
+ uint2 tmp = tex1Dfetch(tex64u, x);\r
+ Ncv64u res = (Ncv64u)tmp.y;\r
+ res <<= 32;\r
+ res |= tmp.x;\r
+ return res;\r
+ }\r
+ else\r
+ {\r
+ return d_sqsum[x];\r
+ }\r
+}\r
+\r
+\r
+template <NcvBool tbCacheTexture>\r
+__global__ void rectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,\r
+ Ncv64u *d_sqsum, Ncv32u sqsumStep,\r
+ Ncv32f *d_norm, Ncv32u normStep,\r
+ NcvSize32u roi, NcvRect32u rect, Ncv32f invRectArea)\r
+{\r
+ Ncv32u x_offs = blockIdx.x * NUM_RECTSTDDEV_THREADS + threadIdx.x;\r
+ if (x_offs >= roi.width)\r
+ {\r
+ return;\r
+ }\r
+\r
+ Ncv32u sum_offset = blockIdx.y * sumStep + x_offs;\r
+ Ncv32u sqsum_offset = blockIdx.y * sqsumStep + x_offs;\r
+\r
+ //OPT: try swapping order (could change cache hit/miss ratio)\r
+ Ncv32u sum_tl = getElemSum<tbCacheTexture>(sum_offset + rect.y * sumStep + rect.x, d_sum);\r
+ Ncv32u sum_bl = getElemSum<tbCacheTexture>(sum_offset + (rect.y + rect.height) * sumStep + rect.x, d_sum);\r
+ Ncv32u sum_tr = getElemSum<tbCacheTexture>(sum_offset + rect.y * sumStep + rect.x + rect.width, d_sum);\r
+ Ncv32u sum_br = getElemSum<tbCacheTexture>(sum_offset + (rect.y + rect.height) * sumStep + rect.x + rect.width, d_sum);\r
+ Ncv32u sum_val = sum_br + sum_tl - sum_tr - sum_bl;\r
+\r
+ Ncv64u sqsum_tl, sqsum_bl, sqsum_tr, sqsum_br;\r
+ sqsum_tl = getElemSqSum<tbCacheTexture>(sqsum_offset + rect.y * sqsumStep + rect.x, d_sqsum);\r
+ sqsum_bl = getElemSqSum<tbCacheTexture>(sqsum_offset + (rect.y + rect.height) * sqsumStep + rect.x, d_sqsum);\r
+ sqsum_tr = getElemSqSum<tbCacheTexture>(sqsum_offset + rect.y * sqsumStep + rect.x + rect.width, d_sqsum);\r
+ sqsum_br = getElemSqSum<tbCacheTexture>(sqsum_offset + (rect.y + rect.height) * sqsumStep + rect.x + rect.width, d_sqsum);\r
+ Ncv64u sqsum_val = sqsum_br + sqsum_tl - sqsum_tr - sqsum_bl;\r
+\r
+ Ncv32f mean = sum_val * invRectArea;\r
+\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // sqsum_val_res = sqsum_val / rectArea\r
+ //////////////////////////////////////////////////////////////////////////\r
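+    // The 64-bit sqsum can exceed the 24-bit mantissa of a single-precision float, so it is\r
+    // split into a truncated head (sqsum_val_1/_2) and the residual (sqsum_val_3/_4); both\r
+    // parts are scaled by invRectArea separately and recombined to limit the conversion error.\r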
+\r
+ Ncv32f sqsum_val_1 = __ull2float_rz(sqsum_val);\r
+ Ncv64u sqsum_val_2 = __float2ull_rz(sqsum_val_1);\r
+ Ncv64u sqsum_val_3 = sqsum_val - sqsum_val_2;\r
+ Ncv32f sqsum_val_4 = __ull2float_rn(sqsum_val_3);\r
+ sqsum_val_1 *= invRectArea;\r
+ sqsum_val_4 *= invRectArea;\r
+ Ncv32f sqsum_val_res = sqsum_val_1 + sqsum_val_4;\r
+\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // variance = sqsum_val_res - mean * mean\r
+ //////////////////////////////////////////////////////////////////////////\r
+\r
+#if defined DISABLE_MAD_SELECTIVELY\r
+    Ncv32f variance = sqsum_val_res - __fmul_rn(mean, mean);\r
+#else\r
+ Ncv32f variance = sqsum_val_res - mean * mean;\r
+#endif\r
+\r
+ //////////////////////////////////////////////////////////////////////////\r
+ // stddev = sqrtf(variance)\r
+ //////////////////////////////////////////////////////////////////////////\r
+\r
+ //Ncv32f stddev = sqrtf(variance);\r
+ Ncv32f stddev = __fsqrt_rn(variance);\r
+\r
+ d_norm[blockIdx.y * normStep + x_offs] = stddev;\r
+}\r
+\r
+\r
+NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,\r
+ Ncv64u *d_sqsum, Ncv32u sqsumStep,\r
+ Ncv32f *d_norm, Ncv32u normStep,\r
+ NcvSize32u roi, NcvRect32u rect,\r
+ Ncv32f scaleArea, NcvBool readThruTexture)\r
+{\r
+ ncvAssertReturn(d_sum != NULL && d_sqsum != NULL && d_norm != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roi.width > 0 && roi.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(sumStep >= (Ncv32u)(roi.width + rect.x + rect.width - 1) * sizeof(Ncv32u) &&\r
+ sqsumStep >= (Ncv32u)(roi.width + rect.x + rect.width - 1) * sizeof(Ncv64u) &&\r
+ normStep >= (Ncv32u)roi.width * sizeof(Ncv32f) &&\r
+ sumStep % sizeof(Ncv32u) == 0 &&\r
+ sqsumStep % sizeof(Ncv64u) == 0 &&\r
+ normStep % sizeof(Ncv32f) == 0, NPPST_INVALID_STEP);\r
+ ncvAssertReturn(scaleArea >= 1.0f, NPPST_INVALID_SCALE);\r
+ sumStep /= sizeof(Ncv32u);\r
+ sqsumStep /= sizeof(Ncv64u);\r
+ normStep /= sizeof(Ncv32f);\r
+\r
+ Ncv32f rectArea = rect.width * rect.height * scaleArea;\r
+ Ncv32f invRectArea = 1.0f / rectArea;\r
+\r
+ dim3 grid(((roi.width + NUM_RECTSTDDEV_THREADS - 1) / NUM_RECTSTDDEV_THREADS), roi.height);\r
+ dim3 block(NUM_RECTSTDDEV_THREADS);\r
+\r
+ if (!readThruTexture)\r
+ {\r
+ rectStdDev_32f_C1R\r
+ <false>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_sum, sumStep, d_sqsum, sqsumStep, d_norm, normStep, roi, rect, invRectArea);\r
+ }\r
+ else\r
+ {\r
+ cudaChannelFormatDesc cfdTexSrc;\r
+ cudaChannelFormatDesc cfdTexSqr;\r
+ cfdTexSrc = cudaCreateChannelDesc<Ncv32u>();\r
+ cfdTexSqr = cudaCreateChannelDesc<uint2>();\r
+\r
+ size_t alignmentOffset;\r
+ ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, tex32u, d_sum, cfdTexSrc, (roi.height + rect.y + rect.height) * sumStep * sizeof(Ncv32u)), NPPST_TEXTURE_BIND_ERROR);\r
+ ncvAssertReturn(alignmentOffset==0, NPPST_TEXTURE_BIND_ERROR);\r
+ ncvAssertCUDAReturn(cudaBindTexture(&alignmentOffset, tex64u, d_sqsum, cfdTexSqr, (roi.height + rect.y + rect.height) * sqsumStep * sizeof(Ncv64u)), NPPST_TEXTURE_BIND_ERROR);\r
+ ncvAssertReturn(alignmentOffset==0, NPPST_TEXTURE_BIND_ERROR);\r
+\r
+ rectStdDev_32f_C1R\r
+ <true>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (NULL, sumStep, NULL, sqsumStep, d_norm, normStep, roi, rect, invRectArea);\r
+ }\r
+\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+\r
+#if defined _SELF_TEST_\r
+ Ncv32u *h_sum;\r
+ Ncv64u *h_sqsum;\r
+ Ncv32f *h_norm_d;\r
+ Ncv32u ExtHeight = roi.height + rect.y + rect.height;\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_sum, sumStep * ExtHeight * sizeof(Ncv32u)), NPPST_MEM_ALLOC_ERR);\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_sqsum, sqsumStep * ExtHeight * sizeof(Ncv64u)), NPPST_MEM_ALLOC_ERR);\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_norm_d, normStep * roi.height * sizeof(Ncv32u)), NPPST_MEM_ALLOC_ERR);\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_sum, d_sum, sumStep * ExtHeight * sizeof(Ncv32u), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_sqsum, d_sqsum, sqsumStep * ExtHeight * sizeof(Ncv64u), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_norm_d, d_norm, normStep * roi.height * sizeof(Ncv32f), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+\r
+ Ncv32f *h_norm_h;\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_norm_h, normStep * roi.height * sizeof(Ncv32u)), NPPST_MEM_ALLOC_ERR);\r
+\r
+    ncvAssertReturnNcvStat(nppiStRectStdDev_32f_C1R_host(h_sum, sumStep, h_sqsum, sqsumStep, h_norm_h, normStep, roi, rect, scaleArea));\r
+\r
+ const Ncv64f relEPS = 0.005;\r
+ bool bPass = true;\r
+ for (Ncv32u i=0; i<roi.height && bPass; i++)\r
+ {\r
+ for (Ncv32u j=0; j<roi.width && bPass; j++)\r
+ {\r
+ Ncv64f absErr = fabs(h_norm_h[i * normStep + j] - h_norm_d[i * normStep + j]);\r
+ Ncv64f relErr = absErr / h_norm_h[i * normStep + j];\r
+\r
+ if (relErr > relEPS)\r
+ {\r
+ printf("::ncvRectStdDev_32f_C1R self test failed: i=%d, j=%d, cpu=%f, gpu=%f\n", i, j, h_norm_h[i * normStep + j], h_norm_d[i * normStep + j]);\r
+ bPass = false;\r
+ }\r
+ }\r
+ }\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_sum), NPPST_MEMFREE_ERR);\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_sqsum), NPPST_MEMFREE_ERR);\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_norm_d), NPPST_MEMFREE_ERR);\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_norm_h), NPPST_MEMFREE_ERR);\r
+ printf("::ncvRectStdDev_32f_C1R %s\n", bPass?"PASSED":"FAILED");\r
+#endif\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,\r
+ Ncv64u *h_sqsum, Ncv32u sqsumStep,\r
+ Ncv32f *h_norm, Ncv32u normStep,\r
+ NcvSize32u roi, NcvRect32u rect,\r
+ Ncv32f scaleArea)\r
+{\r
+ ncvAssertReturn(h_sum != NULL && h_sqsum != NULL && h_norm != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(roi.width > 0 && roi.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(sumStep >= (Ncv32u)(roi.width + rect.x + rect.width - 1) * sizeof(Ncv32u) &&\r
+ sqsumStep >= (Ncv32u)(roi.width + rect.x + rect.width - 1) * sizeof(Ncv64u) &&\r
+ normStep >= (Ncv32u)roi.width * sizeof(Ncv32f) &&\r
+ sumStep % sizeof(Ncv32u) == 0 &&\r
+ sqsumStep % sizeof(Ncv64u) == 0 &&\r
+ normStep % sizeof(Ncv32f) == 0, NPPST_INVALID_STEP);\r
+ ncvAssertReturn(scaleArea >= 1.0f, NPPST_INVALID_SCALE);\r
+ sumStep /= sizeof(Ncv32u);\r
+ sqsumStep /= sizeof(Ncv64u);\r
+ normStep /= sizeof(Ncv32f);\r
+\r
+ Ncv32f rectArea = rect.width * rect.height * scaleArea;\r
+ Ncv32f invRectArea = 1.0f / rectArea;\r
+\r
+ for (Ncv32u i=0; i<roi.height; i++)\r
+ {\r
+ for (Ncv32u j=0; j<roi.width; j++)\r
+ {\r
+ Ncv32u sum_offset = i * sumStep + j;\r
+ Ncv32u sqsum_offset = i * sqsumStep + j;\r
+\r
+ Ncv32u sum_tl = h_sum[sum_offset + rect.y * sumStep + rect.x];\r
+ Ncv32u sum_bl = h_sum[sum_offset + (rect.y + rect.height) * sumStep + rect.x];\r
+ Ncv32u sum_tr = h_sum[sum_offset + rect.y * sumStep + rect.x + rect.width];\r
+ Ncv32u sum_br = h_sum[sum_offset + (rect.y + rect.height) * sumStep + rect.x + rect.width];\r
+ Ncv64f sum_val = sum_br + sum_tl - sum_tr - sum_bl;\r
+\r
+ Ncv64u sqsum_tl = h_sqsum[sqsum_offset + rect.y * sqsumStep + rect.x];\r
+ Ncv64u sqsum_bl = h_sqsum[sqsum_offset + (rect.y + rect.height) * sqsumStep + rect.x];\r
+ Ncv64u sqsum_tr = h_sqsum[sqsum_offset + rect.y * sqsumStep + rect.x + rect.width];\r
+ Ncv64u sqsum_br = h_sqsum[sqsum_offset + (rect.y + rect.height) * sqsumStep + rect.x + rect.width];\r
+ Ncv64f sqsum_val = (Ncv64f)(sqsum_br + sqsum_tl - sqsum_tr - sqsum_bl);\r
+\r
+ Ncv64f mean = sum_val * invRectArea;\r
+ Ncv64f sqsum_val_2 = sqsum_val / rectArea;\r
+ Ncv64f variance = sqsum_val_2 - mean * mean;\r
+\r
+ h_norm[i * normStep + j] = (Ncv32f)sqrt(variance);\r
+ }\r
+ }\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Transpose.cu\r
+//\r
+//==============================================================================\r
+\r
+\r
+const Ncv32u TRANSPOSE_TILE_DIM = 16;\r
+const Ncv32u TRANSPOSE_BLOCK_ROWS = 16;\r
+\r
+\r
+/**\r
+* \brief Matrix transpose kernel\r
+*\r
+* Calculates transpose of the input image\r
+* \see TRANSPOSE_TILE_DIM\r
+*\r
+* \tparam T Type of image elements\r
+*\r
+* \param d_src [IN] Source image pointer\r
+* \param srcStride [IN] Source image stride\r
+* \param d_dst [OUT] Output image pointer\r
+* \param dstStride [IN] Output image stride\r
+* \param srcRoi [IN] Region of interest of the source image\r
+*\r
+* \return None\r
+*/\r
+template <class T>\r
+__global__ void transpose(T *d_src, Ncv32u srcStride,\r
+ T *d_dst, Ncv32u dstStride, NcvSize32u srcRoi)\r
+{\r
+ __shared__ T tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM+1];\r
+\r
+ Ncv32u blockIdx_x, blockIdx_y;\r
+\r
+ // do diagonal reordering\r
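+    // (blocks are renumbered along a diagonal so that concurrently scheduled blocks hit\r
+    // different memory partitions; this mirrors the CUDA SDK transpose sample's remedy for\r
+    // partition camping)\r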
+ if (gridDim.x == gridDim.y)\r
+ {\r
+ blockIdx_y = blockIdx.x;\r
+ blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x;\r
+ }\r
+ else\r
+ {\r
+ Ncv32u bid = blockIdx.x + gridDim.x * blockIdx.y;\r
+ blockIdx_y = bid % gridDim.y;\r
+ blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;\r
+ }\r
+\r
+ Ncv32u xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;\r
+ Ncv32u yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;\r
+ Ncv32u index_in = xIndex + yIndex * srcStride;\r
+\r
+ xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;\r
+ yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;\r
+ Ncv32u index_out = xIndex + yIndex * dstStride;\r
+\r
+ for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)\r
+ {\r
+ tile[threadIdx.y+i][threadIdx.x] = d_src[index_in+i*srcStride];\r
+ }\r
+\r
+ __syncthreads();\r
+\r
+ if (xIndex < srcRoi.height)\r
+ {\r
+ for (Ncv32u i=0; i<TRANSPOSE_TILE_DIM; i+=TRANSPOSE_BLOCK_ROWS)\r
+ {\r
+ if (yIndex + i < srcRoi.width)\r
+ {\r
+ d_dst[index_out+i*dstStride] = tile[threadIdx.x][threadIdx.y+i];\r
+ }\r
+ }\r
+ }\r
+}\r
+\r
+\r
+template <class T>\r
+NCVStatus transposeWrapperDevice(T *d_src, Ncv32u srcStride,\r
+ T *d_dst, Ncv32u dstStride, NcvSize32u srcRoi)\r
+{\r
+ ncvAssertReturn(d_src != NULL && d_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(srcRoi.width > 0 && srcRoi.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(srcStride >= srcRoi.width * sizeof(T) &&\r
+ dstStride >= srcRoi.height * sizeof(T) &&\r
+ srcStride % sizeof(T) == 0 && dstStride % sizeof(T) == 0, NPPST_INVALID_STEP);\r
+ srcStride /= sizeof(T);\r
+ dstStride /= sizeof(T);\r
+\r
+ dim3 grid((srcRoi.width + TRANSPOSE_TILE_DIM - 1) / TRANSPOSE_TILE_DIM,\r
+ (srcRoi.height + TRANSPOSE_TILE_DIM - 1) / TRANSPOSE_TILE_DIM);\r
+ dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_TILE_DIM);\r
+ transpose\r
+ <T>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_src, srcStride, d_dst, dstStride, srcRoi);\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+\r
+#if defined _SELF_TEST_\r
+ Ncv32u widthExt = grid.x * TRANSPOSE_TILE_DIM;\r
+ Ncv32u heightExt = grid.y * TRANSPOSE_TILE_DIM;\r
+ T *h_src;\r
+ T *h_dst;\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_src, srcStride * heightExt * sizeof(T)), NPPST_MEM_ALLOC_ERR);\r
+ ncvAssertCUDAReturn(cudaMallocHost(&h_dst, dstStride * widthExt * sizeof(T)), NPPST_MEM_ALLOC_ERR);\r
+ memset(h_src, 0, srcStride * heightExt * sizeof(T));\r
+ memset(h_dst, 0, dstStride * widthExt * sizeof(T));\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_src, d_src, srcStride * heightExt * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+ ncvAssertCUDAReturn(cudaMemcpy(h_dst, d_dst, dstStride * widthExt * sizeof(T), cudaMemcpyDeviceToHost), NPPST_MEMCPY_ERROR);\r
+ NcvBool bPass = true;\r
+ for (Ncv32u i=0; i<srcRoi.height && bPass; i++)\r
+ {\r
+ for (Ncv32u j=0; j<srcRoi.width && bPass; j++)\r
+ {\r
+ if (h_src[i * srcStride + j] != h_dst[j * dstStride + i])\r
+ {\r
+                printf("CIntegralImage::transposeWrapperDevice self test failed: i=%d, j=%d, cpu=%d, gpu=%d\n", i, j, h_src[i * srcStride + j], h_dst[j * dstStride + i]);\r
+ bPass = false;\r
+ }\r
+ }\r
+ }\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_src), NPPST_MEMFREE_ERR);\r
+ ncvAssertCUDAReturn(cudaFreeHost(h_dst), NPPST_MEMFREE_ERR);\r
+ printf("CIntegralImage::transposeWrapperDevice %s\n", bPass?"PASSED":"FAILED");\r
+#endif\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+template <class T>\r
+static NCVStatus transposeWrapperHost(T *h_src, Ncv32u srcStride,\r
+ T *h_dst, Ncv32u dstStride, NcvSize32u srcRoi)\r
+{\r
+ ncvAssertReturn(h_src != NULL && h_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+ ncvAssertReturn(srcRoi.width > 0 && srcRoi.height > 0, NPPST_INVALID_ROI);\r
+ ncvAssertReturn(srcStride >= srcRoi.width * sizeof(T) &&\r
+ dstStride >= srcRoi.height * sizeof(T) &&\r
+ srcStride % sizeof(T) == 0 && dstStride % sizeof(T) == 0, NPPST_INVALID_STEP);\r
+ srcStride /= sizeof(T);\r
+ dstStride /= sizeof(T);\r
+\r
+ for (Ncv32u i=0; i<srcRoi.height; i++)\r
+ {\r
+ for (Ncv32u j=0; j<srcRoi.width; j++)\r
+ {\r
+ h_dst[j*dstStride+i] = h_src[i*srcStride + j];\r
+ }\r
+ }\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+#define implementNppTranspose(bit, typ) \\r
+ NCVStatus nppiStTranspose_##bit##typ##_C1R(Ncv##bit##typ *d_src, Ncv32u srcStep, \\r
+ Ncv##bit##typ *d_dst, Ncv32u dstStep, NcvSize32u srcRoi) \\r
+ { \\r
+ return transposeWrapperDevice<Ncv##bit##u>((Ncv##bit##u *)d_src, srcStep, \\r
+ (Ncv##bit##u *)d_dst, dstStep, srcRoi); \\r
+ }\r
+\r
+\r
+#define implementNppTransposeHost(bit, typ) \\r
+ NCVStatus nppiStTranspose_##bit##typ##_C1R_host(Ncv##bit##typ *h_src, Ncv32u srcStep, \\r
+ Ncv##bit##typ *h_dst, Ncv32u dstStep, \\r
+ NcvSize32u srcRoi) \\r
+ { \\r
+ return transposeWrapperHost<Ncv##bit##u>((Ncv##bit##u *)h_src, srcStep, \\r
+ (Ncv##bit##u *)h_dst, dstStep, srcRoi); \\r
+ }\r
+\r
+\r
+implementNppTranspose(32,u)\r
+implementNppTranspose(32,s)\r
+implementNppTranspose(32,f)\r
+implementNppTranspose(64,u)\r
+implementNppTranspose(64,s)\r
+implementNppTranspose(64,f)\r
+\r
+implementNppTransposeHost(32,u)\r
+implementNppTransposeHost(32,s)\r
+implementNppTransposeHost(32,f)\r
+implementNppTransposeHost(64,u)\r
+implementNppTransposeHost(64,s)\r
+implementNppTransposeHost(64,f)\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Compact.cu\r
+//\r
+//==============================================================================\r
+\r
+\r
+const Ncv32u NUM_REMOVE_THREADS = 256;\r
+\r
+\r
+template <bool bRemove, bool bWritePartial>\r
+__global__ void removePass1Scan(Ncv32u *d_src, Ncv32u srcLen,\r
+ Ncv32u *d_offsets, Ncv32u *d_blockSums,\r
+ Ncv32u elemRemove)\r
+{\r
+ Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;\r
+ Ncv32u elemAddrIn = blockId * NUM_REMOVE_THREADS + threadIdx.x;\r
+\r
+ if (elemAddrIn > srcLen + blockDim.x)\r
+ {\r
+ return;\r
+ }\r
+\r
+ __shared__ Ncv32u shmem[NUM_REMOVE_THREADS * 2];\r
+\r
+ Ncv32u scanElem = 0;\r
+ if (elemAddrIn < srcLen)\r
+ {\r
+ if (bRemove)\r
+ {\r
+ scanElem = (d_src[elemAddrIn] != elemRemove) ? 1 : 0;\r
+ }\r
+ else\r
+ {\r
+ scanElem = d_src[elemAddrIn];\r
+ }\r
+ }\r
+\r
+ Ncv32u localScanInc = blockScanInclusive<Ncv32u, NUM_REMOVE_THREADS>(scanElem, shmem);\r
+ __syncthreads();\r
+\r
+ if (elemAddrIn < srcLen)\r
+ {\r
+ if (threadIdx.x == NUM_REMOVE_THREADS-1 && bWritePartial)\r
+ {\r
+ d_blockSums[blockId] = localScanInc;\r
+ }\r
+\r
+ if (bRemove)\r
+ {\r
+ d_offsets[elemAddrIn] = localScanInc - scanElem;\r
+ }\r
+ else\r
+ {\r
+ d_src[elemAddrIn] = localScanInc - scanElem;\r
+ }\r
+ }\r
+}\r
+\r
+\r
+__global__ void removePass2Adjust(Ncv32u *d_offsets, Ncv32u srcLen, Ncv32u *d_blockSums)\r
+{\r
+ Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;\r
+ Ncv32u elemAddrIn = blockId * NUM_REMOVE_THREADS + threadIdx.x;\r
+ if (elemAddrIn >= srcLen)\r
+ {\r
+ return;\r
+ }\r
+\r
+ __shared__ Ncv32u valOffs;\r
+ valOffs = d_blockSums[blockId];\r
+ __syncthreads();\r
+\r
+ d_offsets[elemAddrIn] += valOffs;\r
+}\r
+\r
+\r
+__global__ void removePass3Compact(Ncv32u *d_src, Ncv32u srcLen,\r
+ Ncv32u *d_offsets, Ncv32u *d_dst,\r
+ Ncv32u elemRemove, Ncv32u *dstLenValue)\r
+{\r
+ Ncv32u blockId = blockIdx.y * 65535 + blockIdx.x;\r
+ Ncv32u elemAddrIn = blockId * NUM_REMOVE_THREADS + threadIdx.x;\r
+ if (elemAddrIn >= srcLen)\r
+ {\r
+ return;\r
+ }\r
+\r
+ Ncv32u elem = d_src[elemAddrIn];\r
+ Ncv32u elemAddrOut = d_offsets[elemAddrIn];\r
+ if (elem != elemRemove)\r
+ {\r
+ d_dst[elemAddrOut] = elem;\r
+ }\r
+\r
+ if (elemAddrIn == srcLen-1)\r
+ {\r
+ if (elem != elemRemove)\r
+ {\r
+ *dstLenValue = elemAddrOut + 1;\r
+ }\r
+ else\r
+ {\r
+ *dstLenValue = elemAddrOut;\r
+ }\r
+ }\r
+}\r
+\r
+\r
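+// Removes all occurrences of elemRemove from d_src and writes the survivors to d_dst in\r
+// three passes: per-block prefix scans of the keep flags (removePass1Scan), top-down\r
+// propagation of the hierarchical block sums into global output offsets (removePass2Adjust),\r
+// and a scatter of the surviving elements to those offsets (removePass3Compact). The same\r
+// routine runs against a counting allocator in nppsStCompactGetSize_32u to size the scratch buffer.\r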
+NCVStatus compactVector_32u_device(Ncv32u *d_src, Ncv32u srcLen,\r
+ Ncv32u *d_dst, Ncv32u *dstLenPinned,\r
+ Ncv32u elemRemove,\r
+ INCVMemAllocator &gpuAllocator)\r
+{\r
+ ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+ ncvAssertReturn((d_src != NULL && d_dst != NULL) || gpuAllocator.isCounting(), NPPST_NULL_POINTER_ERROR);\r
+\r
+ if (srcLen == 0)\r
+ {\r
+ if (dstLenPinned != NULL)\r
+ {\r
+ *dstLenPinned = 0;\r
+ }\r
+ return NPPST_SUCCESS;\r
+ }\r
+\r
+ std::vector<Ncv32u> partSumNums;\r
+ std::vector<Ncv32u> partSumOffsets;\r
+ Ncv32u partSumLastNum = srcLen;\r
+ Ncv32u partSumLastOffs = 0;\r
+ do\r
+ {\r
+ partSumNums.push_back(partSumLastNum);\r
+ partSumOffsets.push_back(partSumLastOffs);\r
+\r
+ Ncv32u curPartSumAlignedLength = alignUp(partSumLastNum * sizeof(Ncv32u),\r
+ gpuAllocator.alignment()) / sizeof(Ncv32u);\r
+ partSumLastOffs += curPartSumAlignedLength;\r
+\r
+ partSumLastNum = (partSumLastNum + NUM_REMOVE_THREADS - 1) / NUM_REMOVE_THREADS;\r
+ }\r
+ while (partSumLastNum>1);\r
+ partSumNums.push_back(partSumLastNum);\r
+ partSumOffsets.push_back(partSumLastOffs);\r
+\r
+ NCVVectorAlloc<Ncv32u> d_hierSums(gpuAllocator, partSumLastOffs+1);\r
+ ncvAssertReturn(gpuAllocator.isCounting() || d_hierSums.isMemAllocated(), NPPST_MEM_INTERNAL_ERROR);\r
+ NCVVectorAlloc<Ncv32u> d_numDstElements(gpuAllocator, 1);\r
+ ncvAssertReturn(gpuAllocator.isCounting() || d_numDstElements.isMemAllocated(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCV_SET_SKIP_COND(gpuAllocator.isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ dim3 block(NUM_REMOVE_THREADS);\r
+\r
+ //calculate zero-level partial sums for indices calculation\r
+ if (partSumNums.size() > 2)\r
+ {\r
+ dim3 grid(partSumNums[1]);\r
+\r
+ if (grid.x > 65535)\r
+ {\r
+ grid.y = (grid.x + 65534) / 65535;\r
+ grid.x = 65535;\r
+ }\r
+ removePass1Scan\r
+ <true, true>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_src, srcLen,\r
+ d_hierSums.ptr(),\r
+ d_hierSums.ptr() + partSumOffsets[1],\r
+ elemRemove);\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+\r
+ //calculate hierarchical partial sums\r
+ for (Ncv32u i=1; i<partSumNums.size()-1; i++)\r
+ {\r
+ dim3 grid(partSumNums[i+1]);\r
+ if (grid.x > 65535)\r
+ {\r
+ grid.y = (grid.x + 65534) / 65535;\r
+ grid.x = 65535;\r
+ }\r
+ if (grid.x != 1)\r
+ {\r
+ removePass1Scan\r
+ <false, true>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_hierSums.ptr() + partSumOffsets[i],\r
+ partSumNums[i], NULL,\r
+ d_hierSums.ptr() + partSumOffsets[i+1],\r
+ NULL);\r
+ }\r
+ else\r
+ {\r
+ removePass1Scan\r
+ <false, false>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_hierSums.ptr() + partSumOffsets[i],\r
+ partSumNums[i], NULL,\r
+ NULL,\r
+ NULL);\r
+ }\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+ }\r
+\r
+ //adjust hierarchical partial sums\r
+ for (Ncv32s i=(Ncv32s)partSumNums.size()-3; i>=0; i--)\r
+ {\r
+ dim3 grid(partSumNums[i+1]);\r
+ if (grid.x > 65535)\r
+ {\r
+ grid.y = (grid.x + 65534) / 65535;\r
+ grid.x = 65535;\r
+ }\r
+ removePass2Adjust\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_hierSums.ptr() + partSumOffsets[i], partSumNums[i],\r
+ d_hierSums.ptr() + partSumOffsets[i+1]);\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ dim3 grid(partSumNums[1]);\r
+ removePass1Scan\r
+ <true, false>\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_src, srcLen,\r
+ d_hierSums.ptr(),\r
+ NULL, elemRemove);\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+ }\r
+\r
+ //compact source vector using indices\r
+ dim3 grid(partSumNums[1]);\r
+ if (grid.x > 65535)\r
+ {\r
+ grid.y = (grid.x + 65534) / 65535;\r
+ grid.x = 65535;\r
+ }\r
+ removePass3Compact\r
+ <<<grid, block, 0, nppStGetActiveCUDAstream()>>>\r
+ (d_src, srcLen, d_hierSums.ptr(), d_dst,\r
+ elemRemove, d_numDstElements.ptr());\r
+ ncvAssertCUDAReturn(cudaGetLastError(), NPPST_CUDA_KERNEL_EXECUTION_ERROR);\r
+\r
+ //get number of dst elements\r
+ if (dstLenPinned != NULL)\r
+ {\r
+ ncvAssertCUDAReturn(cudaMemcpyAsync(dstLenPinned, d_numDstElements.ptr(), sizeof(Ncv32u),\r
+ cudaMemcpyDeviceToHost, nppStGetActiveCUDAstream()), NPPST_MEM_RESIDENCE_ERROR);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(nppStGetActiveCUDAstream()), NPPST_MEM_RESIDENCE_ERROR);\r
+ }\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp)\r
+{\r
+ ncvAssertReturn(pBufsize != NULL, NPPST_NULL_POINTER_ERROR);\r
+\r
+ if (srcLen == 0)\r
+ {\r
+ *pBufsize = 0;\r
+ return NPPST_SUCCESS;\r
+ }\r
+\r
+ NCVMemStackAllocator gpuCounter(devProp.textureAlignment);\r
+ ncvAssertReturn(gpuCounter.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = compactVector_32u_device(NULL, srcLen, NULL, NULL, 0xC001C0DE,\r
+ gpuCounter);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ *pBufsize = (Ncv32u)gpuCounter.maxSize();\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppsStCompactGetSize_32s(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp)\r
+{\r
+ return nppsStCompactGetSize_32u(srcLen, pBufsize, devProp);\r
+}\r
+\r
+\r
+NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp)\r
+{\r
+ return nppsStCompactGetSize_32u(srcLen, pBufsize, devProp);\r
+}\r
+\r
+\r
+NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,\r
+ Ncv32u *d_dst, Ncv32u *p_dstLen,\r
+ Ncv32u elemRemove, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp)\r
+{\r
+ NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, devProp.textureAlignment, pBuffer);\r
+ ncvAssertReturn(gpuAllocator.isInitialized(), NPPST_MEM_INTERNAL_ERROR);\r
+\r
+ NCVStatus ncvStat = compactVector_32u_device(d_src, srcLen, d_dst, p_dstLen, elemRemove,\r
+ gpuAllocator);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,\r
+ Ncv32s *d_dst, Ncv32u *p_dstLen,\r
+ Ncv32s elemRemove, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp)\r
+{\r
+ return nppsStCompact_32u((Ncv32u *)d_src, srcLen, (Ncv32u *)d_dst, p_dstLen,\r
+ *(Ncv32u *)&elemRemove, pBuffer, bufSize, devProp);\r
+}\r
+\r
+\r
+NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,\r
+ Ncv32f *d_dst, Ncv32u *p_dstLen,\r
+ Ncv32f elemRemove, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp)\r
+{\r
+ return nppsStCompact_32u((Ncv32u *)d_src, srcLen, (Ncv32u *)d_dst, p_dstLen,\r
+ *(Ncv32u *)&elemRemove, pBuffer, bufSize, devProp);\r
+}\r
+\r
+\r
+NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,\r
+ Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove)\r
+{\r
+ ncvAssertReturn(h_src != NULL && h_dst != NULL, NPPST_NULL_POINTER_ERROR);\r
+\r
+ if (srcLen == 0)\r
+ {\r
+ if (dstLen != NULL)\r
+ {\r
+ *dstLen = 0;\r
+ }\r
+ return NPPST_SUCCESS;\r
+ }\r
+\r
+ Ncv32u dstIndex = 0;\r
+ for (Ncv32u srcIndex=0; srcIndex<srcLen; srcIndex++)\r
+ {\r
+ if (h_src[srcIndex] != elemRemove)\r
+ {\r
+ h_dst[dstIndex++] = h_src[srcIndex];\r
+ }\r
+ }\r
+\r
+ if (dstLen != NULL)\r
+ {\r
+ *dstLen = dstIndex;\r
+ }\r
+\r
+ return NPPST_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,\r
+ Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove)\r
+{\r
+ return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u *)&elemRemove);\r
+}\r
+\r
+\r
+NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,\r
+ Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove)\r
+{\r
+ return nppsStCompact_32u_host((Ncv32u *)h_src, srcLen, (Ncv32u *)h_dst, dstLen, *(Ncv32u *)&elemRemove);\r
+}\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
+// \r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef _npp_staging_hpp_\r
+#define _npp_staging_hpp_\r
+\r
+#include "NCV.hpp"\r
+\r
+\r
+/**\r
+* \file NPP_staging.hpp\r
+* NPP Staging Library\r
+*/\r
+\r
+\r
+/** \defgroup core_npp NPPST Core\r
+ * Basic functions for CUDA stream management.\r
+ * @{\r
+ */\r
+\r
+\r
+/**\r
+ * Gets the active CUDA stream used by NPPST\r
+ * NOT THREAD SAFE\r
+ * \return Current CUDA stream\r
+ */\r
+cudaStream_t nppStGetActiveCUDAstream();\r
+\r
+\r
+/**\r
+ * Sets the active CUDA stream used by NPPST\r
+ * NOT THREAD SAFE\r
+ * \param cudaStream [IN] CUDA stream to become current\r
+ * \return CUDA stream used before\r
+ */\r
+cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);\r
+\r
+\r
+/*@}*/\r
+\r
+\r
+/** \defgroup nppi NPPST Image Processing\r
+* @{\r
+*/\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param d_dst [OUT] Destination image pointer (CUDA device memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest in the source image\r
+ * \param scale [IN] Downsampling scale factor (positive integer)\r
+ * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)\r
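+ *\r
+ * The destination ROI is (srcRoi.width / scale) x (srcRoi.height / scale) pixels, and each\r
+ * destination pixel (x, y) is a copy of source pixel (x * scale, y * scale).\r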
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_32u_C1R(Ncv32u *d_src, Ncv32u srcStep,\r
+ Ncv32u *d_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale,\r
+ NcvBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_32s_C1R(Ncv32s *d_src, Ncv32u srcStep,\r
+ Ncv32s *d_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale,\r
+ NcvBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_32f_C1R(Ncv32f *d_src, Ncv32u srcStep,\r
+ Ncv32f *d_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale,\r
+ NcvBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_64u_C1R(Ncv64u *d_src, Ncv32u srcStep,\r
+ Ncv64u *d_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale,\r
+ NcvBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_64s_C1R(Ncv64s *d_src, Ncv32u srcStep,\r
+ Ncv64s *d_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale,\r
+ NcvBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_64f_C1R(Ncv64f *d_src, Ncv32u srcStep,\r
+ Ncv64f *d_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale,\r
+ NcvBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param h_dst [OUT] Destination image pointer (Host or pinned memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest in the source image\r
+ * \param scale [IN] Downsampling scale factor (positive integer)\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStep,\r
+ Ncv32u *h_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStep,\r
+ Ncv32s *h_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,\r
+ Ncv32f *h_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStep,\r
+ Ncv64u *h_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStep,\r
+ Ncv64s *h_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStDownsampleNearest_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStep,\r
+ Ncv64f *h_dst, Ncv32u dstStep,\r
+ NcvSize32u srcRoi, Ncv32u scale);\r
+\r
+\r
+/**\r
+ * Computes standard deviation for each rectangular region of the input image using integral images.\r
+ *\r
+ * \param d_sum [IN] Integral image pointer (CUDA device memory)\r
+ * \param sumStep [IN] Integral image line step\r
+ * \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)\r
+ * \param sqsumStep [IN] Squared integral image line step\r
+ * \param d_norm [OUT] Stddev image pointer (CUDA device memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image\r
+ * \param normStep [IN] Stddev image line step\r
+ * \param roi [IN] Region of interest in the source image\r
+ * \param rect [IN] Rectangular region to calculate stddev over\r
+ * \param scaleArea          [IN] Multiplication factor to account for the decimated scale\r
+ * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStRectStdDev_32f_C1R(Ncv32u *d_sum, Ncv32u sumStep,\r
+ Ncv64u *d_sqsum, Ncv32u sqsumStep,\r
+ Ncv32f *d_norm, Ncv32u normStep,\r
+ NcvSize32u roi, NcvRect32u rect,\r
+ Ncv32f scaleArea, NcvBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Computes standard deviation for each rectangular region of the input image using integral images. Host implementation\r
+ *\r
+ * \param h_sum [IN] Integral image pointer (Host or pinned memory)\r
+ * \param sumStep [IN] Integral image line step\r
+ * \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)\r
+ * \param sqsumStep [IN] Squared integral image line step\r
+ * \param h_norm [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains stddev of a rect with top-left corner at the original location in the image\r
+ * \param normStep [IN] Stddev image line step\r
+ * \param roi [IN] Region of interest in the source image\r
+ * \param rect [IN] Rectangular region to calculate stddev over\r
+ * \param scaleArea          [IN] Multiplication factor to account for the decimated scale\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStRectStdDev_32f_C1R_host(Ncv32u *h_sum, Ncv32u sumStep,\r
+ Ncv64u *h_sqsum, Ncv32u sqsumStep,\r
+ Ncv32f *h_norm, Ncv32u normStep,\r
+ NcvSize32u roi, NcvRect32u rect,\r
+ Ncv32f scaleArea);\r
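+\r
+\r
+/*\r
+ * Illustrative note: with S and SQ denoting the sums of pixel values and of squared\r
+ * pixel values over a window of area A (both read in O(1) from the integral and the\r
+ * squared integral images), the per-pixel result is conceptually\r
+ * stddev = sqrt(SQ / A - (S / A)^2); scaleArea is assumed to compensate the window\r
+ * area when the inputs were built from a decimated image.\r
+ */\r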
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit unsigned pixels, single channel\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStride [IN] Source image line step\r
+ * \param d_dst [OUT] Destination image pointer (CUDA device memory)\r
+ * \param dstStride [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest of the source image\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_32u_C1R(Ncv32u *d_src, Ncv32u srcStride,\r
+ Ncv32u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit signed pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_32s_C1R(Ncv32s *d_src, Ncv32u srcStride,\r
+ Ncv32s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit float pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_32f_C1R(Ncv32f *d_src, Ncv32u srcStride,\r
+ Ncv32f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit unsigned pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_64u_C1R(Ncv64u *d_src, Ncv32u srcStride,\r
+ Ncv64u *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit signed pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_64s_C1R(Ncv64s *d_src, Ncv32u srcStride,\r
+ Ncv64s *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit float pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_64f_C1R(Ncv64f *d_src, Ncv32u srcStride,\r
+ Ncv64f *d_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit unsigned pixels, single channel. Host implementation\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStride [IN] Source image line step\r
+ * \param h_dst [OUT] Destination image pointer (Host or pinned memory)\r
+ * \param dstStride [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest of the source image\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_32u_C1R_host(Ncv32u *h_src, Ncv32u srcStride,\r
+ Ncv32u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit signed pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_32s_C1R_host(Ncv32s *h_src, Ncv32u srcStride,\r
+ Ncv32s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit float pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_32f_C1R_host(Ncv32f *h_src, Ncv32u srcStride,\r
+ Ncv32f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit unsigned pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_64u_C1R_host(Ncv64u *h_src, Ncv32u srcStride,\r
+ Ncv64u *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit signed pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_64s_C1R_host(Ncv64s *h_src, Ncv32u srcStride,\r
+ Ncv64s *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit float pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStTranspose_64f_C1R_host(Ncv64f *h_src, Ncv32u srcStride,\r
+ Ncv64f *h_dst, Ncv32u dstStride, NcvSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for integral image creation\r
+ *\r
+ * \param roiSize [IN] Size of the input image\r
+ * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
+ * \param devProp [IN] CUDA device properties structure, containing texture alignment information\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStIntegralGetSize_8u32u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for integral image creation\r
+ * \see nppiStIntegralGetSize_8u32u\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStIntegralGetSize_32f32f(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Creates an integral image representation for the input image\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param d_dst [OUT] Destination integral image pointer (CUDA device memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
+ * \param bufSize [IN] Size of the pBuffer in bytes\r
+ * \param devProp [IN] CUDA device properties structure, containing texture alignment information\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStIntegral_8u32u_C1R(Ncv8u *d_src, Ncv32u srcStep,\r
+ Ncv32u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,\r
+ Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Creates an integral image representation for the input image\r
+ * \see nppiStIntegral_8u32u_C1R\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStIntegral_32f32f_C1R(Ncv32f *d_src, Ncv32u srcStep,\r
+ Ncv32f *d_dst, Ncv32u dstStep, NcvSize32u roiSize,\r
+ Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);\r
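+\r
+\r
+/*\r
+ * Illustrative usage sketch (assumes d_src and d_dst are valid device buffers with\r
+ * line steps srcStep and dstStep, roi describes the source size, and the caller\r
+ * checks all return codes):\r
+ *\r
+ *     cudaDeviceProp devProp;\r
+ *     cudaGetDeviceProperties(&devProp, 0);\r
+ *     Ncv32u bufSize = 0;\r
+ *     nppiStIntegralGetSize_8u32u(roi, &bufSize, devProp);\r
+ *     Ncv8u *pBuffer = NULL;\r
+ *     cudaMalloc((void **)&pBuffer, bufSize);\r
+ *     nppiStIntegral_8u32u_C1R(d_src, srcStep, d_dst, dstStep, roi, pBuffer, bufSize, devProp);\r
+ *     cudaFree(pBuffer);\r
+ */\r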
+\r
+\r
+/**\r
+ * Creates an integral image representation for the input image. Host implementation\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStIntegral_8u32u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,\r
+ Ncv32u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);\r
+\r
+\r
+/**\r
+ * Creates an integral image representation for the input image. Host implementation\r
+ * \see nppiStIntegral_8u32u_C1R_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStIntegral_32f32f_C1R_host(Ncv32f *h_src, Ncv32u srcStep,\r
+ Ncv32f *h_dst, Ncv32u dstStep, NcvSize32u roiSize);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for squared integral image creation\r
+ *\r
+ * \param roiSize [IN] Size of the input image\r
+ * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
+ * \param devProp [IN] CUDA device properties structure, containing texture alignment information\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStSqrIntegralGetSize_8u64u(NcvSize32u roiSize, Ncv32u *pBufsize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Creates a squared integral image representation for the input image\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
+ * \param bufSize [IN] Size of the pBuffer in bytes\r
+ * \param devProp [IN] CUDA device properties structure, containing texture alignment information\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStSqrIntegral_8u64u_C1R(Ncv8u *d_src, Ncv32u srcStep,\r
+ Ncv64u *d_dst, Ncv32u dstStep, NcvSize32u roiSize,\r
+ Ncv8u *pBuffer, Ncv32u bufSize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Creates a squared integral image representation for the input image. Host implementation\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppiStSqrIntegral_8u64u_C1R_host(Ncv8u *h_src, Ncv32u srcStep,\r
+ Ncv64u *h_dst, Ncv32u dstStep, NcvSize32u roiSize);\r
+\r
+\r
+/*@}*/\r
+\r
+\r
+/** \defgroup npps NPPST Signal Processing\r
+* @{\r
+*/\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values\r
+ *\r
+ * \param srcLen [IN] Length of the input vector in elements\r
+ * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
+ * \param devProp [IN] CUDA device properties structure, containing texture alignment information\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompactGetSize_32u(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit signed values\r
+ * \see nppsStCompactGetSize_32u\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompactGetSize_32s(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit float values\r
+ * \see nppsStCompactGetSize_32u\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompactGetSize_32f(Ncv32u srcLen, Ncv32u *pBufsize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit unsigned values\r
+ *\r
+ * \param d_src [IN] Source vector pointer (CUDA device memory)\r
+ * \param srcLen [IN] Source vector length\r
+ * \param d_dst [OUT] Destination vector pointer (CUDA device memory)\r
+ * \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)\r
+ * \param elemRemove [IN] The value to be removed\r
+ * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
+ * \param bufSize [IN] Size of the pBuffer in bytes\r
+ * \param devProp [IN] CUDA device properties structure, containing texture alignment information\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompact_32u(Ncv32u *d_src, Ncv32u srcLen,\r
+ Ncv32u *d_dst, Ncv32u *p_dstLen,\r
+ Ncv32u elemRemove, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit signed values\r
+ * \see nppsStCompact_32u\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompact_32s(Ncv32s *d_src, Ncv32u srcLen,\r
+ Ncv32s *d_dst, Ncv32u *p_dstLen,\r
+ Ncv32s elemRemove, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit float values\r
+ * \see nppsStCompact_32u\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompact_32f(Ncv32f *d_src, Ncv32u srcLen,\r
+ Ncv32f *d_dst, Ncv32u *p_dstLen,\r
+ Ncv32f elemRemove, Ncv8u *pBuffer,\r
+ Ncv32u bufSize, cudaDeviceProp &devProp);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation\r
+ *\r
+ * \param h_src              [IN] Source vector pointer (Host or pinned memory)\r
+ * \param srcLen [IN] Source vector length\r
+ * \param h_dst              [OUT] Destination vector pointer (Host or pinned memory)\r
+ * \param dstLen [OUT] Pointer to the destination vector length (can be NULL)\r
+ * \param elemRemove [IN] The value to be removed\r
+ *\r
+ * \return NCV status code\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompact_32u_host(Ncv32u *h_src, Ncv32u srcLen,\r
+ Ncv32u *h_dst, Ncv32u *dstLen, Ncv32u elemRemove);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation\r
+ * \see nppsStCompact_32u_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompact_32s_host(Ncv32s *h_src, Ncv32u srcLen,\r
+ Ncv32s *h_dst, Ncv32u *dstLen, Ncv32s elemRemove);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation\r
+ * \see nppsStCompact_32u_host\r
+ */\r
+NCV_EXPORTS\r
+NCVStatus nppsStCompact_32f_host(Ncv32f *h_src, Ncv32u srcLen,\r
+ Ncv32f *h_dst, Ncv32u *dstLen, Ncv32f elemRemove);\r
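+\r
+\r
+/*\r
+ * Illustrative usage sketch (assumes d_src and d_dst are valid device vectors of\r
+ * srcLen elements and that the caller checks all return codes); it removes every\r
+ * zero element from d_src:\r
+ *\r
+ *     cudaDeviceProp devProp;\r
+ *     cudaGetDeviceProperties(&devProp, 0);\r
+ *     Ncv32u bufSize = 0;\r
+ *     nppsStCompactGetSize_32u(srcLen, &bufSize, devProp);\r
+ *     Ncv8u *pBuffer = NULL;\r
+ *     cudaMalloc((void **)&pBuffer, bufSize);\r
+ *     Ncv32u *p_dstLen = NULL;\r
+ *     cudaMallocHost((void **)&p_dstLen, sizeof(Ncv32u));\r
+ *     nppsStCompact_32u(d_src, srcLen, d_dst, p_dstLen, 0, pBuffer, bufSize, devProp);\r
+ *     cudaFreeHost(p_dstLen);\r
+ *     cudaFree(pBuffer);\r
+ */\r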
+\r
+\r
+/*@}*/\r
+\r
+\r
+#endif // _npp_staging_hpp_\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
+// \r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+\r
+#if !defined (HAVE_CUDA)\r
+\r
+\r
+#else /* !defined (HAVE_CUDA) */\r
+\r
+\r
+#include <ios>\r
+#include <algorithm>\r
+#include <stdio.h>\r
+#include <stdlib.h>\r
+#include <stdarg.h>\r
+#include "NCV.hpp"\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Error handling helpers\r
+//\r
+//==============================================================================\r
+\r
+\r
+static void stdioDebugOutput(const char *msg)\r
+{\r
+ printf("%s", msg);\r
+}\r
+\r
+\r
+static NCVDebugOutputHandler *debugOutputHandler = stdioDebugOutput;\r
+\r
+\r
+void ncvDebugOutput(const char *msg, ...)\r
+{\r
+ const int K_DEBUG_STRING_MAXLEN = 1024;\r
+ char buffer[K_DEBUG_STRING_MAXLEN];\r
+ va_list args;\r
+ va_start(args, msg);\r
+#if defined(_MSC_VER)\r
+    vsnprintf_s(buffer, K_DEBUG_STRING_MAXLEN, K_DEBUG_STRING_MAXLEN-1, msg, args);\r
+#else\r
+    vsnprintf(buffer, K_DEBUG_STRING_MAXLEN, msg, args);\r
+#endif\r
+ va_end (args);\r
+ debugOutputHandler(buffer);\r
+}\r
+\r
+\r
+void ncvSetDebugOutputHandler(NCVDebugOutputHandler *func)\r
+{\r
+ debugOutputHandler = func;\r
+}\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Memory wrappers and helpers\r
+//\r
+//==============================================================================\r
+\r
+\r
+Ncv32u alignUp(Ncv32u what, Ncv32u alignment)\r
+{\r
+ Ncv32u alignMask = alignment-1;\r
+ Ncv32u inverseAlignMask = ~alignMask;\r
+ Ncv32u res = (what + alignMask) & inverseAlignMask;\r
+ return res;\r
+}\r
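+// Worked example: alignUp(10, 8) == 16 and alignUp(16, 8) == 16; the mask arithmetic\r
+// above assumes that the alignment is a power of two.\r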
+\r
+\r
+void NCVMemPtr::clear()\r
+{\r
+ ptr = NULL;\r
+ memtype = NCVMemoryTypeNone;\r
+}\r
+\r
+\r
+void NCVMemSegment::clear()\r
+{\r
+ begin.clear();\r
+ size = 0;\r
+}\r
+\r
+\r
+NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType, const void *src, NCVMemoryType srcType, size_t sz, cudaStream_t cuStream)\r
+{\r
+ NCVStatus ncvStat;\r
+ switch (dstType)\r
+ {\r
+ case NCVMemoryTypeHostPageable:\r
+ case NCVMemoryTypeHostPinned:\r
+ switch (srcType)\r
+ {\r
+ case NCVMemoryTypeHostPageable:\r
+ case NCVMemoryTypeHostPinned:\r
+ memcpy(dst, src, sz);\r
+ ncvStat = NCV_SUCCESS;\r
+ break;\r
+ case NCVMemoryTypeDevice:\r
+ if (cuStream != 0)\r
+ {\r
+ ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToHost, cuStream), NCV_CUDA_ERROR);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToHost), NCV_CUDA_ERROR);\r
+ }\r
+ ncvStat = NCV_SUCCESS;\r
+ break;\r
+ default:\r
+ ncvStat = NCV_MEM_RESIDENCE_ERROR;\r
+ }\r
+ break;\r
+ case NCVMemoryTypeDevice:\r
+ switch (srcType)\r
+ {\r
+ case NCVMemoryTypeHostPageable:\r
+ case NCVMemoryTypeHostPinned:\r
+ if (cuStream != 0)\r
+ {\r
+ ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyHostToDevice, cuStream), NCV_CUDA_ERROR);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);\r
+ }\r
+ ncvStat = NCV_SUCCESS;\r
+ break;\r
+ case NCVMemoryTypeDevice:\r
+ if (cuStream != 0)\r
+ {\r
+ ncvAssertCUDAReturn(cudaMemcpyAsync(dst, src, sz, cudaMemcpyDeviceToDevice, cuStream), NCV_CUDA_ERROR);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertCUDAReturn(cudaMemcpy(dst, src, sz, cudaMemcpyDeviceToDevice), NCV_CUDA_ERROR);\r
+ }\r
+ ncvStat = NCV_SUCCESS;\r
+ break;\r
+ default:\r
+ ncvStat = NCV_MEM_RESIDENCE_ERROR;\r
+ }\r
+ break;\r
+ default:\r
+ ncvStat = NCV_MEM_RESIDENCE_ERROR;\r
+ }\r
+\r
+ return ncvStat;\r
+}\r
+\r
+\r
+//===================================================================\r
+//\r
+// NCVMemStackAllocator class members implementation\r
+//\r
+//===================================================================\r
+\r
+\r
+NCVMemStackAllocator::NCVMemStackAllocator(Ncv32u alignment)\r
+ :\r
+ currentSize(0),\r
+ _maxSize(0),\r
+ allocBegin(NULL),\r
+ begin(NULL),\r
+ _memType(NCVMemoryTypeNone),\r
+ _alignment(alignment)\r
+{\r
+ NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;\r
+ ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: alignment not power of 2");\r
+}\r
+\r
+\r
+NCVMemStackAllocator::NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr)\r
+ :\r
+ currentSize(0),\r
+ _maxSize(0),\r
+ allocBegin(NULL),\r
+ _memType(memT),\r
+ _alignment(alignment)\r
+{\r
+ NcvBool bProperAlignment = (alignment & (alignment-1)) == 0;\r
+ ncvAssertPrintCheck(bProperAlignment, "NCVMemStackAllocator ctor:: _alignment not power of 2");\r
+\r
+ allocBegin = NULL;\r
+\r
+ if (reusePtr == NULL)\r
+ {\r
+ bReusesMemory = false;\r
+ switch (memT)\r
+ {\r
+ case NCVMemoryTypeDevice:\r
+ ncvAssertCUDAReturn(cudaMalloc(&allocBegin, capacity), );\r
+ break;\r
+ case NCVMemoryTypeHostPinned:\r
+ ncvAssertCUDAReturn(cudaMallocHost(&allocBegin, capacity), );\r
+ break;\r
+ case NCVMemoryTypeHostPageable:\r
+ allocBegin = (Ncv8u *)malloc(capacity);\r
+ break;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ bReusesMemory = true;\r
+ allocBegin = (Ncv8u *)reusePtr;\r
+ }\r
+\r
+ if (capacity == 0)\r
+ {\r
+ allocBegin = (Ncv8u *)(0x1);\r
+ }\r
+\r
+ if (!isCounting())\r
+ {\r
+ begin = allocBegin;\r
+ end = begin + capacity;\r
+ }\r
+}\r
+\r
+\r
+NCVMemStackAllocator::~NCVMemStackAllocator()\r
+{\r
+ if (allocBegin != NULL)\r
+ {\r
+ ncvAssertPrintCheck(currentSize == 0, "NCVMemStackAllocator dtor:: not all objects were deallocated properly, forcing destruction");\r
+\r
+ if (!bReusesMemory)\r
+ {\r
+ switch (_memType)\r
+ {\r
+ case NCVMemoryTypeDevice:\r
+ ncvAssertCUDAReturn(cudaFree(allocBegin), );\r
+ break;\r
+ case NCVMemoryTypeHostPinned:\r
+ ncvAssertCUDAReturn(cudaFreeHost(allocBegin), );\r
+ break;\r
+ case NCVMemoryTypeHostPageable:\r
+ free(allocBegin);\r
+ break;\r
+ }\r
+ }\r
+\r
+ allocBegin = NULL;\r
+ }\r
+}\r
+\r
+\r
+NCVStatus NCVMemStackAllocator::alloc(NCVMemSegment &seg, size_t size)\r
+{\r
+ seg.clear();\r
+ ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
+\r
+ size = alignUp(size, this->_alignment);\r
+ this->currentSize += size;\r
+ this->_maxSize = std::max(this->_maxSize, this->currentSize);\r
+\r
+ if (!isCounting())\r
+ {\r
+ size_t availSize = end - begin;\r
+ ncvAssertReturn(size <= availSize, NCV_ALLOCATOR_INSUFFICIENT_CAPACITY);\r
+ }\r
+\r
+ seg.begin.ptr = begin;\r
+ seg.begin.memtype = this->_memType;\r
+ seg.size = size;\r
+ begin += size;\r
+\r
+ return NCV_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus NCVMemStackAllocator::dealloc(NCVMemSegment &seg)\r
+{\r
+ ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
+ ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);\r
+ ncvAssertReturn(seg.begin.ptr != NULL || isCounting(), NCV_ALLOCATOR_BAD_DEALLOC);\r
+ ncvAssertReturn(seg.begin.ptr == begin - seg.size, NCV_ALLOCATOR_DEALLOC_ORDER);\r
+\r
+ currentSize -= seg.size;\r
+ begin -= seg.size;\r
+\r
+ seg.clear();\r
+\r
+ ncvAssertReturn(allocBegin <= begin, NCV_ALLOCATOR_BAD_DEALLOC);\r
+\r
+ return NCV_SUCCESS;\r
+}\r
+\r
+\r
+NcvBool NCVMemStackAllocator::isInitialized(void) const\r
+{\r
+    return (((this->_alignment & (this->_alignment-1)) == 0) && isCounting()) || this->allocBegin != NULL;\r
+}\r
+\r
+\r
+NcvBool NCVMemStackAllocator::isCounting(void) const\r
+{\r
+ return this->_memType == NCVMemoryTypeNone;\r
+}\r
+\r
+\r
+NCVMemoryType NCVMemStackAllocator::memType(void) const\r
+{\r
+ return this->_memType;\r
+}\r
+\r
+\r
+Ncv32u NCVMemStackAllocator::alignment(void) const\r
+{\r
+ return this->_alignment;\r
+}\r
+\r
+\r
+size_t NCVMemStackAllocator::maxSize(void) const\r
+{\r
+ return this->_maxSize;\r
+}\r
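+\r
+\r
+/*\r
+ * Illustrative usage sketch (assumes devProp holds the cudaDeviceProp of the target\r
+ * device): a counting allocator built with the alignment-only constructor measures the\r
+ * peak requirement of a processing pass, and maxSize() then sizes the real device\r
+ * stack allocator for the second, allocating pass:\r
+ *\r
+ *     NCVMemStackAllocator counting(static_cast<Ncv32u>(devProp.textureAlignment));\r
+ *     // ... run the pipeline once against 'counting' (no real allocations happen) ...\r
+ *     NCVMemStackAllocator device(NCVMemoryTypeDevice, counting.maxSize(),\r
+ *                                 static_cast<Ncv32u>(devProp.textureAlignment));\r
+ *     // ... run the pipeline again against 'device' ...\r
+ */\r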
+\r
+\r
+//===================================================================\r
+//\r
+// NCVMemNativeAllocator class members implementation\r
+//\r
+//===================================================================\r
+\r
+\r
+NCVMemNativeAllocator::NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment)\r
+ :\r
+ currentSize(0),\r
+ _maxSize(0),\r
+ _memType(memT),\r
+ _alignment(alignment)\r
+{\r
+ ncvAssertPrintReturn(memT != NCVMemoryTypeNone, "NCVMemNativeAllocator ctor:: counting not permitted for this allocator type", );\r
+}\r
+\r
+\r
+NCVMemNativeAllocator::~NCVMemNativeAllocator()\r
+{\r
+ ncvAssertPrintCheck(currentSize == 0, "NCVMemNativeAllocator dtor:: detected memory leak");\r
+}\r
+\r
+\r
+NCVStatus NCVMemNativeAllocator::alloc(NCVMemSegment &seg, size_t size)\r
+{\r
+ seg.clear();\r
+ ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
+\r
+ switch (this->_memType)\r
+ {\r
+ case NCVMemoryTypeDevice:\r
+ ncvAssertCUDAReturn(cudaMalloc(&seg.begin.ptr, size), NCV_CUDA_ERROR);\r
+ break;\r
+ case NCVMemoryTypeHostPinned:\r
+ ncvAssertCUDAReturn(cudaMallocHost(&seg.begin.ptr, size), NCV_CUDA_ERROR);\r
+ break;\r
+ case NCVMemoryTypeHostPageable:\r
+ seg.begin.ptr = (Ncv8u *)malloc(size);\r
+ break;\r
+ }\r
+\r
+ this->currentSize += alignUp(size, this->_alignment);\r
+ this->_maxSize = std::max(this->_maxSize, this->currentSize);\r
+\r
+ seg.begin.memtype = this->_memType;\r
+ seg.size = size;\r
+\r
+ return NCV_SUCCESS;\r
+}\r
+\r
+\r
+NCVStatus NCVMemNativeAllocator::dealloc(NCVMemSegment &seg)\r
+{\r
+ ncvAssertReturn(isInitialized(), NCV_ALLOCATOR_BAD_ALLOC);\r
+ ncvAssertReturn(seg.begin.memtype == this->_memType, NCV_ALLOCATOR_BAD_DEALLOC);\r
+ ncvAssertReturn(seg.begin.ptr != NULL, NCV_ALLOCATOR_BAD_DEALLOC);\r
+\r
+ ncvAssertReturn(currentSize >= alignUp(seg.size, this->_alignment), NCV_ALLOCATOR_BAD_DEALLOC);\r
+ currentSize -= alignUp(seg.size, this->_alignment);\r
+\r
+ switch (this->_memType)\r
+ {\r
+ case NCVMemoryTypeDevice:\r
+ ncvAssertCUDAReturn(cudaFree(seg.begin.ptr), NCV_CUDA_ERROR);\r
+ break;\r
+ case NCVMemoryTypeHostPinned:\r
+ ncvAssertCUDAReturn(cudaFreeHost(seg.begin.ptr), NCV_CUDA_ERROR);\r
+ break;\r
+ case NCVMemoryTypeHostPageable:\r
+ free(seg.begin.ptr);\r
+ break;\r
+ }\r
+\r
+ seg.clear();\r
+\r
+ return NCV_SUCCESS;\r
+}\r
+\r
+\r
+NcvBool NCVMemNativeAllocator::isInitialized(void) const\r
+{\r
+ return (this->_alignment != 0);\r
+}\r
+\r
+\r
+NcvBool NCVMemNativeAllocator::isCounting(void) const\r
+{\r
+ return false;\r
+}\r
+\r
+\r
+NCVMemoryType NCVMemNativeAllocator::memType(void) const\r
+{\r
+ return this->_memType;\r
+}\r
+\r
+\r
+Ncv32u NCVMemNativeAllocator::alignment(void) const\r
+{\r
+ return this->_alignment;\r
+}\r
+\r
+\r
+size_t NCVMemNativeAllocator::maxSize(void) const\r
+{\r
+ return this->_maxSize;\r
+}\r
+\r
+\r
+//===================================================================\r
+//\r
+// Time and timer routines\r
+//\r
+//===================================================================\r
+\r
+\r
+typedef struct _NcvTimeMoment NcvTimeMoment;\r
+\r
+#if defined(_WIN32) || defined(_WIN64)\r
+\r
+ #include <Windows.h>\r
+\r
+ typedef struct _NcvTimeMoment\r
+ {\r
+ LONGLONG moment, freq;\r
+ } NcvTimeMoment;\r
+\r
+\r
+ static void _ncvQueryMoment(NcvTimeMoment *t)\r
+ {\r
+ QueryPerformanceFrequency((LARGE_INTEGER *)&(t->freq));\r
+ QueryPerformanceCounter((LARGE_INTEGER *)&(t->moment));\r
+ }\r
+\r
+\r
+ double _ncvMomentToMicroseconds(NcvTimeMoment *t)\r
+ {\r
+ return 1000000.0 * t->moment / t->freq;\r
+ }\r
+\r
+\r
+ double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
+ {\r
+ return 1000000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);\r
+ }\r
+\r
+\r
+ double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
+ {\r
+ return 1000.0 * 2 * ((t2->moment) - (t1->moment)) / (t1->freq + t2->freq);\r
+ }\r
+\r
+#elif defined(__unix__)\r
+\r
+ #include <sys/time.h>\r
+\r
+ typedef struct _NcvTimeMoment\r
+ {\r
+ struct timeval tv; \r
+ struct timezone tz;\r
+ } NcvTimeMoment;\r
+\r
+\r
+ void _ncvQueryMoment(NcvTimeMoment *t)\r
+ {\r
+ gettimeofday(& t->tv, & t->tz);\r
+ }\r
+\r
+\r
+ double _ncvMomentToMicroseconds(NcvTimeMoment *t)\r
+ {\r
+ return 1000000.0 * t->tv.tv_sec + (double)t->tv.tv_usec;\r
+ }\r
+\r
+\r
+ double _ncvMomentsDiffToMicroseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
+ {\r
+ return (((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000000 + (double)t2->tv.tv_usec - (double)t1->tv.tv_usec);\r
+ }\r
+\r
+\r
+    double _ncvMomentsDiffToMilliseconds(NcvTimeMoment *t1, NcvTimeMoment *t2)\r
+    {\r
+        return ((double)t2->tv.tv_sec - (double)t1->tv.tv_sec) * 1000 + ((double)t2->tv.tv_usec - (double)t1->tv.tv_usec) / 1000;\r
+    }\r
+\r
+#endif //#if defined(_WIN32) || defined(_WIN64)\r
+\r
+\r
+struct _NcvTimer\r
+{\r
+ NcvTimeMoment t1, t2;\r
+};\r
+\r
+\r
+NcvTimer ncvStartTimer(void)\r
+{\r
+ struct _NcvTimer *t;\r
+ t = (struct _NcvTimer *)malloc(sizeof(struct _NcvTimer));\r
+ _ncvQueryMoment(&t->t1);\r
+ return t;\r
+}\r
+\r
+\r
+double ncvEndQueryTimerUs(NcvTimer t)\r
+{\r
+ double res;\r
+ _ncvQueryMoment(&t->t2);\r
+ res = _ncvMomentsDiffToMicroseconds(&t->t1, &t->t2);\r
+ free(t);\r
+ return res;\r
+}\r
+\r
+\r
+double ncvEndQueryTimerMs(NcvTimer t)\r
+{\r
+ double res;\r
+ _ncvQueryMoment(&t->t2);\r
+ res = _ncvMomentsDiffToMilliseconds(&t->t1, &t->t2);\r
+ free(t);\r
+ return res;\r
+}\r
+\r
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
+// \r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef _ncv_hpp_\r
+#define _ncv_hpp_\r
+\r
+#if (defined WIN32 || defined _WIN32 || defined WINCE) && defined CVAPI_EXPORTS //&& !defined(__CUDACC__) \r
+ #define NCV_EXPORTS __declspec(dllexport)\r
+#else\r
+ #define NCV_EXPORTS\r
+#endif\r
+\r
+#include <cuda_runtime.h>\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Compile-time assert functionality\r
+//\r
+//==============================================================================\r
+\r
+\r
+/**\r
+* Compile-time assert namespace\r
+*/\r
+namespace NcvCTprep\r
+{\r
+ template <bool x>\r
+ struct CT_ASSERT_FAILURE;\r
+\r
+ template <>\r
+ struct CT_ASSERT_FAILURE<true> {};\r
+\r
+ template <int x>\r
+ struct assertTest{};\r
+}\r
+\r
+\r
+#define NCV_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro\r
+#define NCV_CT_PREP_PASTE(a,b) NCV_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro\r
+\r
+\r
+/**\r
+* Performs compile-time assertion of a condition on the file scope\r
+*/\r
+#define NCV_CT_ASSERT(X) \\r
+ typedef NcvCTprep::assertTest<sizeof(NcvCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \\r
+ NCV_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)\r
+\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Alignment macros\r
+//\r
+//==============================================================================\r
+\r
+\r
+#if !defined(__align__) && !defined(__CUDACC__)\r
+ #if defined(_WIN32) || defined(_WIN64)\r
+ #define __align__(n) __declspec(align(n))\r
+ #elif defined(__unix__)\r
+ #define __align__(n) __attribute__((__aligned__(n)))\r
+ #endif\r
+#endif\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Integral and compound types of guaranteed size\r
+//\r
+//==============================================================================\r
+\r
+\r
+typedef bool NcvBool;\r
+typedef long long Ncv64s;\r
+typedef unsigned long long Ncv64u;\r
+typedef int Ncv32s;\r
+typedef unsigned int Ncv32u;\r
+typedef short Ncv16s;\r
+typedef unsigned short Ncv16u;\r
+typedef char Ncv8s;\r
+typedef unsigned char Ncv8u;\r
+typedef float Ncv32f;\r
+typedef double Ncv64f;\r
+\r
+\r
+struct NcvRect8u\r
+{\r
+ Ncv8u x;\r
+ Ncv8u y;\r
+ Ncv8u width;\r
+ Ncv8u height;\r
+ NcvRect8u() : x(0), y(0), width(0), height(0) {};\r
+ NcvRect8u(Ncv8u x, Ncv8u y, Ncv8u width, Ncv8u height) : x(x), y(y), width(width), height(height) {}\r
+};\r
+\r
+\r
+struct NcvRect32s\r
+{\r
+ Ncv32s x; ///< x-coordinate of upper left corner.\r
+ Ncv32s y; ///< y-coordinate of upper left corner.\r
+ Ncv32s width; ///< Rectangle width.\r
+ Ncv32s height; ///< Rectangle height.\r
+ NcvRect32s() : x(0), y(0), width(0), height(0) {};\r
+ NcvRect32s(Ncv32s x, Ncv32s y, Ncv32s width, Ncv32s height) : x(x), y(y), width(width), height(height) {}\r
+};\r
+\r
+\r
+struct NcvRect32u\r
+{\r
+ Ncv32u x; ///< x-coordinate of upper left corner.\r
+ Ncv32u y; ///< y-coordinate of upper left corner.\r
+ Ncv32u width; ///< Rectangle width.\r
+ Ncv32u height; ///< Rectangle height.\r
+ NcvRect32u() : x(0), y(0), width(0), height(0) {};\r
+ NcvRect32u(Ncv32u x, Ncv32u y, Ncv32u width, Ncv32u height) : x(x), y(y), width(width), height(height) {}\r
+};\r
+\r
+\r
+struct NcvSize32s\r
+{\r
+ Ncv32s width; ///< Rectangle width.\r
+ Ncv32s height; ///< Rectangle height.\r
+ NcvSize32s() : width(0), height(0) {};\r
+ NcvSize32s(Ncv32s width, Ncv32s height) : width(width), height(height) {}\r
+};\r
+\r
+\r
+struct NcvSize32u\r
+{\r
+ Ncv32u width; ///< Rectangle width.\r
+ Ncv32u height; ///< Rectangle height.\r
+ NcvSize32u() : width(0), height(0) {};\r
+ NcvSize32u(Ncv32u width, Ncv32u height) : width(width), height(height) {}\r
+};\r
+\r
+\r
+NCV_CT_ASSERT(sizeof(NcvBool) <= 4);\r
+NCV_CT_ASSERT(sizeof(Ncv64s) == 8);\r
+NCV_CT_ASSERT(sizeof(Ncv64u) == 8);\r
+NCV_CT_ASSERT(sizeof(Ncv32s) == 4);\r
+NCV_CT_ASSERT(sizeof(Ncv32u) == 4);\r
+NCV_CT_ASSERT(sizeof(Ncv16s) == 2);\r
+NCV_CT_ASSERT(sizeof(Ncv16u) == 2);\r
+NCV_CT_ASSERT(sizeof(Ncv8s) == 1);\r
+NCV_CT_ASSERT(sizeof(Ncv8u) == 1);\r
+NCV_CT_ASSERT(sizeof(Ncv32f) == 4);\r
+NCV_CT_ASSERT(sizeof(Ncv64f) == 8);\r
+NCV_CT_ASSERT(sizeof(NcvRect8u) == sizeof(Ncv32u));\r
+NCV_CT_ASSERT(sizeof(NcvRect32s) == 4 * sizeof(Ncv32s));\r
+NCV_CT_ASSERT(sizeof(NcvRect32u) == 4 * sizeof(Ncv32u));\r
+NCV_CT_ASSERT(sizeof(NcvSize32u) == 2 * sizeof(Ncv32u));\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Persistent constants\r
+//\r
+//==============================================================================\r
+\r
+\r
+const Ncv32u K_WARP_SIZE = 32;\r
+const Ncv32u K_LOG2_WARP_SIZE = 5;\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Error handling\r
+//\r
+//==============================================================================\r
+\r
+\r
+#define NCV_CT_PREP_STRINGIZE_AUX(x) #x\r
+#define NCV_CT_PREP_STRINGIZE(x) NCV_CT_PREP_STRINGIZE_AUX(x)\r
+\r
+\r
+NCV_EXPORTS void ncvDebugOutput(const char *msg, ...);\r
+\r
+\r
+typedef void NCVDebugOutputHandler(const char* msg);\r
+\r
+\r
+NCV_EXPORTS void ncvSetDebugOutputHandler(NCVDebugOutputHandler* func);\r
+\r
+\r
+#define ncvAssertPrintCheck(pred, msg) \\r
+ ((pred) ? true : (ncvDebugOutput("\n%s\n", \\r
+ "NCV Assertion Failed: " msg ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__) \\r
+ ), false))\r
+\r
+\r
+#define ncvAssertPrintReturn(pred, msg, err) \\r
+ if (ncvAssertPrintCheck(pred, msg)) ; else return err\r
+\r
+\r
+#define ncvAssertReturn(pred, err) \\r
+ do \\r
+ { \\r
+ if (!(pred)) \\r
+ { \\r
+ ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: retcode=", (int)err, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \\r
+ return err; \\r
+ } \\r
+ } while (0)\r
+\r
+\r
+#define ncvAssertReturnNcvStat(ncvOp) \\r
+ do \\r
+ { \\r
+ NCVStatus _ncvStat = ncvOp; \\r
+ if (NCV_SUCCESS != _ncvStat) \\r
+ { \\r
+ ncvDebugOutput("\n%s%d%s\n", "NCV Assertion Failed: NcvStat=", (int)_ncvStat, ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \\r
+ return _ncvStat; \\r
+ } \\r
+ } while (0)\r
+\r
+\r
+#define ncvAssertCUDAReturn(cudacall, errCode) \\r
+ do \\r
+ { \\r
+ cudaError_t resCall = cudacall; \\r
+ cudaError_t resGLE = cudaGetLastError(); \\r
+ if (cudaSuccess != resCall || cudaSuccess != resGLE) \\r
+ { \\r
+ ncvDebugOutput("\n%s%d%s\n", "NCV CUDA Assertion Failed: cudaError_t=", (int)(resCall | resGLE), ", file=" __FILE__ ", line=" NCV_CT_PREP_STRINGIZE(__LINE__)); \\r
+ return errCode; \\r
+ } \\r
+ } while (0)\r
+\r
+\r
+/**\r
+* Return-codes for status notification, errors and warnings\r
+*/\r
+enum NCVStatus\r
+{\r
+ //NCV statuses\r
+ NCV_SUCCESS,\r
+\r
+ NCV_CUDA_ERROR,\r
+ NCV_NPP_ERROR,\r
+ NCV_FILE_ERROR,\r
+\r
+ NCV_NULL_PTR,\r
+ NCV_INCONSISTENT_INPUT,\r
+ NCV_TEXTURE_BIND_ERROR,\r
+ NCV_DIMENSIONS_INVALID,\r
+\r
+ NCV_INVALID_ROI,\r
+ NCV_INVALID_STEP,\r
+ NCV_INVALID_SCALE,\r
+\r
+ NCV_ALLOCATOR_NOT_INITIALIZED,\r
+ NCV_ALLOCATOR_BAD_ALLOC,\r
+ NCV_ALLOCATOR_BAD_DEALLOC,\r
+ NCV_ALLOCATOR_INSUFFICIENT_CAPACITY,\r
+ NCV_ALLOCATOR_DEALLOC_ORDER,\r
+ NCV_ALLOCATOR_BAD_REUSE,\r
+\r
+ NCV_MEM_COPY_ERROR,\r
+ NCV_MEM_RESIDENCE_ERROR,\r
+ NCV_MEM_INSUFFICIENT_CAPACITY,\r
+\r
+ NCV_HAAR_INVALID_PIXEL_STEP,\r
+ NCV_HAAR_TOO_MANY_FEATURES_IN_CLASSIFIER,\r
+ NCV_HAAR_TOO_MANY_FEATURES_IN_CASCADE,\r
+ NCV_HAAR_TOO_LARGE_FEATURES,\r
+ NCV_HAAR_XML_LOADING_EXCEPTION,\r
+\r
+ NCV_NOIMPL_HAAR_TILTED_FEATURES,\r
+\r
+ NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,\r
+\r
+ //NPP statuses\r
+ NPPST_SUCCESS = NCV_SUCCESS, ///< Successful operation (same as NPP_NO_ERROR)\r
+ NPPST_ERROR, ///< Unknown error\r
+ NPPST_CUDA_KERNEL_EXECUTION_ERROR, ///< CUDA kernel execution error\r
+ NPPST_NULL_POINTER_ERROR, ///< NULL pointer argument error\r
+ NPPST_TEXTURE_BIND_ERROR, ///< CUDA texture binding error or non-zero offset returned\r
+ NPPST_MEMCPY_ERROR, ///< CUDA memory copy error\r
+ NPPST_MEM_ALLOC_ERR, ///< CUDA memory allocation error\r
+ NPPST_MEMFREE_ERR, ///< CUDA memory deallocation error\r
+\r
+ //NPPST statuses\r
+ NPPST_INVALID_ROI, ///< Invalid region of interest argument\r
+ NPPST_INVALID_STEP, ///< Invalid image lines step argument (check sign, alignment, relation to image width)\r
+ NPPST_INVALID_SCALE, ///< Invalid scale parameter passed\r
+ NPPST_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer\r
+ NPPST_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)\r
+ NPPST_MEM_INTERNAL_ERROR, ///< Internal memory management error\r
+};\r
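+\r
+\r
+/*\r
+ * Illustrative usage sketch of the assertion macros above (copyToDevice is a\r
+ * hypothetical helper, not part of this header):\r
+ *\r
+ *     NCVStatus copyToDevice(void *d_dst, const void *h_src, size_t sz)\r
+ *     {\r
+ *         ncvAssertReturn(d_dst != NULL && h_src != NULL, NCV_NULL_PTR);\r
+ *         ncvAssertCUDAReturn(cudaMemcpy(d_dst, h_src, sz, cudaMemcpyHostToDevice), NCV_CUDA_ERROR);\r
+ *         return NCV_SUCCESS;\r
+ *     }\r
+ */\r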
+\r
+\r
+#define NCV_SET_SKIP_COND(x) \\r
+ bool __ncv_skip_cond = x\r
+\r
+\r
+#define NCV_RESET_SKIP_COND(x) \\r
+ __ncv_skip_cond = x\r
+\r
+\r
+#define NCV_SKIP_COND_BEGIN \\r
+ if (!__ncv_skip_cond) {\r
+\r
+\r
+#define NCV_SKIP_COND_END \\r
+ }\r
+\r
+\r
+//==============================================================================\r
+//\r
+// Timer\r
+//\r
+//==============================================================================\r
+\r
+\r
+typedef struct _NcvTimer *NcvTimer;\r
+\r
+NCV_EXPORTS NcvTimer ncvStartTimer(void);\r
+\r
+NCV_EXPORTS double ncvEndQueryTimerUs(NcvTimer t);\r
+\r
+NCV_EXPORTS double ncvEndQueryTimerMs(NcvTimer t);\r
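+\r
+\r
+/*\r
+ * Illustrative usage sketch: the end-query calls also release the timer object.\r
+ *\r
+ *     NcvTimer timer = ncvStartTimer();\r
+ *     // ... work to be measured ...\r
+ *     double elapsedMs = ncvEndQueryTimerMs(timer);\r
+ */\r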
+\r
+\r
+//==============================================================================\r
+//\r
+// Memory management classes template compound types\r
+//\r
+//==============================================================================\r
+\r
+\r
+/**\r
+* Rounds a value up to the nearest multiple of the given alignment\r
+*/\r
+NCV_EXPORTS Ncv32u alignUp(Ncv32u what, Ncv32u alignment);\r
+\r
+\r
+/**\r
+* NCVMemoryType\r
+*/\r
+enum NCVMemoryType\r
+{\r
+ NCVMemoryTypeNone,\r
+ NCVMemoryTypeHostPageable,\r
+ NCVMemoryTypeHostPinned,\r
+ NCVMemoryTypeDevice\r
+};\r
+\r
+\r
+/**\r
+* NCVMemPtr\r
+*/\r
+struct NCV_EXPORTS NCVMemPtr\r
+{\r
+ void *ptr;\r
+ NCVMemoryType memtype;\r
+ void clear();\r
+};\r
+\r
+\r
+/**\r
+* NCVMemSegment\r
+*/\r
+struct NCV_EXPORTS NCVMemSegment\r
+{\r
+ NCVMemPtr begin;\r
+ size_t size;\r
+ void clear();\r
+};\r
+\r
+\r
+/**\r
+* INCVMemAllocator (Interface)\r
+*/\r
+class NCV_EXPORTS INCVMemAllocator\r
+{\r
+public:\r
+ virtual ~INCVMemAllocator() = 0;\r
+\r
+ virtual NCVStatus alloc(NCVMemSegment &seg, size_t size) = 0;\r
+ virtual NCVStatus dealloc(NCVMemSegment &seg) = 0;\r
+\r
+ virtual NcvBool isInitialized(void) const = 0;\r
+ virtual NcvBool isCounting(void) const = 0;\r
+ \r
+ virtual NCVMemoryType memType(void) const = 0;\r
+ virtual Ncv32u alignment(void) const = 0;\r
+ virtual size_t maxSize(void) const = 0;\r
+};\r
+\r
+inline INCVMemAllocator::~INCVMemAllocator() {}\r
+\r
+\r
+/**\r
+* NCVMemStackAllocator\r
+*/\r
+class NCV_EXPORTS NCVMemStackAllocator : public INCVMemAllocator\r
+{\r
+ NCVMemStackAllocator();\r
+ NCVMemStackAllocator(const NCVMemStackAllocator &);\r
+\r
+public:\r
+\r
+ explicit NCVMemStackAllocator(Ncv32u alignment);\r
+ NCVMemStackAllocator(NCVMemoryType memT, size_t capacity, Ncv32u alignment, void *reusePtr=NULL);\r
+ virtual ~NCVMemStackAllocator();\r
+\r
+ virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);\r
+ virtual NCVStatus dealloc(NCVMemSegment &seg);\r
+\r
+ virtual NcvBool isInitialized(void) const;\r
+ virtual NcvBool isCounting(void) const;\r
+\r
+ virtual NCVMemoryType memType(void) const;\r
+ virtual Ncv32u alignment(void) const;\r
+ virtual size_t maxSize(void) const;\r
+\r
+private:\r
+\r
+ NCVMemoryType _memType;\r
+ Ncv32u _alignment;\r
+ Ncv8u *allocBegin;\r
+ Ncv8u *begin;\r
+ Ncv8u *end;\r
+ size_t currentSize;\r
+ size_t _maxSize;\r
+ NcvBool bReusesMemory;\r
+};\r
+\r
+\r
+/**\r
+* NCVMemNativeAllocator\r
+*/\r
+class NCV_EXPORTS NCVMemNativeAllocator : public INCVMemAllocator\r
+{\r
+public:\r
+\r
+ NCVMemNativeAllocator(NCVMemoryType memT, Ncv32u alignment);\r
+ virtual ~NCVMemNativeAllocator();\r
+\r
+ virtual NCVStatus alloc(NCVMemSegment &seg, size_t size);\r
+ virtual NCVStatus dealloc(NCVMemSegment &seg);\r
+\r
+ virtual NcvBool isInitialized(void) const;\r
+ virtual NcvBool isCounting(void) const;\r
+\r
+ virtual NCVMemoryType memType(void) const;\r
+ virtual Ncv32u alignment(void) const;\r
+ virtual size_t maxSize(void) const;\r
+\r
+private:\r
+\r
+ NCVMemNativeAllocator();\r
+ NCVMemNativeAllocator(const NCVMemNativeAllocator &);\r
+\r
+ NCVMemoryType _memType;\r
+ Ncv32u _alignment;\r
+ size_t currentSize;\r
+ size_t _maxSize;\r
+};\r
+\r
+\r
+/**\r
+* Copy dispatcher\r
+*/\r
+NCV_EXPORTS NCVStatus memSegCopyHelper(void *dst, NCVMemoryType dstType,\r
+ const void *src, NCVMemoryType srcType,\r
+ size_t sz, cudaStream_t cuStream);\r
+\r
+\r
+/**\r
+* NCVVector (1D)\r
+*/\r
+template <class T>\r
+class NCVVector\r
+{\r
+ NCVVector(const NCVVector &);\r
+\r
+public:\r
+\r
+ NCVVector()\r
+ {\r
+ clear();\r
+ }\r
+\r
+ virtual ~NCVVector() {}\r
+\r
+ void clear()\r
+ {\r
+ _ptr = NULL;\r
+ _length = 0;\r
+ _memtype = NCVMemoryTypeNone;\r
+ }\r
+\r
+ NCVStatus copySolid(NCVVector<T> &dst, cudaStream_t cuStream, size_t howMuch=0)\r
+ {\r
+ if (howMuch == 0)\r
+ {\r
+ ncvAssertReturn(dst._length == this->_length, NCV_MEM_COPY_ERROR);\r
+ howMuch = this->_length * sizeof(T);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertReturn(dst._length * sizeof(T) >= howMuch && \r
+ this->_length * sizeof(T) >= howMuch &&\r
+ howMuch > 0, NCV_MEM_COPY_ERROR);\r
+ }\r
+ ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) && \r
+ (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);\r
+\r
+ NCVStatus ncvStat = NCV_SUCCESS;\r
+ if (this->_memtype != NCVMemoryTypeNone)\r
+ {\r
+ ncvStat = memSegCopyHelper(dst._ptr, dst._memtype,\r
+ this->_ptr, this->_memtype,\r
+ howMuch, cuStream);\r
+ }\r
+\r
+ return ncvStat;\r
+ }\r
+\r
+ T *ptr() const {return this->_ptr;}\r
+ size_t length() const {return this->_length;}\r
+ NCVMemoryType memType() const {return this->_memtype;}\r
+\r
+protected:\r
+\r
+ T *_ptr;\r
+ size_t _length;\r
+ NCVMemoryType _memtype;\r
+};\r
+\r
+\r
+/**\r
+* NCVVectorAlloc\r
+*/\r
+template <class T>\r
+class NCVVectorAlloc : public NCVVector<T>\r
+{\r
+ NCVVectorAlloc();\r
+ NCVVectorAlloc(const NCVVectorAlloc &);\r
+ NCVVectorAlloc& operator=(const NCVVectorAlloc<T>&); \r
+\r
+public:\r
+\r
+ NCVVectorAlloc(INCVMemAllocator &allocator, Ncv32u length)\r
+ :\r
+ allocator(allocator)\r
+ {\r
+ NCVStatus ncvStat;\r
+\r
+ this->clear();\r
+ this->allocatedMem.clear();\r
+\r
+ ncvStat = allocator.alloc(this->allocatedMem, length * sizeof(T));\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVVectorAlloc ctor:: alloc failed", );\r
+\r
+ this->_ptr = (T *)this->allocatedMem.begin.ptr;\r
+ this->_length = length;\r
+ this->_memtype = this->allocatedMem.begin.memtype;\r
+ }\r
+\r
+\r
+ ~NCVVectorAlloc()\r
+ {\r
+ NCVStatus ncvStat;\r
+\r
+ ncvStat = allocator.dealloc(this->allocatedMem);\r
+ ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVVectorAlloc dtor:: dealloc failed");\r
+\r
+ this->clear();\r
+ }\r
+\r
+\r
+ NcvBool isMemAllocated() const\r
+ {\r
+ return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());\r
+ }\r
+\r
+\r
+ Ncv32u getAllocatorsAlignment() const\r
+ {\r
+ return allocator.alignment();\r
+ }\r
+\r
+\r
+ NCVMemSegment getSegment() const\r
+ {\r
+ return allocatedMem;\r
+ }\r
+\r
+private: \r
+ INCVMemAllocator &allocator;\r
+ NCVMemSegment allocatedMem;\r
+};\r
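+\r
+\r
+/*\r
+ * Illustrative usage sketch (assumes devProp holds the cudaDeviceProp of the target\r
+ * device and that isMemAllocated() is checked before use):\r
+ *\r
+ *     NCVMemNativeAllocator gpuAlloc(NCVMemoryTypeDevice, static_cast<Ncv32u>(devProp.textureAlignment));\r
+ *     NCVMemNativeAllocator cpuAlloc(NCVMemoryTypeHostPinned, static_cast<Ncv32u>(devProp.textureAlignment));\r
+ *     NCVVectorAlloc<Ncv32u> d_vec(gpuAlloc, 1024);\r
+ *     NCVVectorAlloc<Ncv32u> h_vec(cpuAlloc, 1024);\r
+ *     // ... fill h_vec.ptr() on the host ...\r
+ *     h_vec.copySolid(d_vec, 0);   // cuStream == 0 performs a synchronous copy\r
+ */\r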
+\r
+\r
+/**\r
+* NCVVectorReuse\r
+*/\r
+template <class T>\r
+class NCVVectorReuse : public NCVVector<T>\r
+{\r
+ NCVVectorReuse();\r
+ NCVVectorReuse(const NCVVectorReuse &);\r
+\r
+public:\r
+\r
+ explicit NCVVectorReuse(const NCVMemSegment &memSegment)\r
+ {\r
+ this->bReused = false;\r
+ this->clear();\r
+\r
+ this->_length = memSegment.size / sizeof(T);\r
+ this->_ptr = (T *)memSegment.begin.ptr;\r
+ this->_memtype = memSegment.begin.memtype;\r
+\r
+ this->bReused = true;\r
+ }\r
+\r
+\r
+ NCVVectorReuse(const NCVMemSegment &memSegment, Ncv32u length)\r
+ {\r
+ this->bReused = false;\r
+ this->clear();\r
+\r
+ ncvAssertPrintReturn(length * sizeof(T) <= memSegment.size, \\r
+ "NCVVectorReuse ctor:: memory binding failed due to size mismatch", );\r
+\r
+ this->_length = length;\r
+ this->_ptr = (T *)memSegment.begin.ptr;\r
+ this->_memtype = memSegment.begin.memtype;\r
+\r
+ this->bReused = true;\r
+ }\r
+\r
+\r
+ NcvBool isMemReused() const\r
+ {\r
+ return this->bReused;\r
+ }\r
+\r
+private:\r
+\r
+ NcvBool bReused;\r
+};\r
+\r
+\r
+/**\r
+* NCVMatrix (2D)\r
+*/\r
+template <class T>\r
+class NCVMatrix\r
+{\r
+ NCVMatrix(const NCVMatrix &);\r
+\r
+public:\r
+\r
+ NCVMatrix()\r
+ {\r
+ clear();\r
+ }\r
+\r
+ virtual ~NCVMatrix() {}\r
+\r
+\r
+ void clear()\r
+ {\r
+ _ptr = NULL;\r
+ _pitch = 0;\r
+ _width = 0;\r
+ _height = 0;\r
+ _memtype = NCVMemoryTypeNone;\r
+ }\r
+\r
+\r
+ Ncv32u stride() const\r
+ {\r
+ return _pitch / sizeof(T);\r
+ }\r
+\r
+\r
+ NCVStatus copySolid(NCVMatrix<T> &dst, cudaStream_t cuStream, size_t howMuch=0)\r
+ {\r
+ if (howMuch == 0)\r
+ {\r
+ ncvAssertReturn(dst._pitch == this->_pitch &&\r
+ dst._height == this->_height, NCV_MEM_COPY_ERROR);\r
+ howMuch = this->_pitch * this->_height;\r
+ }\r
+ else\r
+ {\r
+ ncvAssertReturn(dst._pitch * dst._height >= howMuch && \r
+ this->_pitch * this->_height >= howMuch &&\r
+ howMuch > 0, NCV_MEM_COPY_ERROR);\r
+ }\r
+ ncvAssertReturn((this->_ptr != NULL || this->_memtype == NCVMemoryTypeNone) && \r
+ (dst._ptr != NULL || dst._memtype == NCVMemoryTypeNone), NCV_NULL_PTR);\r
+\r
+ NCVStatus ncvStat = NCV_SUCCESS;\r
+ if (this->_memtype != NCVMemoryTypeNone)\r
+ {\r
+ ncvStat = memSegCopyHelper(dst._ptr, dst._memtype, \r
+ this->_ptr, this->_memtype, \r
+ howMuch, cuStream);\r
+ }\r
+\r
+ return ncvStat;\r
+ }\r
+\r
+ T *ptr() const {return this->_ptr;}\r
+ Ncv32u width() const {return this->_width;}\r
+ Ncv32u height() const {return this->_height;}\r
+ Ncv32u pitch() const {return this->_pitch;}\r
+ NCVMemoryType memType() const {return this->_memtype;}\r
+\r
+protected:\r
+\r
+ T *_ptr;\r
+ Ncv32u _width;\r
+ Ncv32u _height;\r
+ Ncv32u _pitch;\r
+ NCVMemoryType _memtype;\r
+};\r
+\r
+\r
+/**\r
+* NCVMatrixAlloc\r
+*/\r
+template <class T>\r
+class NCVMatrixAlloc : public NCVMatrix<T>\r
+{\r
+ NCVMatrixAlloc();\r
+ NCVMatrixAlloc(const NCVMatrixAlloc &);\r
+ NCVMatrixAlloc& operator=(const NCVMatrixAlloc &);\r
+public:\r
+\r
+ NCVMatrixAlloc(INCVMemAllocator &allocator, Ncv32u width, Ncv32u height, Ncv32u pitch=0)\r
+ :\r
+ allocator(allocator)\r
+ {\r
+ NCVStatus ncvStat;\r
+\r
+ this->clear();\r
+ this->allocatedMem.clear();\r
+\r
+ Ncv32u widthBytes = width * sizeof(T);\r
+ Ncv32u pitchBytes = alignUp(widthBytes, allocator.alignment());\r
+\r
+ if (pitch != 0)\r
+ {\r
+ ncvAssertPrintReturn(pitch >= pitchBytes &&\r
+ (pitch & (allocator.alignment() - 1)) == 0,\r
+ "NCVMatrixAlloc ctor:: incorrect pitch passed", );\r
+ pitchBytes = pitch;\r
+ }\r
+\r
+ Ncv32u requiredAllocSize = pitchBytes * height;\r
+\r
+ ncvStat = allocator.alloc(this->allocatedMem, requiredAllocSize);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc ctor:: alloc failed", );\r
+\r
+ this->_ptr = (T *)this->allocatedMem.begin.ptr;\r
+ this->_width = width;\r
+ this->_height = height;\r
+ this->_pitch = pitchBytes;\r
+ this->_memtype = this->allocatedMem.begin.memtype;\r
+ }\r
+\r
+ ~NCVMatrixAlloc()\r
+ {\r
+ NCVStatus ncvStat;\r
+\r
+ ncvStat = allocator.dealloc(this->allocatedMem);\r
+ ncvAssertPrintCheck(ncvStat == NCV_SUCCESS, "NCVMatrixAlloc dtor:: dealloc failed");\r
+\r
+ this->clear();\r
+ }\r
+\r
+\r
+ NcvBool isMemAllocated() const\r
+ {\r
+ return (this->allocatedMem.begin.ptr != NULL) || (this->allocator.isCounting());\r
+ }\r
+\r
+\r
+ Ncv32u getAllocatorsAlignment() const\r
+ {\r
+ return allocator.alignment();\r
+ }\r
+\r
+\r
+ NCVMemSegment getSegment() const\r
+ {\r
+ return allocatedMem;\r
+ }\r
+\r
+private:\r
+\r
+ INCVMemAllocator &allocator;\r
+ NCVMemSegment allocatedMem;\r
+};\r
+\r
+\r
+/**\r
+* NCVMatrixReuse\r
+*/\r
+template <class T>\r
+class NCVMatrixReuse : public NCVMatrix<T>\r
+{\r
+ NCVMatrixReuse();\r
+ NCVMatrixReuse(const NCVMatrixReuse &);\r
+\r
+public:\r
+\r
+ NCVMatrixReuse(const NCVMemSegment &memSegment, Ncv32u alignment, Ncv32u width, Ncv32u height, Ncv32u pitch=0, NcvBool bSkipPitchCheck=false)\r
+ {\r
+ this->bReused = false;\r
+ this->clear();\r
+\r
+ Ncv32u widthBytes = width * sizeof(T);\r
+ Ncv32u pitchBytes = alignUp(widthBytes, alignment);\r
+\r
+ if (pitch != 0)\r
+ {\r
+ if (!bSkipPitchCheck)\r
+ {\r
+ ncvAssertPrintReturn(pitch >= pitchBytes &&\r
+ (pitch & (alignment - 1)) == 0,\r
+ "NCVMatrixReuse ctor:: incorrect pitch passed", );\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(pitch >= widthBytes, "NCVMatrixReuse ctor:: incorrect pitch passed", );\r
+ }\r
+ pitchBytes = pitch;\r
+ }\r
+\r
+ ncvAssertPrintReturn(pitchBytes * height <= memSegment.size, \\r
+ "NCVMatrixReuse ctor:: memory binding failed due to size mismatch", );\r
+\r
+ this->_width = width;\r
+ this->_height = height;\r
+ this->_pitch = pitchBytes;\r
+ this->_ptr = (T *)memSegment.begin.ptr;\r
+ this->_memtype = memSegment.begin.memtype;\r
+\r
+ this->bReused = true;\r
+ }\r
+\r
+\r
+ NcvBool isMemReused() const\r
+ {\r
+ return this->bReused;\r
+ }\r
+\r
+private:\r
+\r
+ NcvBool bReused;\r
+};\r
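+\r
+\r
+/*\r
+ * Illustrative usage sketch (gpuAlloc is a hypothetical INCVMemAllocator for device\r
+ * memory, as in the vector example above): a pitched matrix allocation and a reuse\r
+ * view bound onto its memory segment with the same alignment:\r
+ *\r
+ *     NCVMatrixAlloc<Ncv32f> d_img(gpuAlloc, width, height);\r
+ *     NCVMatrixReuse<Ncv32f> view(d_img.getSegment(), gpuAlloc.alignment(), width, height);\r
+ */\r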
+\r
+#endif // _ncv_hpp_\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. \r
+// \r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef _ncvruntimetemplates_hpp_\r
+#define _ncvruntimetemplates_hpp_\r
+\r
+#include <stdarg.h>\r
+#include <vector>\r
+\r
+\r
+////////////////////////////////////////////////////////////////////////////////\r
+// The Loki Library\r
+// Copyright (c) 2001 by Andrei Alexandrescu\r
+// This code accompanies the book:\r
+// Alexandrescu, Andrei. "Modern C++ Design: Generic Programming and Design \r
+// Patterns Applied". Copyright (c) 2001. Addison-Wesley.\r
+// Permission to use, copy, modify, distribute and sell this software for any \r
+// purpose is hereby granted without fee, provided that the above copyright \r
+// notice appear in all copies and that both that copyright notice and this \r
+// permission notice appear in supporting documentation.\r
+// The author or Addison-Welsey Longman make no representations about the \r
+// suitability of this software for any purpose. It is provided "as is" \r
+// without express or implied warranty.\r
+// http://loki-lib.sourceforge.net/index.php?n=Main.License\r
+////////////////////////////////////////////////////////////////////////////////\r
+\r
+namespace Loki\r
+{\r
+ //==============================================================================\r
+ // class NullType\r
+ // Used as a placeholder for "no type here"\r
+ // Useful as an end marker in typelists \r
+ //==============================================================================\r
+\r
+ class NullType {};\r
+\r
+ //==============================================================================\r
+ // class template Typelist\r
+ // The building block of typelists of any length\r
+ // Use it through the LOKI_TYPELIST_NN macros\r
+ // Defines nested types:\r
+ // Head (first element, a non-typelist type by convention)\r
+ // Tail (second element, can be another typelist)\r
+ //==============================================================================\r
+\r
+ template <class T, class U>\r
+ struct Typelist\r
+ {\r
+ typedef T Head;\r
+ typedef U Tail;\r
+ };\r
+\r
+ //==============================================================================\r
+ // class template Int2Type\r
+ // Converts each integral constant into a unique type\r
+ // Invocation: Int2Type<v> where v is a compile-time constant integral\r
+ // Defines 'value', an enum that evaluates to v\r
+ //==============================================================================\r
+\r
+ template <int v>\r
+ struct Int2Type\r
+ {\r
+ enum { value = v };\r
+ };\r
+\r
+ namespace TL\r
+ {\r
+ //==============================================================================\r
+ // class template TypeAt\r
+ // Finds the type at a given index in a typelist\r
+ // Invocation (TList is a typelist and index is a compile-time integral \r
+ // constant):\r
+ // TypeAt<TList, index>::Result\r
+ // returns the type in position 'index' in TList\r
+ // If you pass an out-of-bounds index, the result is a compile-time error\r
+ //==============================================================================\r
+\r
+ template <class TList, unsigned int index> struct TypeAt;\r
+\r
+ template <class Head, class Tail>\r
+ struct TypeAt<Typelist<Head, Tail>, 0>\r
+ {\r
+ typedef Head Result;\r
+ };\r
+\r
+ template <class Head, class Tail, unsigned int i>\r
+ struct TypeAt<Typelist<Head, Tail>, i>\r
+ {\r
+ typedef typename TypeAt<Tail, i - 1>::Result Result;\r
+ };\r
+ }\r
+}\r
+\r
+\r
+////////////////////////////////////////////////////////////////////////////////\r
+// Runtime boolean template instance dispatcher\r
+// Cyril Crassin <cyril.crassin@icare3d.org>\r
+// NVIDIA, 2010\r
+////////////////////////////////////////////////////////////////////////////////\r
+\r
+namespace NCVRuntimeTemplateBool\r
+{\r
+ //This struct is used to transform a list of parameters into template arguments\r
+ //The idea is to build a typelist containing the arguments\r
+ //and to pass this typelist to a user defined functor\r
+ template<typename TList, int NumArguments, class Func>\r
+ struct KernelCaller\r
+ {\r
+ //Convenience function used by the user\r
+ //Takes a variable argument list, transforms it into a list\r
+ static void call(Func &functor, int dummy, ...)\r
+ {\r
+ //Vector used to collect arguments\r
+ std::vector<int> templateParamList;\r
+\r
+ //Variable argument list manipulation\r
+ va_list listPointer;\r
+ va_start(listPointer, dummy);\r
+ //Collect parameters into the list\r
+ for(int i=0; i<NumArguments; i++)\r
+ {\r
+ int val = va_arg(listPointer, int);\r
+ templateParamList.push_back(val);\r
+ }\r
+ va_end(listPointer);\r
+\r
+ //Call the actual typelist building function\r
+ call(functor, templateParamList);\r
+ }\r
+\r
+ //Actual function called recursively to build a typelist based\r
+ //on a list of values\r
+ static void call( Func &functor, std::vector<int> &templateParamList)\r
+ {\r
+ //Get current parameter value in the list\r
+ int val = templateParamList[templateParamList.size() - 1];\r
+ templateParamList.pop_back();\r
+\r
+ //Select the compile time value to add into the typelist\r
+ //depending on the runtime variable and make recursive call. \r
+ //Both versions are really instantiated\r
+ if(val)\r
+ {\r
+ KernelCaller<\r
+ Loki::Typelist<Loki::Int2Type<true>, TList >,\r
+ NumArguments-1, Func >\r
+ ::call(functor, templateParamList);\r
+ }\r
+ else\r
+ {\r
+ KernelCaller< \r
+ Loki::Typelist<Loki::Int2Type<false>, TList >,\r
+ NumArguments-1, Func >\r
+ ::call(functor, templateParamList);\r
+ }\r
+ }\r
+ };\r
+\r
+ //Specialization for 0 value left in the list\r
+ //-> actual kernel functor call\r
+ template<class TList, class Func>\r
+ struct KernelCaller<TList, 0, Func>\r
+ {\r
+ static void call(Func &functor)\r
+ {\r
+ //Call to the functor's kernel call method\r
+ functor.call(TList()); //TList instantiated to get the method template parameter resolved\r
+ }\r
+\r
+ static void call(Func &functor, std::vector<int> &templateParams)\r
+ {\r
+ functor.call(TList());\r
+ }\r
+ };\r
+}\r
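+\r
+//------------------------------------------------------------------------------\r
+// Usage sketch (illustrative only, not part of the library): a hypothetical\r
+// functor shows how KernelCaller turns runtime booleans into compile-time\r
+// template arguments delivered as a Loki typelist. The first value passed to\r
+// call() after the dummy argument ends up at position 0 of the typelist.\r
+//\r
+//   struct ExampleFunctor\r
+//   {\r
+//       template <class TList>\r
+//       void call(TList)\r
+//       {\r
+//           enum { useMask   = Loki::TL::TypeAt<TList, 0>::Result::value,\r
+//                  useShared = Loki::TL::TypeAt<TList, 1>::Result::value };\r
+//           // launch a kernel templated on <useMask, useShared> here\r
+//       }\r
+//   };\r
+//\r
+//   ExampleFunctor f;\r
+//   bool bMask = true, bShared = false;   // values known only at runtime\r
+//   NCVRuntimeTemplateBool::KernelCaller<Loki::NullType, 2, ExampleFunctor>\r
+//       ::call(f, 0, (int)bMask, (int)bShared);\r
+//------------------------------------------------------------------------------\r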
+\r
+#endif //_ncvruntimetemplates_hpp_\r
\r
//! Read Write Traits\r
\r
- template <size_t src_elem_size, size_t dst_elem_size>
- struct UnReadWriteTraits_
- {
- enum {shift=1};
- };
- template <size_t src_elem_size>
- struct UnReadWriteTraits_<src_elem_size, 1>
- {
- enum {shift=4};
- };
- template <size_t src_elem_size>
- struct UnReadWriteTraits_<src_elem_size, 2>
- {
- enum {shift=2};
+ template <size_t src_elem_size, size_t dst_elem_size>\r
+ struct UnReadWriteTraits_\r
+ {\r
+ enum {shift=1};\r
+ };\r
+ template <size_t src_elem_size>\r
+ struct UnReadWriteTraits_<src_elem_size, 1>\r
+ {\r
+ enum {shift=4};\r
+ };\r
+ template <size_t src_elem_size>\r
+ struct UnReadWriteTraits_<src_elem_size, 2>\r
+ {\r
+ enum {shift=2};\r
};\r
- template <typename T, typename D> struct UnReadWriteTraits
- {
- enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};
-
- typedef typename TypeVec<T, shift>::vec_t read_type;
- typedef typename TypeVec<D, shift>::vec_t write_type;
+ template <typename T, typename D> struct UnReadWriteTraits\r
+ {\r
+ enum {shift=UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift};\r
+ \r
+ typedef typename TypeVec<T, shift>::vec_t read_type;\r
+ typedef typename TypeVec<D, shift>::vec_t write_type;\r
};\r
\r
- template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
- struct BinReadWriteTraits_
- {
- enum {shift=1};
+ template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>\r
+ struct BinReadWriteTraits_\r
+ {\r
+ enum {shift=1};\r
};\r
- template <size_t src_elem_size1, size_t src_elem_size2>
- struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
- {
- enum {shift=4};
+ template <size_t src_elem_size1, size_t src_elem_size2>\r
+ struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>\r
+ {\r
+ enum {shift=4};\r
};\r
- template <size_t src_elem_size1, size_t src_elem_size2>
- struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
- {
- enum {shift=2};
+ template <size_t src_elem_size1, size_t src_elem_size2>\r
+ struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>\r
+ {\r
+ enum {shift=2};\r
};\r
- template <typename T1, typename T2, typename D> struct BinReadWriteTraits
- {
- enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};
-
- typedef typename TypeVec<T1, shift>::vec_t read_type1;
- typedef typename TypeVec<T2, shift>::vec_t read_type2;
- typedef typename TypeVec<D , shift>::vec_t write_type;
+ template <typename T1, typename T2, typename D> struct BinReadWriteTraits\r
+ {\r
+ enum {shift=BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};\r
+\r
+ typedef typename TypeVec<T1, shift>::vec_t read_type1;\r
+ typedef typename TypeVec<T2, shift>::vec_t read_type2;\r
+ typedef typename TypeVec<D , shift>::vec_t write_type;\r
};\r
\r
//! Transform kernels\r
{\r
template <typename T, typename D, typename UnOp, typename Mask>\r
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
dst.x = op(src.x);\r
}\r
\r
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
dst.x = op(src1.x, src2.x);\r
}\r
{\r
template <typename T, typename D, typename UnOp, typename Mask>\r
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
- dst.x = op(src.x);
+ dst.x = op(src.x);\r
if (mask(y, x_shifted + 1))\r
dst.y = op(src.y);\r
}\r
\r
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
- dst.x = op(src1.x, src2.x);
+ dst.x = op(src1.x, src2.x);\r
if (mask(y, x_shifted + 1))\r
dst.y = op(src1.y, src2.y);\r
}\r
{\r
template <typename T, typename D, typename UnOp, typename Mask>\r
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
- dst.x = op(src.x);
+ dst.x = op(src.x);\r
if (mask(y, x_shifted + 1))\r
- dst.y = op(src.y);
+ dst.y = op(src.y);\r
if (mask(y, x_shifted + 2))\r
dst.z = op(src.z);\r
}\r
\r
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
- dst.x = op(src1.x, src2.x);
+ dst.x = op(src1.x, src2.x);\r
if (mask(y, x_shifted + 1))\r
- dst.y = op(src1.y, src2.y);
+ dst.y = op(src1.y, src2.y);\r
if (mask(y, x_shifted + 2))\r
dst.z = op(src1.z, src2.z);\r
}\r
{\r
template <typename T, typename D, typename UnOp, typename Mask>\r
static __device__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
- dst.x = op(src.x);
+ dst.x = op(src.x);\r
if (mask(y, x_shifted + 1))\r
- dst.y = op(src.y);
+ dst.y = op(src.y);\r
if (mask(y, x_shifted + 2))\r
- dst.z = op(src.z);
+ dst.z = op(src.z);\r
if (mask(y, x_shifted + 3))\r
dst.w = op(src.w);\r
}\r
\r
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
static __device__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)\r
- {
+ {\r
if (mask(y, x_shifted))\r
- dst.x = op(src1.x, src2.x);
+ dst.x = op(src1.x, src2.x);\r
if (mask(y, x_shifted + 1))\r
- dst.y = op(src1.y, src2.y);
+ dst.y = op(src1.y, src2.y);\r
if (mask(y, x_shifted + 2))\r
- dst.z = op(src1.z, src2.z);
+ dst.z = op(src1.z, src2.z);\r
if (mask(y, x_shifted + 3))\r
dst.w = op(src1.w, src2.w);\r
}\r
};\r
\r
- template <typename T, typename D, typename UnOp, typename Mask>
- __global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
- {
- typedef typename UnReadWriteTraits<T, D>::read_type read_type;
- typedef typename UnReadWriteTraits<T, D>::write_type write_type;
- const int shift = UnReadWriteTraits<T, D>::shift;
-
- const int x = threadIdx.x + blockIdx.x * blockDim.x;
- const int y = threadIdx.y + blockIdx.y * blockDim.y;
- const int x_shifted = x * shift;
-
- if (y < src_.rows)
- {
- const T* src = src_.ptr(y);
- D* dst = dst_.ptr(y);
-
- if (x_shifted + shift - 1 < src_.cols)
- {
- read_type src_n_el = ((const read_type*)src)[x];
- write_type dst_n_el;
-
- OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
-
- ((write_type*)dst)[x] = dst_n_el;
- }
- else
- {
- for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
- {
- if (mask(y, real_x))
- dst[real_x] = op(src[real_x]);
- }
- }
- }
+ template <typename T, typename D, typename UnOp, typename Mask>\r
+ __global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)\r
+ {\r
+ typedef typename UnReadWriteTraits<T, D>::read_type read_type;\r
+ typedef typename UnReadWriteTraits<T, D>::write_type write_type;\r
+ const int shift = UnReadWriteTraits<T, D>::shift;\r
+\r
+ const int x = threadIdx.x + blockIdx.x * blockDim.x;\r
+ const int y = threadIdx.y + blockIdx.y * blockDim.y;\r
+ const int x_shifted = x * shift;\r
+\r
+ if (y < src_.rows)\r
+ {\r
+ const T* src = src_.ptr(y);\r
+ D* dst = dst_.ptr(y);\r
+\r
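+ // Added note: the branch below is the vectorized fast path, where a full group\r
+ // of 'shift' elements is loaded and stored through read_type/write_type; the\r
+ // else branch handles the partial group at the right edge of the row one\r
+ // element at a time to avoid out-of-bounds accesses.\r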
+ if (x_shifted + shift - 1 < src_.cols)\r
+ {\r
+ read_type src_n_el = ((const read_type*)src)[x];\r
+ write_type dst_n_el;\r
+\r
+ OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);\r
+\r
+ ((write_type*)dst)[x] = dst_n_el;\r
+ }\r
+ else\r
+ {\r
+ for (int real_x = x_shifted; real_x < src_.cols; ++real_x)\r
+ {\r
+ if (mask(y, real_x))\r
+ dst[real_x] = op(src[real_x]);\r
+ }\r
+ }\r
+ }\r
}\r
\r
template <typename T, typename D, typename UnOp, typename Mask>\r
}\r
}\r
\r
- template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
- __global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
- const Mask mask, BinOp op)
- {
- typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
- typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
- typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
- const int shift = BinReadWriteTraits<T1, T2, D>::shift;
-
- const int x = threadIdx.x + blockIdx.x * blockDim.x;
- const int y = threadIdx.y + blockIdx.y * blockDim.y;
- const int x_shifted = x * shift;
-
- if (y < src1_.rows)
- {
- const T1* src1 = src1_.ptr(y);
- const T2* src2 = src2_.ptr(y);
- D* dst = dst_.ptr(y);
-
- if (x_shifted + shift - 1 < src1_.cols)
- {
- read_type1 src1_n_el = ((const read_type1*)src1)[x];
- read_type2 src2_n_el = ((const read_type2*)src2)[x];
- write_type dst_n_el;
-
- OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
-
- ((write_type*)dst)[x] = dst_n_el;
- }
- else
- {
- for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
- {
- if (mask(y, real_x))
- dst[real_x] = op(src1[real_x], src2[real_x]);
- }
- }
- }
+ template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
+ __global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_, \r
+ const Mask mask, BinOp op)\r
+ {\r
+ typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;\r
+ typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;\r
+ typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;\r
+ const int shift = BinReadWriteTraits<T1, T2, D>::shift;\r
+\r
+ const int x = threadIdx.x + blockIdx.x * blockDim.x;\r
+ const int y = threadIdx.y + blockIdx.y * blockDim.y;\r
+ const int x_shifted = x * shift;\r
+\r
+ if (y < src1_.rows)\r
+ {\r
+ const T1* src1 = src1_.ptr(y);\r
+ const T2* src2 = src2_.ptr(y);\r
+ D* dst = dst_.ptr(y);\r
+\r
+ if (x_shifted + shift - 1 < src1_.cols)\r
+ {\r
+ read_type1 src1_n_el = ((const read_type1*)src1)[x];\r
+ read_type2 src2_n_el = ((const read_type2*)src2)[x];\r
+ write_type dst_n_el;\r
+ \r
+ OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);\r
+\r
+ ((write_type*)dst)[x] = dst_n_el;\r
+ }\r
+ else\r
+ {\r
+ for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)\r
+ {\r
+ if (mask(y, real_x))\r
+ dst[real_x] = op(src1[real_x], src2[real_x]);\r
+ }\r
+ }\r
+ }\r
}\r
\r
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
template <typename T, typename D, typename UnOp, typename Mask>\r
static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, \r
cudaStream_t stream = 0)\r
- {
+ {\r
const int shift = device::UnReadWriteTraits<T, D>::shift;\r
\r
dim3 threads(16, 16, 1);\r
- dim3 grid(1, 1, 1);
+ dim3 grid(1, 1, 1); \r
\r
grid.x = divUp(src.cols, threads.x * shift);\r
grid.y = divUp(src.rows, threads.y); \r
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>\r
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, \r
BinOp op, const Mask& mask, cudaStream_t stream = 0)\r
- {
+ {\r
const int shift = device::BinReadWriteTraits<T1, T2, D>::shift;\r
\r
dim3 threads(16, 16, 1);\r
template <typename T, typename D, typename UnOp, typename Mask>\r
static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, \r
cudaStream_t stream = 0)\r
- {
+ {\r
TransformChooser<device::VecTraits<T>::cn == 1 && device::VecTraits<D>::cn == 1 && device::UnReadWriteTraits<T, D>::shift != 1>::call(src, dst, op, mask, stream);\r
}\r
\r
#include "cufft.h"\r
#include "opencv2/gpu/stream_accessor.hpp"\r
#include "npp.h" \r
- #include "npp_staging.h"\r
-\r
- #include "nvidia/NCV.hpp"\r
+ \r
+ #include "nvidia/core/NCV.hpp"\r
+ #include "nvidia/NPP_staging/npp_staging.hpp"\r
#include "nvidia/NCVHaarObjectDetection.hpp"\r
\r
#define CUDART_MINIMUM_REQUIRED_VERSION 3020\r
{
_Tp a = src1[i], b = src2[i];
if( a < 0 ) a ^= C; if( b < 0 ) b ^= C;
- _Tp d = std::abs(a - b);
+ _Tp d = std::abs(double(a - b));
if( d > imaxdiff )
{
idx = i + startidx;
#
# ----------------------------------------------------------------------------
-add_subdirectory(c)
-add_subdirectory(cpp)
+#add_subdirectory(c)
+#add_subdirectory(cpp)
add_subdirectory(gpu)
if(0)
"${CMAKE_SOURCE_DIR}/modules/legacy/include"\r
"${CMAKE_SOURCE_DIR}/modules/contrib/include"\r
"${CMAKE_SOURCE_DIR}/modules/gpu/include"\r
+ "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"\r
+ "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"\r
) \r
\r
if(HAVE_CUDA)\r
- include_directories(${CUDA_INCLUDE_DIRS})\r
+ include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core)\r
endif()\r
\r
if(CMAKE_COMPILER_IS_GNUCXX)\r
--- /dev/null
+#pragma warning( disable : 4201 4408 4127 4100)\r
+#include <cstdio>\r
+\r
+#include "cvconfig.h"\r
+#if !defined(HAVE_CUDA)\r
+ int main( int argc, const char** argv ) { return printf("Please compile the library with CUDA support."), -1; }\r
+#else\r
+\r
+\r
+#include <cuda_runtime.h>\r
+#include "opencv2/opencv.hpp"\r
+#include "NCVHaarObjectDetection.hpp"\r
+\r
+\r
+\r
+using namespace cv;\r
+\r
+\r
+const Size2i preferredVideoFrameSize(640, 480);\r
+\r
+std::string preferredClassifier = "haarcascade_frontalface_alt.xml";\r
+std::string wndTitle = "NVIDIA Computer Vision SDK :: Face Detection in Video Feed";\r
+\r
+\r
+void printSyntax(void)\r
+{\r
+ printf("Syntax: FaceDetectionFeed.exe [-c cameranum | -v filename] classifier.xml\n");\r
+}\r
+\r
+void imagePrintf(Mat& img, int lineOffsY, Scalar color, const char *format, ...)\r
+{ \r
+ int fontFace = CV_FONT_HERSHEY_PLAIN;\r
+ double fontScale = 1; \r
+ \r
+ int baseline;\r
+ Size textSize = cv::getTextSize("T", fontFace, fontScale, 1, &baseline);\r
+\r
+ va_list arg_ptr;\r
+ va_start(arg_ptr, format);\r
+ int len = _vscprintf(format, arg_ptr) + 1;\r
+ \r
+ vector<char> strBuf(len); \r
+ vsprintf_s(&strBuf[0], len, format, arg_ptr);\r
+\r
+ Point org(1, 3 * textSize.height * (lineOffsY + 1) / 2); \r
+ putText(img, &strBuf[0], org, fontFace, fontScale, color);\r
+ va_end(arg_ptr); \r
+}\r
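+// Note added for clarity: _vscprintf and vsprintf_s above are MSVC-specific.\r
+// On other compilers an equivalent would be vsnprintf: measure the length with\r
+// a va_copy of the argument list, size the buffer, then format into it.\r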
+\r
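+// Note added for clarity: process() runs one frame through the NCV detection\r
+// pipeline. With a counting allocator it only records memory requirements\r
+// (the NCV_SKIP_COND blocks skip the real work); otherwise it uploads the\r
+// grayscale frame, calls ncvDetectObjectsMultiScale_device with in-place\r
+// visualization of the detected rectangles, and copies the result back into\r
+// srcdst.\r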
+NCVStatus process(Mat *srcdst,\r
+ Ncv32u width, Ncv32u height,\r
+ NcvBool bShowAllHypotheses, NcvBool bLargestFace,\r
+ HaarClassifierCascadeDescriptor &haar,\r
+ NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes,\r
+ NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages,\r
+ INCVMemAllocator &gpuAllocator,\r
+ INCVMemAllocator &cpuAllocator,\r
+ cudaDeviceProp &devProp)\r
+{\r
+ ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR);\r
+\r
+ NCVStatus ncvStat;\r
+\r
+ NCV_SET_SKIP_COND(gpuAllocator.isCounting());\r
+\r
+ NCVMatrixAlloc<Ncv8u> d_src(gpuAllocator, width, height);\r
+ ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
+ NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);\r
+ ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
+ NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);\r
+ ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)\r
+ {\r
+ memcpy(h_src.ptr() + i * h_src.stride(), srcdst->ptr(i), srcdst->cols);\r
+ }\r
+\r
+ ncvStat = h_src.copySolid(d_src, 0);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ NcvSize32u roi;\r
+ roi.width = d_src.width();\r
+ roi.height = d_src.height();\r
+\r
+ Ncv32u numDetections;\r
+ ncvStat = ncvDetectObjectsMultiScale_device(\r
+ d_src, roi, d_rects, numDetections, haar, h_haarStages,\r
+ d_haarStages, d_haarNodes, d_haarFeatures,\r
+ haar.ClassifierSize,\r
+ bShowAllHypotheses ? 0 : 4,\r
+ 1.2f, 1,\r
+ (bLargestFace ? NCVPipeObjDet_FindLargestObject : 0)\r
+ | NCVPipeObjDet_VisualizeInPlace,\r
+ gpuAllocator, cpuAllocator, devProp, 0);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvStat = d_src.copySolid(h_src, 0);\r
+ ncvAssertReturnNcvStat(ncvStat);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);\r
+\r
+ for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)\r
+ {\r
+ memcpy(srcdst->ptr(i), h_src.ptr() + i * h_src.stride(), srcdst->cols);\r
+ }\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ return NCV_SUCCESS;\r
+}\r
+\r
+int main( int argc, const char** argv )\r
+{\r
+ NCVStatus ncvStat;\r
+\r
+ printf("NVIDIA Computer Vision SDK\n");\r
+ printf("Face Detection in video and live feed\n");\r
+ printf("=========================================\n");\r
+ printf(" Esc - Quit\n");\r
+ printf(" Space - Switch between NCV and OpenCV\n");\r
+ printf(" L - Switch between FullSearch and LargestFace modes\n");\r
+ printf(" U - Toggle unfiltered hypotheses visualization in FullSearch\n");\r
+ \r
+ VideoCapture capture; \r
+ bool bQuit = false;\r
+\r
+ Size2i frameSize;\r
+\r
+ if (argc != 4 && argc != 1)\r
+ {\r
+ printSyntax();\r
+ return -1;\r
+ }\r
+\r
+ if (argc == 1 || strcmp(argv[1], "-c") == 0)\r
+ {\r
+ // Camera input is specified\r
+ int camIdx = (argc == 4) ? atoi(argv[2]) : 0;\r
+ if(!capture.open(camIdx)) \r
+ return printf("Error opening camera\n"), -1; \r
+ \r
+ capture.set(CV_CAP_PROP_FRAME_WIDTH, preferredVideoFrameSize.width);\r
+ capture.set(CV_CAP_PROP_FRAME_HEIGHT, preferredVideoFrameSize.height);\r
+ capture.set(CV_CAP_PROP_FPS, 25);\r
+ frameSize = preferredVideoFrameSize;\r
+ }\r
+ else if (strcmp(argv[1], "-v") == 0)\r
+ {\r
+ // Video file input (avi)\r
+ if(!capture.open(argv[2]))\r
+ return printf("Error opening video file\n"), -1;\r
+\r
+ frameSize.width = (int)capture.get(CV_CAP_PROP_FRAME_WIDTH);\r
+ frameSize.height = (int)capture.get(CV_CAP_PROP_FRAME_HEIGHT);\r
+ }\r
+ else\r
+ return printSyntax(), -1;\r
+\r
+ NcvBool bUseOpenCV = true;\r
+ NcvBool bLargestFace = false; //LargestFace=true is usually used during training\r
+ NcvBool bShowAllHypotheses = false;\r
+\r
+ CascadeClassifier classifierOpenCV;\r
+ std::string classifierFile;\r
+ if (argc == 1)\r
+ {\r
+ classifierFile = preferredClassifier;\r
+ }\r
+ else\r
+ {\r
+ classifierFile.assign(argv[3]);\r
+ }\r
+\r
+ if (!classifierOpenCV.load(classifierFile))\r
+ {\r
+ printf("Error (in OpenCV) opening classifier\n");\r
+ printSyntax();\r
+ return -1;\r
+ }\r
+\r
+ int devId;\r
+ ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);\r
+ cudaDeviceProp devProp;\r
+ ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);\r
+ printf("Using GPU %d %s, arch=%d.%d\n", devId, devProp.name, devProp.major, devProp.minor);\r
+\r
+ //==============================================================================\r
+ //\r
+ // Load the classifier from file (assuming its size is about 1 mb)\r
+ // using a simple allocator\r
+ //\r
+ //==============================================================================\r
+\r
+ NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice, devProp.textureAlignment);\r
+ ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);\r
+ NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment);\r
+ ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);\r
+\r
+ Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;\r
+ ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1);\r
+\r
+ NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages);\r
+ ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1);\r
+ NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes);\r
+ ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1);\r
+ NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures);\r
+ ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1);\r
+\r
+ HaarClassifierCascadeDescriptor haar;\r
+ ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, h_haarStages, h_haarNodes, h_haarFeatures);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1);\r
+\r
+ NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages);\r
+ ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1);\r
+ NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes);\r
+ ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1);\r
+ NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures);\r
+ ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1);\r
+\r
+ ncvStat = h_haarStages.copySolid(d_haarStages, 0);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);\r
+ ncvStat = h_haarNodes.copySolid(d_haarNodes, 0);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);\r
+ ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);\r
+\r
+ //==============================================================================\r
+ //\r
+ // Calculate memory requirements and create real allocators\r
+ //\r
+ //==============================================================================\r
+\r
+ NCVMemStackAllocator gpuCounter(devProp.textureAlignment);\r
+ ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1);\r
+ NCVMemStackAllocator cpuCounter(devProp.textureAlignment);\r
+ ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1);\r
+\r
+ ncvStat = process(NULL, frameSize.width, frameSize.height,\r
+ false, false, haar,\r
+ d_haarStages, d_haarNodes,\r
+ d_haarFeatures, h_haarStages,\r
+ gpuCounter, cpuCounter, devProp);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);\r
+\r
+ NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), devProp.textureAlignment);\r
+ ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1);\r
+ NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), devProp.textureAlignment);\r
+ ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1);\r
+\r
+ printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height);\r
+\r
+ //==============================================================================\r
+ //\r
+ // Main processing loop\r
+ //\r
+ //==============================================================================\r
+\r
+ namedWindow(wndTitle, 1);\r
+ Mat frame, gray, frameDisp;\r
+\r
+ do\r
+ {\r
+ // For camera and video file, capture the next image \r
+ capture >> frame;\r
+ if (frame.empty())\r
+ break;\r
+\r
+ Mat gray;\r
+ cvtColor(frame, gray, CV_BGR2GRAY);\r
+\r
+ //\r
+ // process\r
+ //\r
+\r
+ NcvSize32u minSize = haar.ClassifierSize;\r
+ if (bLargestFace)\r
+ {\r
+ Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;\r
+ Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;\r
+ Ncv32u ratioSmallest = std::min(ratioX, ratioY);\r
+ ratioSmallest = std::max((Ncv32u)(ratioSmallest / 2.5f), (Ncv32u)1);\r
+ minSize.width *= ratioSmallest;\r
+ minSize.height *= ratioSmallest;\r
+ }\r
+\r
+ Ncv32f avgTime;\r
+ NcvTimer timer = ncvStartTimer();\r
+\r
+ if (!bUseOpenCV)\r
+ {\r
+ ncvStat = process(&gray, frameSize.width, frameSize.height,\r
+ bShowAllHypotheses, bLargestFace, haar,\r
+ d_haarStages, d_haarNodes,\r
+ d_haarFeatures, h_haarStages,\r
+ gpuAllocator, cpuAllocator, devProp);\r
+ ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);\r
+ }\r
+ else\r
+ {\r
+ vector<Rect> rectsOpenCV;\r
+\r
+ classifierOpenCV.detectMultiScale(\r
+ gray,\r
+ rectsOpenCV,\r
+ 1.2f,\r
+ bShowAllHypotheses && !bLargestFace ? 0 : 4,\r
+ (bLargestFace ? CV_HAAR_FIND_BIGGEST_OBJECT : 0)\r
+ | CV_HAAR_SCALE_IMAGE,\r
+ Size(minSize.width, minSize.height));\r
+\r
+ for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)\r
+ rectangle(gray, rectsOpenCV[rt], Scalar(255));\r
+ }\r
+\r
+ avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);\r
+\r
+ cvtColor(gray, frameDisp, CV_GRAY2BGR);\r
+\r
+ imagePrintf(frameDisp, 0, CV_RGB(255, 0,0), "Space - Switch NCV%s / OpenCV%s", bUseOpenCV?"":" (ON)", bUseOpenCV?" (ON)":"");\r
+ imagePrintf(frameDisp, 1, CV_RGB(255, 0,0), "L - Switch FullSearch%s / LargestFace%s modes", bLargestFace?"":" (ON)", bLargestFace?" (ON)":"");\r
+ imagePrintf(frameDisp, 2, CV_RGB(255, 0,0), "U - Toggle unfiltered hypotheses visualization in FullSearch %s", bShowAllHypotheses?"(ON)":"(OFF)");\r
+ imagePrintf(frameDisp, 3, CV_RGB(118,185,0), " Running at %f FPS on %s", 1000.0f / avgTime, bUseOpenCV?"CPU":"GPU");\r
+\r
+ cv::imshow(wndTitle, frameDisp);\r
+\r
+ switch (cvWaitKey(1))\r
+ {\r
+ case ' ':\r
+ bUseOpenCV = !bUseOpenCV;\r
+ break;\r
+ case 'L':\r
+ case 'l':\r
+ bLargestFace = !bLargestFace;\r
+ break;\r
+ case 'U':\r
+ case 'u':\r
+ bShowAllHypotheses = !bShowAllHypotheses;\r
+ break;\r
+ case 27:\r
+ bQuit = true;\r
+ break;\r
+ }\r
+\r
+ } while (!bQuit);\r
+\r
+ cvDestroyWindow(wndTitle.c_str());\r
+\r
+ return 0;\r
+}\r
+\r
+\r
+#endif
\ No newline at end of file
# ----------------------------------------------------------------------------
project(opencv_test_gpu)
+set(the_target "opencv_test_gpu")
+
file(GLOB test_srcs "src/*.cpp")
-source_group("Src" FILES ${test_srcs})
file(GLOB test_hdrs "src/*.h*")
+source_group("Src" FILES ${test_srcs})
source_group("Include" FILES ${test_hdrs})
-set(the_target "opencv_test_gpu")
+
+
include_directories (
"${CMAKE_SOURCE_DIR}/include/opencv"
"${CMAKE_SOURCE_DIR}/modules/ml/include"
"${CMAKE_CURRENT_SOURCE_DIR}/src"
"${CMAKE_CURRENT_BINARY_DIR}"
+ "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia"
+ "${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core"
)
include_directories(../cxts)
-add_executable(${the_target} ${test_srcs} ${test_hdrs})
+if(HAVE_CUDA)
+ include_directories(${CUDA_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/core ${CMAKE_SOURCE_DIR}/modules/gpu/src/nvidia/NPP_staging)
+
+ file(GLOB nvidia "src/nvidia/*.*")
+ SET(ncv_cpp ../../modules/gpu/src/nvidia/core/NCV.cpp)
+ source_group("Src\\nvidia" FILES ${nvidia})
+endif()
+
+add_executable(${the_target} ${test_srcs} ${test_hdrs} ${nvidia} ${ncv_cpp})
# Additional target properties
set_target_properties(${the_target} PROPERTIES
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _ncvautotestlister_hpp_\r
+#define _ncvautotestlister_hpp_\r
+\r
+#include <vector>\r
+\r
+#include "NCVTest.hpp"\r
+\r
+class NCVAutoTestLister\r
+{\r
+public:\r
+\r
+ NCVAutoTestLister(std::string testSuiteName, NcvBool bStopOnFirstFail=false, NcvBool bCompactOutput=true)\r
+ :\r
+ testSuiteName(testSuiteName),\r
+ bStopOnFirstFail(bStopOnFirstFail),\r
+ bCompactOutput(bCompactOutput)\r
+ {\r
+ }\r
+\r
+ void add(INCVTest *test)\r
+ {\r
+ this->tests.push_back(test);\r
+ }\r
+\r
+ void invoke()\r
+ {\r
+ Ncv32u nPassed = 0;\r
+ Ncv32u nFailed = 0;\r
+ Ncv32u nFailedMem = 0;\r
+\r
+ if (bCompactOutput)\r
+ {\r
+ printf("Test suite '%s' with %d tests\n", \r
+ testSuiteName.c_str(),\r
+ (int)(this->tests.size()));\r
+ }\r
+\r
+ for (Ncv32u i=0; i<this->tests.size(); i++)\r
+ {\r
+ INCVTest &curTest = *tests[i];\r
+\r
+ NCVTestReport curReport;\r
+ bool res = curTest.executeTest(curReport);\r
+\r
+ if (!bCompactOutput)\r
+ {\r
+ printf("Test %3i %16s; Consumed mem GPU = %8d, CPU = %8d; %s\n",\r
+ i,\r
+ curTest.getName().c_str(),\r
+ curReport.statsNums["MemGPU"],\r
+ curReport.statsNums["MemCPU"],\r
+ curReport.statsText["rcode"].c_str());\r
+ }\r
+\r
+ if (res)\r
+ {\r
+ nPassed++;\r
+ if (bCompactOutput)\r
+ {\r
+ printf(".");\r
+ }\r
+ }\r
+ else\r
+ {\r
+ if (!curReport.statsText["rcode"].compare("FAILED"))\r
+ {\r
+ nFailed++;\r
+ if (bCompactOutput)\r
+ {\r
+ printf("x");\r
+ }\r
+ if (bStopOnFirstFail)\r
+ {\r
+ break;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ nFailedMem++;\r
+ if (bCompactOutput)\r
+ {\r
+ printf("m");\r
+ }\r
+ }\r
+ }\r
+ fflush(stdout);\r
+ }\r
+ if (bCompactOutput)\r
+ {\r
+ printf("\n");\r
+ }\r
+\r
+ printf("Test suite '%s' complete: %d total, %d passed, %d memory errors, %d failed\n\n", \r
+ testSuiteName.c_str(),\r
+ (int)(this->tests.size()),\r
+ nPassed,\r
+ nFailedMem,\r
+ nFailed);\r
+ }\r
+\r
+ ~NCVAutoTestLister()\r
+ {\r
+ for (Ncv32u i=0; i<this->tests.size(); i++)\r
+ {\r
+ delete tests[i];\r
+ }\r
+ }\r
+\r
+private:\r
+\r
+ NcvBool bStopOnFirstFail;\r
+ NcvBool bCompactOutput;\r
+ std::string testSuiteName;\r
+ std::vector<INCVTest *> tests;\r
+};\r
+\r
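+// Usage sketch (illustrative only, not part of the framework): the lister owns\r
+// the tests it is given and deletes them in its destructor, so tests are added\r
+// as new-allocated pointers. "SomeConcreteTest" is a hypothetical INCVTest\r
+// implementation.\r
+//\r
+//   NCVAutoTestLister lister("NPPST unit tests");\r
+//   lister.add(new SomeConcreteTest("test_1"));\r
+//   lister.add(new SomeConcreteTest("test_2"));\r
+//   lister.invoke();   // compact mode prints '.', 'x' or 'm' per test\r
+\r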
+#endif // _ncvautotestlister_hpp_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _ncvtest_hpp_\r
+#define _ncvtest_hpp_\r
+\r
+#pragma warning( disable : 4201 4408 4127 4100)\r
+\r
+#include <string>\r
+#include <vector>\r
+#include <map>\r
+#include <memory>\r
+#include <algorithm>\r
+#include <fstream>\r
+\r
+#include <cuda_runtime.h>\r
+#include "NPP_staging.hpp"\r
+\r
+\r
+struct NCVTestReport\r
+{\r
+ std::map<std::string, Ncv32u> statsNums;\r
+ std::map<std::string, std::string> statsText;\r
+};\r
+\r
+\r
+class INCVTest\r
+{\r
+public:\r
+ virtual bool executeTest(NCVTestReport &report) = 0;\r
+ virtual std::string getName() const = 0;\r
+};\r
+\r
+\r
+class NCVTestProvider : public INCVTest\r
+{\r
+public:\r
+\r
+ NCVTestProvider(std::string testName)\r
+ :\r
+ testName(testName)\r
+ {\r
+ int devId;\r
+ ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );\r
+ ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&this->devProp, devId), "Error returned from cudaGetDeviceProperties", );\r
+ }\r
+\r
+ virtual bool init() = 0;\r
+ virtual bool process() = 0;\r
+ virtual bool deinit() = 0;\r
+ virtual bool toString(std::ofstream &strOut) = 0;\r
+\r
+ virtual std::string getName() const\r
+ {\r
+ return this->testName;\r
+ }\r
+\r
+ virtual ~NCVTestProvider()\r
+ {\r
+ deinitMemory();\r
+ }\r
+\r
+ virtual bool executeTest(NCVTestReport &report)\r
+ {\r
+ bool res;\r
+ report.statsText["rcode"] = "FAILED";\r
+\r
+ res = initMemory(report);\r
+ if (!res)\r
+ {\r
+ dumpToFile(report);\r
+ deinitMemory();\r
+ return false;\r
+ }\r
+\r
+ res = init();\r
+ if (!res)\r
+ {\r
+ dumpToFile(report);\r
+ deinit();\r
+ deinitMemory();\r
+ return false;\r
+ }\r
+\r
+ res = process();\r
+ if (!res)\r
+ {\r
+ dumpToFile(report);\r
+ deinit();\r
+ deinitMemory();\r
+ return false;\r
+ }\r
+\r
+ res = deinit();\r
+ if (!res)\r
+ {\r
+ dumpToFile(report);\r
+ deinitMemory();\r
+ return false;\r
+ }\r
+\r
+ deinitMemory();\r
+\r
+ report.statsText["rcode"] = "Passed";\r
+ return true;\r
+ }\r
+\r
+protected:\r
+\r
+ cudaDeviceProp devProp;\r
+ std::auto_ptr<INCVMemAllocator> allocatorGPU;\r
+ std::auto_ptr<INCVMemAllocator> allocatorCPU;\r
+\r
+private:\r
+\r
+ std::string testName;\r
+\r
+ bool initMemory(NCVTestReport &report)\r
+ {\r
+ this->allocatorGPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));\r
+ this->allocatorCPU.reset(new NCVMemStackAllocator(devProp.textureAlignment));\r
+\r
+ if (!this->allocatorGPU.get()->isInitialized() ||\r
+ !this->allocatorCPU.get()->isInitialized())\r
+ {\r
+ report.statsText["rcode"] = "Memory FAILED";\r
+ return false;\r
+ }\r
+\r
+ if (!this->process())\r
+ {\r
+ report.statsText["rcode"] = "Memory FAILED";\r
+ return false;\r
+ }\r
+\r
+ Ncv32u maxGPUsize = (Ncv32u)this->allocatorGPU.get()->maxSize();\r
+ Ncv32u maxCPUsize = (Ncv32u)this->allocatorCPU.get()->maxSize();\r
+\r
+ report.statsNums["MemGPU"] = maxGPUsize;\r
+ report.statsNums["MemCPU"] = maxCPUsize;\r
+\r
+ this->allocatorGPU.reset(new NCVMemStackAllocator(NCVMemoryTypeDevice, maxGPUsize, devProp.textureAlignment));\r
+\r
+ this->allocatorCPU.reset(new NCVMemStackAllocator(NCVMemoryTypeHostPinned, maxCPUsize, devProp.textureAlignment));\r
+\r
+ if (!this->allocatorGPU.get()->isInitialized() ||\r
+ !this->allocatorCPU.get()->isInitialized())\r
+ {\r
+ report.statsText["rcode"] = "Memory FAILED";\r
+ return false;\r
+ }\r
+\r
+ return true;\r
+ }\r
+\r
+ void deinitMemory()\r
+ {\r
+ this->allocatorGPU.reset();\r
+ this->allocatorCPU.reset();\r
+ }\r
+\r
+ void dumpToFile(NCVTestReport &report)\r
+ {\r
+ bool bReasonMem = (0 == report.statsText["rcode"].compare("Memory FAILED"));\r
+ std::string fname = "TestDump_";\r
+ fname += (bReasonMem ? "m_" : "") + this->testName + ".log";\r
+ std::ofstream stream(fname.c_str(), std::ios::trunc | std::ios::out);\r
+ if (!stream.is_open()) return;\r
+\r
+ stream << "NCV Test Failure Log: " << this->testName << std::endl;\r
+ stream << "====================================================" << std::endl << std::endl;\r
+ stream << "Test initialization report: " << std::endl;\r
+ for (std::map<std::string,std::string>::iterator it=report.statsText.begin();\r
+ it != report.statsText.end(); it++)\r
+ {\r
+ stream << it->first << "=" << it->second << std::endl;\r
+ }\r
+ for (std::map<std::string,Ncv32u>::iterator it=report.statsNums.begin();\r
+ it != report.statsNums.end(); it++)\r
+ {\r
+ stream << it->first << "=" << it->second << std::endl;\r
+ }\r
+ stream << std::endl;\r
+\r
+ stream << "Test initialization parameters: " << std::endl;\r
+ bool bSerializeRes = false;\r
+ try\r
+ {\r
+ bSerializeRes = this->toString(stream);\r
+ }\r
+ catch (...)\r
+ {\r
+ }\r
+\r
+ if (!bSerializeRes)\r
+ {\r
+ stream << "Couldn't retrieve object dump" << std::endl;\r
+ }\r
+\r
+ stream.flush();\r
+ }\r
+};\r
+\r
+#endif // _ncvtest_hpp_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _ncvtestsourceprovider_hpp_\r
+#define _ncvtestsourceprovider_hpp_\r
+\r
+#include <memory>\r
+\r
+#include "NCV.hpp"\r
+#include <opencv2/highgui/highgui.hpp>\r
+\r
+\r
+template <class T>\r
+class NCVTestSourceProvider\r
+{\r
+public:\r
+\r
+ NCVTestSourceProvider(Ncv32u seed, T rangeLow, T rangeHigh, Ncv32u maxWidth, Ncv32u maxHeight)\r
+ :\r
+ bInit(false)\r
+ {\r
+ ncvAssertPrintReturn(rangeLow < rangeHigh, "NCVTestSourceProvider ctor:: Invalid range", );\r
+\r
+ int devId;\r
+ cudaDeviceProp devProp;\r
+ ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );\r
+ ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );\r
+\r
+ //Ncv32u maxWpitch = alignUp(maxWidth * sizeof(T), devProp.textureAlignment);\r
+\r
+ allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));\r
+ data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), maxWidth, maxHeight));\r
+ ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );\r
+\r
+ this->dataWidth = maxWidth;\r
+ this->dataHeight = maxHeight;\r
+\r
+ srand(seed);\r
+\r
+ for (Ncv32u i=0; i<maxHeight; i++)\r
+ {\r
+ for (Ncv32u j=0; j<data.get()->stride(); j++)\r
+ {\r
+ data.get()->ptr()[i * data.get()->stride() + j] =\r
+ (T)(((1.0 * rand()) / RAND_MAX) * (rangeHigh - rangeLow) + rangeLow);\r
+ }\r
+ }\r
+\r
+ this->bInit = true;\r
+ }\r
+\r
+ NCVTestSourceProvider(std::string pgmFilename)\r
+ :\r
+ bInit(false)\r
+ {\r
+ ncvAssertPrintReturn(sizeof(T) == 1, "NCVTestSourceProvider ctor:: PGM constructor supports only 8-bit types", );\r
+\r
+ cv::Mat image = cv::imread(pgmFilename, 0);   // load as 8-bit grayscale to match the CV_8UC1 wrapper below\r
+ ncvAssertPrintReturn(!image.empty(), "NCVTestSourceProvider ctor:: PGM file error", );\r
+\r
+ int devId;\r
+ cudaDeviceProp devProp;\r
+ ncvAssertPrintReturn(cudaSuccess == cudaGetDevice(&devId), "Error returned from cudaGetDevice", );\r
+ ncvAssertPrintReturn(cudaSuccess == cudaGetDeviceProperties(&devProp, devId), "Error returned from cudaGetDeviceProperties", );\r
+\r
+ allocatorCPU.reset(new NCVMemNativeAllocator(NCVMemoryTypeHostPinned, devProp.textureAlignment));\r
+ data.reset(new NCVMatrixAlloc<T>(*this->allocatorCPU.get(), image.cols, image.rows));\r
+ ncvAssertPrintReturn(data.get()->isMemAllocated(), "NCVTestSourceProvider ctor:: Matrix not allocated", );\r
+\r
+ this->dataWidth = image.cols;\r
+ this->dataHeight = image.rows;\r
+\r
+ cv::Mat hdr(image.size(), CV_8UC1, data.get()->ptr(), data.get()->pitch());\r
+ image.copyTo(hdr);\r
+ \r
+ this->bInit = true;\r
+ }\r
+\r
+ NcvBool fill(NCVMatrix<T> &dst)\r
+ {\r
+ ncvAssertReturn(this->isInit() &&\r
+ dst.memType() == allocatorCPU.get()->memType(), false);\r
+\r
+ if (dst.width() == 0 || dst.height() == 0)\r
+ {\r
+ return true;\r
+ }\r
+\r
+ for (Ncv32u i=0; i<dst.height(); i++)\r
+ {\r
+ Ncv32u srcLine = i % this->dataHeight;\r
+\r
+ Ncv32u srcFullChunks = dst.width() / this->dataWidth;\r
+ for (Ncv32u j=0; j<srcFullChunks; j++)\r
+ {\r
+ memcpy(dst.ptr() + i * dst.stride() + j * this->dataWidth,\r
+ this->data.get()->ptr() + this->data.get()->stride() * srcLine,\r
+ this->dataWidth * sizeof(T));\r
+ }\r
+\r
+ Ncv32u srcLastChunk = dst.width() % this->dataWidth;\r
+ memcpy(dst.ptr() + i * dst.stride() + srcFullChunks * this->dataWidth,\r
+ this->data.get()->ptr() + this->data.get()->stride() * srcLine,\r
+ srcLastChunk * sizeof(T));\r
+ }\r
+\r
+ return true;\r
+ }\r
+\r
+ NcvBool fill(NCVVector<T> &dst)\r
+ {\r
+ ncvAssertReturn(this->isInit() &&\r
+ dst.memType() == allocatorCPU.get()->memType(), false);\r
+\r
+ if (dst.length() == 0)\r
+ {\r
+ return true;\r
+ }\r
+\r
+ Ncv32u srcLen = this->dataWidth * this->dataHeight;\r
+\r
+ Ncv32u srcFullChunks = (Ncv32u)dst.length() / srcLen;\r
+ for (Ncv32u j=0; j<srcFullChunks; j++)\r
+ {\r
+ memcpy(dst.ptr() + j * srcLen, this->data.get()->ptr(), srcLen * sizeof(T));\r
+ }\r
+\r
+ Ncv32u srcLastChunk = dst.length() % srcLen;\r
+ memcpy(dst.ptr() + srcFullChunks * srcLen, this->data.get()->ptr(), srcLastChunk * sizeof(T));\r
+\r
+ return true;\r
+ }\r
+\r
+ ~NCVTestSourceProvider()\r
+ {\r
+ data.reset();\r
+ allocatorCPU.reset();\r
+ }\r
+\r
+private:\r
+\r
+ NcvBool isInit(void)\r
+ {\r
+ return this->bInit;\r
+ }\r
+\r
+ NcvBool bInit;\r
+ std::auto_ptr< INCVMemAllocator > allocatorCPU;\r
+ std::auto_ptr< NCVMatrixAlloc<T> > data;\r
+ Ncv32u dataWidth;\r
+ Ncv32u dataHeight;\r
+};\r
+\r
+#endif // _ncvtestsourceprovider_hpp_\r
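+\r
+// Usage sketch (illustrative only): the provider either generates pseudo-random\r
+// data in a given range or wraps an 8-bit PGM image, and tiles that data into a\r
+// destination matrix or vector through fill(). The allocator name below is\r
+// hypothetical; the destination must live in the same memory type as the\r
+// provider's internal host-pinned buffer.\r
+//\r
+//   NCVTestSourceProvider<Ncv32u> randomSrc(2010, 0, 0xFFFFFFFF, 2048, 2048);\r
+//   NCVMatrixAlloc<Ncv32u> h_img(cpuPinnedAllocator, 640, 480);\r
+//   if (h_img.isMemAllocated())\r
+//   {\r
+//       randomSrc.fill(h_img);   // tiles the 2048x2048 source over 640x480\r
+//   }\r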
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include "TestCompact.h"\r
+\r
+\r
+TestCompact::TestCompact(std::string testName, NCVTestSourceProvider<Ncv32u> &src,\r
+ Ncv32u length, Ncv32u badElem, Ncv32u badElemPercentage)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ length(length),\r
+ badElem(badElem),\r
+ badElemPercentage(badElemPercentage > 100 ? 100 : badElemPercentage)\r
+{\r
+}\r
+\r
+\r
+bool TestCompact::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "length=" << length << std::endl;\r
+ strOut << "badElem=" << badElem << std::endl;\r
+ strOut << "badElemPercentage=" << badElemPercentage << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+bool TestCompact::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+bool TestCompact::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ NCVVectorAlloc<Ncv32u> h_vecSrc(*this->allocatorCPU.get(), this->length);\r
+ ncvAssertReturn(h_vecSrc.isMemAllocated(), false);\r
+ NCVVectorAlloc<Ncv32u> d_vecSrc(*this->allocatorGPU.get(), this->length);\r
+ ncvAssertReturn(d_vecSrc.isMemAllocated(), false);\r
+\r
+ NCVVectorAlloc<Ncv32u> h_vecDst(*this->allocatorCPU.get(), this->length);\r
+ ncvAssertReturn(h_vecDst.isMemAllocated(), false);\r
+ NCVVectorAlloc<Ncv32u> d_vecDst(*this->allocatorGPU.get(), this->length);\r
+ ncvAssertReturn(d_vecDst.isMemAllocated(), false);\r
+ NCVVectorAlloc<Ncv32u> h_vecDst_d(*this->allocatorCPU.get(), this->length);\r
+ ncvAssertReturn(h_vecDst_d.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvAssertReturn(this->src.fill(h_vecSrc), false);\r
+ for (Ncv32u i=0; i<this->length; i++)\r
+ {\r
+ Ncv32u tmp = (h_vecSrc.ptr()[i]) & 0xFF;\r
+ tmp = tmp * 99 / 255;\r
+ if (tmp < this->badElemPercentage)\r
+ {\r
+ h_vecSrc.ptr()[i] = this->badElem;\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ NCVVectorAlloc<Ncv32u> h_dstLen(*this->allocatorCPU.get(), 1);\r
+ ncvAssertReturn(h_dstLen.isMemAllocated(), false);\r
+ Ncv32u bufSize;\r
+ ncvStat = nppsStCompactGetSize_32u(this->length, &bufSize, this->devProp);\r
+ ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);\r
+ NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);\r
+ ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);\r
+\r
+ Ncv32u h_outElemNum_h = 0;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvStat = h_vecSrc.copySolid(d_vecSrc, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ ncvStat = nppsStCompact_32u(d_vecSrc.ptr(), this->length,\r
+ d_vecDst.ptr(), h_dstLen.ptr(), this->badElem,\r
+ d_tmpBuf.ptr(), bufSize, this->devProp);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ ncvStat = d_vecDst.copySolid(h_vecDst_d, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppsStCompact_32u_host(h_vecSrc.ptr(), this->length, h_vecDst.ptr(), &h_outElemNum_h, this->badElem);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ if (h_dstLen.ptr()[0] != h_outElemNum_h)\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ else\r
+ {\r
+ for (Ncv32u i=0; bLoopVirgin && i < h_outElemNum_h; i++)\r
+ {\r
+ if (h_vecDst.ptr()[i] != h_vecDst_d.ptr()[i])\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+bool TestCompact::deinit()\r
+{\r
+ return true;\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testhypothesescompact_h_\r
+#define _testhypothesescompact_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+class TestCompact : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestCompact(std::string testName, NCVTestSourceProvider<Ncv32u> &src,\r
+ Ncv32u length, Ncv32u badElem, Ncv32u badElemPercentage);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+ TestCompact(const TestCompact&);\r
+ TestCompact& operator=(const TestCompact&); \r
+\r
+\r
+ NCVTestSourceProvider<Ncv32u> &src;\r
+ Ncv32u length;\r
+ Ncv32u badElem;\r
+ Ncv32u badElemPercentage;\r
+};\r
+\r
+#endif // _testhypothesescompact_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include "TestDrawRects.h"\r
+#include "NCVHaarObjectDetection.hpp"\r
+\r
+\r
+template <class T>\r
+TestDrawRects<T>::TestDrawRects(std::string testName, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,\r
+ Ncv32u width, Ncv32u height, Ncv32u numRects, T color)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ src32u(src32u),\r
+ width(width),\r
+ height(height),\r
+ numRects(numRects),\r
+ color(color)\r
+{\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestDrawRects<T>::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "sizeof(T)=" << sizeof(T) << std::endl;\r
+ strOut << "width=" << width << std::endl;\r
+ strOut << "height=" << height << std::endl;\r
+ strOut << "numRects=" << numRects << std::endl;\r
+ strOut << "color=" << color << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestDrawRects<T>::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestDrawRects<T>::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_img_d(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img_d.isMemAllocated(), false);\r
+\r
+ NCVVectorAlloc<NcvRect32u> d_rects(*this->allocatorGPU.get(), this->numRects);\r
+ ncvAssertReturn(d_rects.isMemAllocated(), false);\r
+ NCVVectorAlloc<NcvRect32u> h_rects(*this->allocatorCPU.get(), this->numRects);\r
+ ncvAssertReturn(h_rects.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvAssertReturn(this->src.fill(h_img), false);\r
+ ncvStat = h_img.copySolid(d_img, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);\r
+\r
+ //fill vector of rectangles with random rects covering the input\r
+ NCVVectorReuse<Ncv32u> h_rects_as32u(h_rects.getSegment());\r
+ ncvAssertReturn(h_rects_as32u.isMemReused(), false);\r
+ ncvAssertReturn(this->src32u.fill(h_rects_as32u), false);\r
+ for (Ncv32u i=0; i<this->numRects; i++)\r
+ {\r
+ h_rects.ptr()[i].x = (Ncv32u)(((1.0 * h_rects.ptr()[i].x) / RAND_MAX) * (this->width-2));\r
+ h_rects.ptr()[i].y = (Ncv32u)(((1.0 * h_rects.ptr()[i].y) / RAND_MAX) * (this->height-2));\r
+ h_rects.ptr()[i].width = (Ncv32u)(((1.0 * h_rects.ptr()[i].width) / RAND_MAX) * (this->width+10 - h_rects.ptr()[i].x));\r
+ h_rects.ptr()[i].height = (Ncv32u)(((1.0 * h_rects.ptr()[i].height) / RAND_MAX) * (this->height+10 - h_rects.ptr()[i].y));\r
+ }\r
+ ncvStat = h_rects.copySolid(d_rects, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);\r
+\r
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ ncvStat = ncvDrawRects_32u_device((Ncv32u *)d_img.ptr(), d_img.stride(), this->width, this->height,\r
+ (NcvRect32u *)d_rects.ptr(), this->numRects, this->color, 0);\r
+ }\r
+ else if (sizeof(T) == sizeof(Ncv8u))\r
+ {\r
+ ncvStat = ncvDrawRects_8u_device((Ncv8u *)d_img.ptr(), d_img.stride(), this->width, this->height,\r
+ (NcvRect32u *)d_rects.ptr(), this->numRects, (Ncv8u)this->color, 0);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);\r
+ }\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ ncvStat = d_img.copySolid(h_img_d, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ ncvStat = ncvDrawRects_32u_host((Ncv32u *)h_img.ptr(), h_img.stride(), this->width, this->height,\r
+ (NcvRect32u *)h_rects.ptr(), this->numRects, this->color);\r
+ }\r
+ else if (sizeof(T) == sizeof(Ncv8u))\r
+ {\r
+ ncvStat = ncvDrawRects_8u_host((Ncv8u *)h_img.ptr(), h_img.stride(), this->width, this->height,\r
+ (NcvRect32u *)h_rects.ptr(), this->numRects, (Ncv8u)this->color);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect drawrects test instance", false);\r
+ }\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ //const Ncv64f relEPS = 0.005;\r
+ for (Ncv32u i=0; bLoopVirgin && i < h_img.height(); i++)\r
+ {\r
+ for (Ncv32u j=0; bLoopVirgin && j < h_img.width(); j++)\r
+ {\r
+ if (h_img.ptr()[h_img.stride()*i+j] != h_img_d.ptr()[h_img_d.stride()*i+j])\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestDrawRects<T>::deinit()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template class TestDrawRects<Ncv8u>;\r
+template class TestDrawRects<Ncv32u>;\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testdrawrects_h_\r
+#define _testdrawrects_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+template <class T>\r
+class TestDrawRects : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestDrawRects(std::string testName, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,\r
+ Ncv32u width, Ncv32u height, Ncv32u numRects, T color);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+\r
+ TestDrawRects(const TestDrawRects&);\r
+ TestDrawRects& operator=(const TestDrawRects&); \r
+\r
+ NCVTestSourceProvider<T> &src;\r
+ NCVTestSourceProvider<Ncv32u> &src32u;\r
+ Ncv32u width;\r
+ Ncv32u height;\r
+ Ncv32u numRects;\r
+ T color;\r
+};\r
+\r
+#endif // _testdrawrects_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <float.h>\r
+#include <algorithm>\r
+#include "TestHaarCascadeApplication.h"\r
+#include "NCVHaarObjectDetection.hpp"\r
+\r
+\r
+TestHaarCascadeApplication::TestHaarCascadeApplication(std::string testName, NCVTestSourceProvider<Ncv8u> &src,\r
+ std::string cascadeName, Ncv32u width, Ncv32u height)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ cascadeName(cascadeName),\r
+ width(width),\r
+ height(height)\r
+{\r
+}\r
+\r
+\r
+bool TestHaarCascadeApplication::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "cascadeName=" << cascadeName << std::endl;\r
+ strOut << "width=" << width << std::endl;\r
+ strOut << "height=" << height << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+bool TestHaarCascadeApplication::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+bool TestHaarCascadeApplication::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ Ncv32u numStages, numNodes, numFeatures;\r
+\r
+ ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);\r
+ ncvAssertReturn(h_HaarStages.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);\r
+ ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);\r
+ ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);\r
+\r
+ NCVVectorAlloc<HaarStage64> d_HaarStages(*this->allocatorGPU.get(), numStages);\r
+ ncvAssertReturn(d_HaarStages.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarClassifierNode128> d_HaarNodes(*this->allocatorGPU.get(), numNodes);\r
+ ncvAssertReturn(d_HaarNodes.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarFeature64> d_HaarFeatures(*this->allocatorGPU.get(), numFeatures);\r
+ ncvAssertReturn(d_HaarFeatures.isMemAllocated(), false);\r
+\r
+ HaarClassifierCascadeDescriptor haar;\r
+ haar.ClassifierSize.width = haar.ClassifierSize.height = 1;\r
+ haar.bNeedsTiltedII = false;\r
+ haar.NumClassifierRootNodes = numNodes;\r
+ haar.NumClassifierTotalNodes = numNodes;\r
+ haar.NumFeatures = numFeatures;\r
+ haar.NumStages = numStages;\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ ncvAssertReturn(NCV_SUCCESS == h_HaarStages.copySolid(d_HaarStages, 0), false);\r
+ ncvAssertReturn(NCV_SUCCESS == h_HaarNodes.copySolid(d_HaarNodes, 0), false);\r
+ ncvAssertReturn(NCV_SUCCESS == h_HaarFeatures.copySolid(d_HaarFeatures, 0), false);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ NcvSize32s srcRoi, srcIIRoi, searchRoi;\r
+ srcRoi.width = this->width;\r
+ srcRoi.height = this->height;\r
+ srcIIRoi.width = srcRoi.width + 1;\r
+ srcIIRoi.height = srcRoi.height + 1;\r
+ searchRoi.width = srcIIRoi.width - haar.ClassifierSize.width;\r
+ searchRoi.height = srcIIRoi.height - haar.ClassifierSize.height;\r
+ if (searchRoi.width <= 0 || searchRoi.height <= 0)\r
+ {\r
+ return false;\r
+ }\r
+ NcvSize32u searchRoiU(searchRoi.width, searchRoi.height);\r
+\r
+ NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img.isMemAllocated(), false);\r
+\r
+ Ncv32u integralWidth = this->width + 1;\r
+ Ncv32u integralHeight = this->height + 1;\r
+\r
+ NCVMatrixAlloc<Ncv32u> d_integralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);\r
+ ncvAssertReturn(d_integralImage.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv64u> d_sqIntegralImage(*this->allocatorGPU.get(), integralWidth, integralHeight);\r
+ ncvAssertReturn(d_sqIntegralImage.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv32u> h_integralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);\r
+ ncvAssertReturn(h_integralImage.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv64u> h_sqIntegralImage(*this->allocatorCPU.get(), integralWidth, integralHeight);\r
+ ncvAssertReturn(h_sqIntegralImage.isMemAllocated(), false);\r
+\r
+ NCVMatrixAlloc<Ncv32f> d_rectStdDev(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_rectStdDev.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv32u> d_pixelMask(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_pixelMask.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv32f> h_rectStdDev(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_rectStdDev.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv32u> h_pixelMask(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_pixelMask.isMemAllocated(), false);\r
+\r
+ NCVVectorAlloc<NcvRect32u> d_hypotheses(*this->allocatorGPU.get(), this->width * this->height);\r
+ ncvAssertReturn(d_hypotheses.isMemAllocated(), false);\r
+ NCVVectorAlloc<NcvRect32u> h_hypotheses(*this->allocatorCPU.get(), this->width * this->height);\r
+ ncvAssertReturn(h_hypotheses.isMemAllocated(), false);\r
+\r
+ NCVStatus nppStat;\r
+ Ncv32u szTmpBufIntegral, szTmpBufSqIntegral;\r
+ nppStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &szTmpBufIntegral, this->devProp);\r
+ ncvAssertReturn(nppStat == NPPST_SUCCESS, false);\r
+ nppStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &szTmpBufSqIntegral, this->devProp);\r
+ ncvAssertReturn(nppStat == NPPST_SUCCESS, false);\r
+ NCVVectorAlloc<Ncv8u> d_tmpIIbuf(*this->allocatorGPU.get(), std::max(szTmpBufIntegral, szTmpBufSqIntegral));\r
+ ncvAssertReturn(d_tmpIIbuf.isMemAllocated(), false);\r
+\r
+ Ncv32u detectionsOnThisScale_d = 0;\r
+ Ncv32u detectionsOnThisScale_h = 0;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvAssertReturn(this->src.fill(h_img), false);\r
+ ncvStat = h_img.copySolid(d_img, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);\r
+\r
+ nppStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),\r
+ d_integralImage.ptr(), d_integralImage.pitch(),\r
+ NcvSize32u(d_img.width(), d_img.height()),\r
+ d_tmpIIbuf.ptr(), szTmpBufIntegral, this->devProp);\r
+ ncvAssertReturn(nppStat == NPPST_SUCCESS, false);\r
+\r
+ nppStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),\r
+ d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),\r
+ NcvSize32u(d_img.width(), d_img.height()),\r
+ d_tmpIIbuf.ptr(), szTmpBufSqIntegral, this->devProp);\r
+ ncvAssertReturn(nppStat == NPPST_SUCCESS, false);\r
+\r
+ const NcvRect32u rect(\r
+ HAAR_STDDEV_BORDER,\r
+ HAAR_STDDEV_BORDER,\r
+ haar.ClassifierSize.width - 2*HAAR_STDDEV_BORDER,\r
+ haar.ClassifierSize.height - 2*HAAR_STDDEV_BORDER);\r
+ nppStat = nppiStRectStdDev_32f_C1R(\r
+ d_integralImage.ptr(), d_integralImage.pitch(),\r
+ d_sqIntegralImage.ptr(), d_sqIntegralImage.pitch(),\r
+ d_rectStdDev.ptr(), d_rectStdDev.pitch(),\r
+ NcvSize32u(searchRoi.width, searchRoi.height), rect,\r
+ 1.0f, true);\r
+ ncvAssertReturn(nppStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = d_integralImage.copySolid(h_integralImage, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvStat = d_rectStdDev.copySolid(h_rectStdDev, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
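+    //pre-fill the pixel mask: pack (row << 16 | col) for positions inside the search ROI and mark the stride padding as invalid\r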
+ for (Ncv32u i=0; i<searchRoiU.height; i++)\r
+ {\r
+ for (Ncv32u j=0; j<h_pixelMask.stride(); j++)\r
+ {\r
+ if (j<searchRoiU.width)\r
+ {\r
+ h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = (i << 16) | j;\r
+ }\r
+ else\r
+ {\r
+ h_pixelMask.ptr()[i*h_pixelMask.stride()+j] = OBJDET_MASK_ELEMENT_INVALID_32U;\r
+ }\r
+ }\r
+ }\r
+ ncvAssertReturn(cudaSuccess == cudaStreamSynchronize(0), false);\r
+\r
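+    //force the x87 FPU to 24-bit (single) precision so the host reference matches the GPU's float arithmetic; _controlfp_s is MSVC-specific\r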
+ Ncv32u fpu_oldcw, fpu_cw;\r
+ _controlfp_s(&fpu_cw, 0, 0);\r
+ fpu_oldcw = fpu_cw;\r
+ _controlfp_s(&fpu_cw, _PC_24, _MCW_PC);\r
+ ncvStat = ncvApplyHaarClassifierCascade_host(\r
+ h_integralImage, h_rectStdDev, h_pixelMask,\r
+ detectionsOnThisScale_h,\r
+ haar, h_HaarStages, h_HaarNodes, h_HaarFeatures, false,\r
+ searchRoiU, 1, 1.0f);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ _controlfp_s(&fpu_cw, fpu_oldcw, _MCW_PC);\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ int devId;\r
+ ncvAssertCUDAReturn(cudaGetDevice(&devId), false);\r
+ cudaDeviceProp devProp;\r
+ ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), false);\r
+\r
+ ncvStat = ncvApplyHaarClassifierCascade_device(\r
+ d_integralImage, d_rectStdDev, d_pixelMask,\r
+ detectionsOnThisScale_d,\r
+ haar, h_HaarStages, d_HaarStages, d_HaarNodes, d_HaarFeatures, false,\r
+ searchRoiU, 1, 1.0f,\r
+ *this->allocatorGPU.get(), *this->allocatorCPU.get(),\r
+ devProp, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ NCVMatrixAlloc<Ncv32u> h_pixelMask_d(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_pixelMask_d.isMemAllocated(), false);\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvStat = d_pixelMask.copySolid(h_pixelMask_d, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ if (detectionsOnThisScale_d != detectionsOnThisScale_h)\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ else\r
+ {\r
+ std::sort(h_pixelMask_d.ptr(), h_pixelMask_d.ptr() + detectionsOnThisScale_d);\r
+ for (Ncv32u i=0; i<detectionsOnThisScale_d && bLoopVirgin; i++)\r
+ {\r
+ if (h_pixelMask.ptr()[i] != h_pixelMask_d.ptr()[i])\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+bool TestHaarCascadeApplication::deinit()\r
+{\r
+ return true;\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testhaarcascadeapplication_h_\r
+#define _testhaarcascadeapplication_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+class TestHaarCascadeApplication : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestHaarCascadeApplication(std::string testName, NCVTestSourceProvider<Ncv8u> &src,\r
+ std::string cascadeName, Ncv32u width, Ncv32u height);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+ TestHaarCascadeApplication(const TestHaarCascadeApplication&);\r
+ TestHaarCascadeApplication& operator=(const TestHaarCascadeApplication&); \r
+\r
+\r
+ NCVTestSourceProvider<Ncv8u> &src;\r
+ std::string cascadeName;\r
+ Ncv32u width;\r
+ Ncv32u height;\r
+};\r
+\r
+#endif // _testhaarcascadeapplication_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <cstring>\r
+#include "TestHaarCascadeLoader.h"\r
+#include "NCVHaarObjectDetection.hpp"\r
+\r
+\r
+TestHaarCascadeLoader::TestHaarCascadeLoader(std::string testName, std::string cascadeName)\r
+ :\r
+ NCVTestProvider(testName),\r
+ cascadeName(cascadeName)\r
+{\r
+}\r
+\r
+\r
+bool TestHaarCascadeLoader::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "cascadeName=" << cascadeName << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+bool TestHaarCascadeLoader::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+bool TestHaarCascadeLoader::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ Ncv32u numStages, numNodes, numFeatures;\r
+ Ncv32u numStages_2 = 0, numNodes_2 = 0, numFeatures_2 = 0;\r
+\r
+ ncvStat = ncvHaarGetClassifierSize(this->cascadeName, numStages, numNodes, numFeatures);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ NCVVectorAlloc<HaarStage64> h_HaarStages(*this->allocatorCPU.get(), numStages);\r
+ ncvAssertReturn(h_HaarStages.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes(*this->allocatorCPU.get(), numNodes);\r
+ ncvAssertReturn(h_HaarNodes.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarFeature64> h_HaarFeatures(*this->allocatorCPU.get(), numFeatures);\r
+ ncvAssertReturn(h_HaarFeatures.isMemAllocated(), false);\r
+\r
+ NCVVectorAlloc<HaarStage64> h_HaarStages_2(*this->allocatorCPU.get(), numStages);\r
+ ncvAssertReturn(h_HaarStages_2.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarClassifierNode128> h_HaarNodes_2(*this->allocatorCPU.get(), numNodes);\r
+ ncvAssertReturn(h_HaarNodes_2.isMemAllocated(), false);\r
+ NCVVectorAlloc<HaarFeature64> h_HaarFeatures_2(*this->allocatorCPU.get(), numFeatures);\r
+ ncvAssertReturn(h_HaarFeatures_2.isMemAllocated(), false);\r
+\r
+ HaarClassifierCascadeDescriptor haar;\r
+ HaarClassifierCascadeDescriptor haar_2;\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+\r
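+    //round-trip check: load the cascade from file, store it as NVBIN, then reload the NVBIN and compare against the original\r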
+ const std::string testNvbinName = "test.nvbin";\r
+ ncvStat = ncvHaarLoadFromFile_host(this->cascadeName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ ncvStat = ncvHaarStoreNVBIN_host(testNvbinName, haar, h_HaarStages, h_HaarNodes, h_HaarFeatures);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ ncvStat = ncvHaarGetClassifierSize(testNvbinName, numStages_2, numNodes_2, numFeatures_2);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ ncvStat = ncvHaarLoadFromFile_host(testNvbinName, haar_2, h_HaarStages_2, h_HaarNodes_2, h_HaarFeatures_2);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ if (\r
+ numStages_2 != numStages ||\r
+ numNodes_2 != numNodes ||\r
+ numFeatures_2 != numFeatures ||\r
+ haar.NumStages != haar_2.NumStages ||\r
+ haar.NumClassifierRootNodes != haar_2.NumClassifierRootNodes ||\r
+ haar.NumClassifierTotalNodes != haar_2.NumClassifierTotalNodes ||\r
+ haar.NumFeatures != haar_2.NumFeatures ||\r
+ haar.ClassifierSize.width != haar_2.ClassifierSize.width ||\r
+ haar.ClassifierSize.height != haar_2.ClassifierSize.height ||\r
+ haar.bNeedsTiltedII != haar_2.bNeedsTiltedII ||\r
+ haar.bHasStumpsOnly != haar_2.bHasStumpsOnly )\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ if (memcmp(h_HaarStages.ptr(), h_HaarStages_2.ptr(), haar.NumStages * sizeof(HaarStage64)) ||\r
+ memcmp(h_HaarNodes.ptr(), h_HaarNodes_2.ptr(), haar.NumClassifierTotalNodes * sizeof(HaarClassifierNode128)) ||\r
+ memcmp(h_HaarFeatures.ptr(), h_HaarFeatures_2.ptr(), haar.NumFeatures * sizeof(HaarFeature64)) )\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+bool TestHaarCascadeLoader::deinit()\r
+{\r
+ return true;\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testhaarcascadeloader_h_\r
+#define _testhaarcascadeloader_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+class TestHaarCascadeLoader : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestHaarCascadeLoader(std::string testName, std::string cascadeName);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+\r
+ std::string cascadeName;\r
+};\r
+\r
+#endif // _testhaarcascadeloader_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <algorithm>\r
+#include <cstring>\r
+#include <vector>\r
+#include "TestHypothesesFilter.h"\r
+#include "NCVHaarObjectDetection.hpp"\r
+\r
+\r
+TestHypothesesFilter::TestHypothesesFilter(std::string testName, NCVTestSourceProvider<Ncv32u> &src,\r
+ Ncv32u numDstRects, Ncv32u minNeighbors, Ncv32f eps)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ numDstRects(numDstRects),\r
+ minNeighbors(minNeighbors),\r
+ eps(eps)\r
+{\r
+}\r
+\r
+\r
+bool TestHypothesesFilter::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "numDstRects=" << numDstRects << std::endl;\r
+ strOut << "minNeighbors=" << minNeighbors << std::endl;\r
+ strOut << "eps=" << eps << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+bool TestHypothesesFilter::init()\r
+{\r
+ this->canvasWidth = 4096;\r
+ this->canvasHeight = 4096;\r
+ return true;\r
+}\r
+\r
+\r
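+//returns true when corresponding edges of the two rectangles differ by no more than a tolerance proportional to eps and the rectangle sizes\r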
+bool compareRects(const NcvRect32u &r1, const NcvRect32u &r2, Ncv32f eps)\r
+{\r
+ double delta = eps*(std::min(r1.width, r2.width) + std::min(r1.height, r2.height))*0.5;\r
+ return std::abs((Ncv32s)r1.x - (Ncv32s)r2.x) <= delta &&\r
+ std::abs((Ncv32s)r1.y - (Ncv32s)r2.y) <= delta &&\r
+ std::abs((Ncv32s)r1.x + (Ncv32s)r1.width - (Ncv32s)r2.x - (Ncv32s)r2.width) <= delta &&\r
+ std::abs((Ncv32s)r1.y + (Ncv32s)r1.height - (Ncv32s)r2.y - (Ncv32s)r2.height) <= delta;\r
+}\r
+\r
+\r
+inline bool operator < (const NcvRect32u &a, const NcvRect32u &b)\r
+{\r
+ return a.x < b.x;\r
+}\r
+\r
+\r
+bool TestHypothesesFilter::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ NCVVectorAlloc<Ncv32u> h_random32u(*this->allocatorCPU.get(), this->numDstRects * sizeof(NcvRect32u) / sizeof(Ncv32u));\r
+ ncvAssertReturn(h_random32u.isMemAllocated(), false);\r
+\r
+ Ncv32u srcSlotSize = 2 * this->minNeighbors + 1;\r
+\r
+ NCVVectorAlloc<NcvRect32u> h_vecSrc(*this->allocatorCPU.get(), this->numDstRects*srcSlotSize);\r
+ ncvAssertReturn(h_vecSrc.isMemAllocated(), false);\r
+ NCVVectorAlloc<NcvRect32u> h_vecDst_groundTruth(*this->allocatorCPU.get(), this->numDstRects);\r
+ ncvAssertReturn(h_vecDst_groundTruth.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorCPU.get()->isCounting());\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvAssertReturn(this->src.fill(h_random32u), false);\r
+ Ncv32u randCnt = 0;\r
+ Ncv64f randVal;\r
+\r
+ for (Ncv32u i=0; i<this->numDstRects; i++)\r
+ {\r
+ h_vecDst_groundTruth.ptr()[i].x = i * this->canvasWidth / this->numDstRects + this->canvasWidth / (this->numDstRects * 4);\r
+ h_vecDst_groundTruth.ptr()[i].y = i * this->canvasHeight / this->numDstRects + this->canvasHeight / (this->numDstRects * 4);\r
+ h_vecDst_groundTruth.ptr()[i].width = this->canvasWidth / (this->numDstRects * 2);\r
+ h_vecDst_groundTruth.ptr()[i].height = this->canvasHeight / (this->numDstRects * 2);\r
+\r
+ Ncv32u numNeighbors = this->minNeighbors + 1 + (Ncv32u)(((1.0 * h_random32u.ptr()[i]) * (this->minNeighbors + 1)) / 0xFFFFFFFF);\r
+ numNeighbors = (numNeighbors > srcSlotSize) ? srcSlotSize : numNeighbors;\r
+\r
+ //fill in strong hypotheses (2 * ((1.0 * randVal) / 0xFFFFFFFF) - 1)\r
+ for (Ncv32u j=0; j<numNeighbors; j++)\r
+ {\r
+ randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].x = \r
+ h_vecDst_groundTruth.ptr()[i].x +\r
+ (Ncv32s)(h_vecDst_groundTruth.ptr()[i].width * this->eps * (randVal - 0.5));\r
+ randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].y = \r
+ h_vecDst_groundTruth.ptr()[i].y +\r
+ (Ncv32s)(h_vecDst_groundTruth.ptr()[i].height * this->eps * (randVal - 0.5));\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].width = h_vecDst_groundTruth.ptr()[i].width;\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].height = h_vecDst_groundTruth.ptr()[i].height;\r
+ }\r
+\r
+ //generate weak hypotheses (to be removed in processing)\r
+ for (Ncv32u j=numNeighbors; j<srcSlotSize; j++)\r
+ {\r
+ randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].x = \r
+ this->canvasWidth + h_vecDst_groundTruth.ptr()[i].x +\r
+ (Ncv32s)(h_vecDst_groundTruth.ptr()[i].width * this->eps * (randVal - 0.5));\r
+ randVal = (1.0 * h_random32u.ptr()[randCnt++]) / 0xFFFFFFFF; randCnt = randCnt % h_random32u.length();\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].y = \r
+ this->canvasHeight + h_vecDst_groundTruth.ptr()[i].y +\r
+ (Ncv32s)(h_vecDst_groundTruth.ptr()[i].height * this->eps * (randVal - 0.5));\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].width = h_vecDst_groundTruth.ptr()[i].width;\r
+ h_vecSrc.ptr()[srcSlotSize * i + j].height = h_vecDst_groundTruth.ptr()[i].height;\r
+ }\r
+ }\r
+\r
+ //shuffle\r
+ for (Ncv32u i=0; i<this->numDstRects*srcSlotSize-1; i++)\r
+ {\r
+ Ncv32u randVal = h_random32u.ptr()[randCnt++]; randCnt = randCnt % h_random32u.length();\r
+ Ncv32u secondSwap = randVal % (this->numDstRects*srcSlotSize-1 - i);\r
+ NcvRect32u tmp = h_vecSrc.ptr()[i + secondSwap];\r
+ h_vecSrc.ptr()[i + secondSwap] = h_vecSrc.ptr()[i];\r
+ h_vecSrc.ptr()[i] = tmp;\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ Ncv32u numHypothesesSrc = h_vecSrc.length();\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvStat = ncvFilterHypotheses_host(h_vecSrc, numHypothesesSrc, this->minNeighbors, this->eps, NULL);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ //verification\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ if (numHypothesesSrc != this->numDstRects)\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ else\r
+ {\r
+ std::vector<NcvRect32u> tmpRects(numHypothesesSrc);\r
+ memcpy(&tmpRects[0], h_vecSrc.ptr(), numHypothesesSrc * sizeof(NcvRect32u));\r
+ std::sort(tmpRects.begin(), tmpRects.end());\r
+ for (Ncv32u i=0; i<numHypothesesSrc && bLoopVirgin; i++)\r
+ {\r
+ if (!compareRects(tmpRects[i], h_vecDst_groundTruth.ptr()[i], this->eps))\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+bool TestHypothesesFilter::deinit()\r
+{\r
+ return true;\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testhypothesesfilter_h_\r
+#define _testhypothesesfilter_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+class TestHypothesesFilter : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestHypothesesFilter(std::string testName, NCVTestSourceProvider<Ncv32u> &src,\r
+ Ncv32u numDstRects, Ncv32u minNeighbors, Ncv32f eps);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+\r
+ TestHypothesesFilter(const TestHypothesesFilter&);\r
+ TestHypothesesFilter& operator=(const TestHypothesesFilter&); \r
+\r
+ NCVTestSourceProvider<Ncv32u> &src;\r
+ Ncv32u numDstRects;\r
+ Ncv32u minNeighbors;\r
+ Ncv32f eps;\r
+\r
+ Ncv32u canvasWidth;\r
+ Ncv32u canvasHeight;\r
+};\r
+\r
+#endif // _testhypothesesfilter_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <cstring>\r
+#include "TestHypothesesGrow.h"\r
+#include "NCVHaarObjectDetection.hpp"\r
+\r
+\r
+TestHypothesesGrow::TestHypothesesGrow(std::string testName, NCVTestSourceProvider<Ncv32u> &src,\r
+ Ncv32u rectWidth, Ncv32u rectHeight, Ncv32f rectScale, \r
+ Ncv32u maxLenSrc, Ncv32u lenSrc, Ncv32u maxLenDst, Ncv32u lenDst)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ rectWidth(rectWidth),\r
+ rectHeight(rectHeight),\r
+ rectScale(rectScale),\r
+ maxLenSrc(maxLenSrc),\r
+ lenSrc(lenSrc),\r
+ maxLenDst(maxLenDst),\r
+ lenDst(lenDst)\r
+{\r
+}\r
+\r
+\r
+bool TestHypothesesGrow::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "rectWidth=" << rectWidth << std::endl;\r
+ strOut << "rectHeight=" << rectHeight << std::endl;\r
+ strOut << "rectScale=" << rectScale << std::endl;\r
+ strOut << "maxLenSrc=" << maxLenSrc << std::endl;\r
+ strOut << "lenSrc=" << lenSrc << std::endl;\r
+ strOut << "maxLenDst=" << maxLenDst << std::endl;\r
+ strOut << "lenDst=" << lenDst << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+bool TestHypothesesGrow::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+bool TestHypothesesGrow::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ NCVVectorAlloc<Ncv32u> h_vecSrc(*this->allocatorCPU.get(), this->maxLenSrc);\r
+ ncvAssertReturn(h_vecSrc.isMemAllocated(), false);\r
+ NCVVectorAlloc<Ncv32u> d_vecSrc(*this->allocatorGPU.get(), this->maxLenSrc);\r
+ ncvAssertReturn(d_vecSrc.isMemAllocated(), false);\r
+\r
+ NCVVectorAlloc<NcvRect32u> h_vecDst(*this->allocatorCPU.get(), this->maxLenDst);\r
+ ncvAssertReturn(h_vecDst.isMemAllocated(), false);\r
+ NCVVectorAlloc<NcvRect32u> d_vecDst(*this->allocatorGPU.get(), this->maxLenDst);\r
+ ncvAssertReturn(d_vecDst.isMemAllocated(), false);\r
+ NCVVectorAlloc<NcvRect32u> h_vecDst_d(*this->allocatorCPU.get(), this->maxLenDst);\r
+ ncvAssertReturn(h_vecDst_d.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvAssertReturn(this->src.fill(h_vecSrc), false);\r
+ memset(h_vecDst.ptr(), 0, h_vecDst.length() * sizeof(NcvRect32u));\r
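+    //reinterpret the destination rect buffer as 32-bit words so the random source provider can pre-fill it\r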
+ NCVVectorReuse<Ncv32u> h_vecDst_as32u(h_vecDst.getSegment(), lenDst * sizeof(NcvRect32u) / sizeof(Ncv32u));\r
+ ncvAssertReturn(h_vecDst_as32u.isMemReused(), false);\r
+ ncvAssertReturn(this->src.fill(h_vecDst_as32u), false);\r
+ memcpy(h_vecDst_d.ptr(), h_vecDst.ptr(), h_vecDst.length() * sizeof(NcvRect32u));\r
+ NCV_SKIP_COND_END\r
+\r
+ ncvStat = h_vecSrc.copySolid(d_vecSrc, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvStat = h_vecDst.copySolid(d_vecDst, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);\r
+\r
+ Ncv32u h_outElemNum_d = 0;\r
+ Ncv32u h_outElemNum_h = 0;\r
+ NCV_SKIP_COND_BEGIN\r
+ h_outElemNum_d = this->lenDst;\r
+ ncvStat = ncvGrowDetectionsVector_device(d_vecSrc, this->lenSrc,\r
+ d_vecDst, h_outElemNum_d, this->maxLenDst,\r
+ this->rectWidth, this->rectHeight, this->rectScale, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvStat = d_vecDst.copySolid(h_vecDst_d, 0);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ ncvAssertCUDAReturn(cudaStreamSynchronize(0), false);\r
+\r
+ h_outElemNum_h = this->lenDst;\r
+ ncvStat = ncvGrowDetectionsVector_host(h_vecSrc, this->lenSrc,\r
+ h_vecDst, h_outElemNum_h, this->maxLenDst,\r
+ this->rectWidth, this->rectHeight, this->rectScale);\r
+ ncvAssertReturn(ncvStat == NCV_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ if (h_outElemNum_d != h_outElemNum_h)\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ else\r
+ {\r
+ if (memcmp(h_vecDst.ptr(), h_vecDst_d.ptr(), this->maxLenDst * sizeof(NcvRect32u)))\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+bool TestHypothesesGrow::deinit()\r
+{\r
+ return true;\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testhypothesesgrow_h_\r
+#define _testhypothesesgrow_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+class TestHypothesesGrow : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestHypothesesGrow(std::string testName, NCVTestSourceProvider<Ncv32u> &src,\r
+ Ncv32u rectWidth, Ncv32u rectHeight, Ncv32f rectScale, \r
+ Ncv32u maxLenSrc, Ncv32u lenSrc, Ncv32u maxLenDst, Ncv32u lenDst);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+ TestHypothesesGrow(const TestHypothesesGrow&);\r
+ TestHypothesesGrow& operator=(const TestHypothesesGrow&); \r
+\r
+\r
+ NCVTestSourceProvider<Ncv32u> &src;\r
+ Ncv32u rectWidth;\r
+ Ncv32u rectHeight;\r
+ Ncv32f rectScale;\r
+ Ncv32u maxLenSrc;\r
+ Ncv32u lenSrc;\r
+ Ncv32u maxLenDst;\r
+ Ncv32u lenDst;\r
+};\r
+\r
+#endif // _testhypothesesgrow_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <math.h>\r
+#include "TestIntegralImage.h"\r
+\r
+\r
+template <class T_in, class T_out>\r
+TestIntegralImage<T_in, T_out>::TestIntegralImage(std::string testName, NCVTestSourceProvider<T_in> &src,\r
+ Ncv32u width, Ncv32u height)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ width(width),\r
+ height(height)\r
+{\r
+}\r
+\r
+\r
+template <class T_in, class T_out>\r
+bool TestIntegralImage<T_in, T_out>::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "sizeof(T_in)=" << sizeof(T_in) << std::endl;\r
+ strOut << "sizeof(T_out)=" << sizeof(T_out) << std::endl;\r
+ strOut << "width=" << width << std::endl;\r
+ strOut << "height=" << height << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+template <class T_in, class T_out>\r
+bool TestIntegralImage<T_in, T_out>::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template <class T_in, class T_out>\r
+bool TestIntegralImage<T_in, T_out>::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ Ncv32u widthII = this->width + 1;\r
+ Ncv32u heightII = this->height + 1;\r
+\r
+ NCVMatrixAlloc<T_in> d_img(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T_in> h_img(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T_out> d_imgII(*this->allocatorGPU.get(), widthII, heightII);\r
+ ncvAssertReturn(d_imgII.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T_out> h_imgII(*this->allocatorCPU.get(), widthII, heightII);\r
+ ncvAssertReturn(h_imgII.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T_out> h_imgII_d(*this->allocatorCPU.get(), widthII, heightII);\r
+ ncvAssertReturn(h_imgII_d.isMemAllocated(), false);\r
+\r
+ Ncv32u bufSize;\r
+ if (sizeof(T_in) == sizeof(Ncv8u))\r
+ {\r
+ ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);\r
+ ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);\r
+ }\r
+ else if (sizeof(T_in) == sizeof(Ncv32f))\r
+ {\r
+ ncvStat = nppiStIntegralGetSize_32f32f(NcvSize32u(this->width, this->height), &bufSize, this->devProp);\r
+ ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);\r
+ }\r
+\r
+ NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);\r
+ ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvAssertReturn(this->src.fill(h_img), false);\r
+\r
+ ncvStat = h_img.copySolid(d_img, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ if (sizeof(T_in) == sizeof(Ncv8u))\r
+ {\r
+ ncvStat = nppiStIntegral_8u32u_C1R((Ncv8u *)d_img.ptr(), d_img.pitch(),\r
+ (Ncv32u *)d_imgII.ptr(), d_imgII.pitch(),\r
+ NcvSize32u(this->width, this->height),\r
+ d_tmpBuf.ptr(), bufSize, this->devProp);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ }\r
+ else if (sizeof(T_in) == sizeof(Ncv32f))\r
+ {\r
+ ncvStat = nppiStIntegral_32f32f_C1R((Ncv32f *)d_img.ptr(), d_img.pitch(),\r
+ (Ncv32f *)d_imgII.ptr(), d_imgII.pitch(),\r
+ NcvSize32u(this->width, this->height),\r
+ d_tmpBuf.ptr(), bufSize, this->devProp);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);\r
+ }\r
+\r
+ ncvStat = d_imgII.copySolid(h_imgII_d, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ if (sizeof(T_in) == sizeof(Ncv8u))\r
+ {\r
+ ncvStat = nppiStIntegral_8u32u_C1R_host((Ncv8u *)h_img.ptr(), h_img.pitch(),\r
+ (Ncv32u *)h_imgII.ptr(), h_imgII.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ }\r
+ else if (sizeof(T_in) == sizeof(Ncv32f))\r
+ {\r
+ ncvStat = nppiStIntegral_32f32f_C1R_host((Ncv32f *)h_img.ptr(), h_img.pitch(),\r
+ (Ncv32f *)h_imgII.ptr(), h_imgII.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);\r
+ }\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+    //bit-to-bit check (the 32f variant allows a small absolute tolerance)\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)\r
+ {\r
+ for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)\r
+ {\r
+ if (sizeof(T_in) == sizeof(Ncv8u))\r
+ {\r
+ if (h_imgII.ptr()[h_imgII.stride()*i+j] != h_imgII_d.ptr()[h_imgII_d.stride()*i+j])\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ else if (sizeof(T_in) == sizeof(Ncv32f))\r
+ {\r
+ if (fabsf((float)h_imgII.ptr()[h_imgII.stride()*i+j] - (float)h_imgII_d.ptr()[h_imgII_d.stride()*i+j]) > 0.01f)\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect integral image test instance", false);\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+template <class T_in, class T_out>\r
+bool TestIntegralImage<T_in, T_out>::deinit()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template class TestIntegralImage<Ncv8u, Ncv32u>;\r
+template class TestIntegralImage<Ncv32f, Ncv32f>;\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testintegralimage_h_\r
+#define _testintegralimage_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+template <class T_in, class T_out>\r
+class TestIntegralImage : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestIntegralImage(std::string testName, NCVTestSourceProvider<T_in> &src,\r
+ Ncv32u width, Ncv32u height);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+ TestIntegralImage(const TestIntegralImage&);\r
+ TestIntegralImage& operator=(const TestIntegralImage&); \r
+\r
+ NCVTestSourceProvider<T_in> &src;\r
+ Ncv32u width;\r
+ Ncv32u height;\r
+};\r
+\r
+#endif // _testintegralimage_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include "TestIntegralImageSquared.h"\r
+\r
+\r
+TestIntegralImageSquared::TestIntegralImageSquared(std::string testName, NCVTestSourceProvider<Ncv8u> &src,\r
+ Ncv32u width, Ncv32u height)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ width(width),\r
+ height(height)\r
+{\r
+}\r
+\r
+\r
+bool TestIntegralImageSquared::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "width=" << width << std::endl;\r
+ strOut << "height=" << height << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+bool TestIntegralImageSquared::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+bool TestIntegralImageSquared::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ Ncv32u widthSII = this->width + 1;\r
+ Ncv32u heightSII = this->height + 1;\r
+\r
+ NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);\r
+ ncvAssertReturn(d_imgSII.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);\r
+ ncvAssertReturn(h_imgSII.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv64u> h_imgSII_d(*this->allocatorCPU.get(), widthSII, heightSII);\r
+ ncvAssertReturn(h_imgSII_d.isMemAllocated(), false);\r
+\r
+ Ncv32u bufSize;\r
+ ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSize, this->devProp);\r
+ ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);\r
+ NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);\r
+ ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+\r
+ ncvAssertReturn(this->src.fill(h_img), false);\r
+\r
+ ncvStat = h_img.copySolid(d_img, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),\r
+ d_imgSII.ptr(), d_imgSII.pitch(),\r
+ NcvSize32u(this->width, this->height),\r
+ d_tmpBuf.ptr(), bufSize, this->devProp);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = d_imgSII.copySolid(h_imgSII_d, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),\r
+ h_imgSII.ptr(), h_imgSII.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ for (Ncv32u i=0; bLoopVirgin && i < h_img.height() + 1; i++)\r
+ {\r
+ for (Ncv32u j=0; bLoopVirgin && j < h_img.width() + 1; j++)\r
+ {\r
+ if (h_imgSII.ptr()[h_imgSII.stride()*i+j] != h_imgSII_d.ptr()[h_imgSII_d.stride()*i+j])\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+bool TestIntegralImageSquared::deinit()\r
+{\r
+ return true;\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testintegralimagesquared_h_\r
+#define _testintegralimagesquared_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+class TestIntegralImageSquared : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestIntegralImageSquared(std::string testName, NCVTestSourceProvider<Ncv8u> &src,\r
+ Ncv32u width, Ncv32u height);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+ TestIntegralImageSquared(const TestIntegralImageSquared&);\r
+ TestIntegralImageSquared& operator=(const TestIntegralImageSquared&); \r
+\r
+ NCVTestSourceProvider<Ncv8u> &src;\r
+ Ncv32u width;\r
+ Ncv32u height;\r
+};\r
+\r
+#endif // _testintegralimagesquared_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <math.h>\r
+\r
+#include "TestRectStdDev.h"\r
+\r
+\r
+TestRectStdDev::TestRectStdDev(std::string testName, NCVTestSourceProvider<Ncv8u> &src,\r
+ Ncv32u width, Ncv32u height, NcvRect32u rect, Ncv32f scaleFactor,\r
+ NcvBool bTextureCache)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ width(width),\r
+ height(height),\r
+ rect(rect),\r
+ scaleFactor(scaleFactor),\r
+ bTextureCache(bTextureCache)\r
+{\r
+}\r
+\r
+\r
+bool TestRectStdDev::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "width=" << width << std::endl;\r
+ strOut << "height=" << height << std::endl;\r
+ strOut << "rect=[" << rect.x << ", " << rect.y << ", " << rect.width << ", " << rect.height << "]\n";\r
+ strOut << "scaleFactor=" << scaleFactor << std::endl;\r
+ strOut << "bTextureCache=" << bTextureCache << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+bool TestRectStdDev::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+bool TestRectStdDev::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ Ncv32s _normWidth = (Ncv32s)this->width - this->rect.x - this->rect.width + 1;\r
+ Ncv32s _normHeight = (Ncv32s)this->height - this->rect.y - this->rect.height + 1;\r
+ if (_normWidth <= 0 || _normHeight <= 0)\r
+ {\r
+ return true;\r
+ }\r
+ Ncv32u normWidth = (Ncv32u)_normWidth;\r
+ Ncv32u normHeight = (Ncv32u)_normHeight;\r
+ NcvSize32u szNormRoi(normWidth, normHeight);\r
+\r
+ Ncv32u widthII = this->width + 1;\r
+ Ncv32u heightII = this->height + 1;\r
+ Ncv32u widthSII = this->width + 1;\r
+ Ncv32u heightSII = this->height + 1;\r
+\r
+ NCVMatrixAlloc<Ncv8u> d_img(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv8u> h_img(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img.isMemAllocated(), false);\r
+\r
+ NCVMatrixAlloc<Ncv32u> d_imgII(*this->allocatorGPU.get(), widthII, heightII);\r
+ ncvAssertReturn(d_imgII.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv32u> h_imgII(*this->allocatorCPU.get(), widthII, heightII);\r
+ ncvAssertReturn(h_imgII.isMemAllocated(), false);\r
+\r
+ NCVMatrixAlloc<Ncv64u> d_imgSII(*this->allocatorGPU.get(), widthSII, heightSII);\r
+ ncvAssertReturn(d_imgSII.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv64u> h_imgSII(*this->allocatorCPU.get(), widthSII, heightSII);\r
+ ncvAssertReturn(h_imgSII.isMemAllocated(), false);\r
+\r
+ NCVMatrixAlloc<Ncv32f> d_norm(*this->allocatorGPU.get(), normWidth, normHeight);\r
+ ncvAssertReturn(d_norm.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv32f> h_norm(*this->allocatorCPU.get(), normWidth, normHeight);\r
+ ncvAssertReturn(h_norm.isMemAllocated(), false);\r
+ NCVMatrixAlloc<Ncv32f> h_norm_d(*this->allocatorCPU.get(), normWidth, normHeight);\r
+ ncvAssertReturn(h_norm_d.isMemAllocated(), false);\r
+\r
+ Ncv32u bufSizeII, bufSizeSII;\r
+ ncvStat = nppiStIntegralGetSize_8u32u(NcvSize32u(this->width, this->height), &bufSizeII, this->devProp);\r
+ ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);\r
+ ncvStat = nppiStSqrIntegralGetSize_8u64u(NcvSize32u(this->width, this->height), &bufSizeSII, this->devProp);\r
+ ncvAssertReturn(NPPST_SUCCESS == ncvStat, false);\r
+ Ncv32u bufSize = bufSizeII > bufSizeSII ? bufSizeII : bufSizeSII;\r
+ NCVVectorAlloc<Ncv8u> d_tmpBuf(*this->allocatorGPU.get(), bufSize);\r
+ ncvAssertReturn(d_tmpBuf.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvAssertReturn(this->src.fill(h_img), false);\r
+\r
+ ncvStat = h_img.copySolid(d_img, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStIntegral_8u32u_C1R(d_img.ptr(), d_img.pitch(),\r
+ d_imgII.ptr(), d_imgII.pitch(),\r
+ NcvSize32u(this->width, this->height),\r
+ d_tmpBuf.ptr(), bufSize, this->devProp);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStSqrIntegral_8u64u_C1R(d_img.ptr(), d_img.pitch(),\r
+ d_imgSII.ptr(), d_imgSII.pitch(),\r
+ NcvSize32u(this->width, this->height),\r
+ d_tmpBuf.ptr(), bufSize, this->devProp);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStRectStdDev_32f_C1R(d_imgII.ptr(), d_imgII.pitch(),\r
+ d_imgSII.ptr(), d_imgSII.pitch(),\r
+ d_norm.ptr(), d_norm.pitch(),\r
+ szNormRoi, this->rect,\r
+ this->scaleFactor,\r
+ this->bTextureCache);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = d_norm.copySolid(h_norm_d, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStIntegral_8u32u_C1R_host(h_img.ptr(), h_img.pitch(),\r
+ h_imgII.ptr(), h_imgII.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStSqrIntegral_8u64u_C1R_host(h_img.ptr(), h_img.pitch(),\r
+ h_imgSII.ptr(), h_imgSII.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ ncvStat = nppiStRectStdDev_32f_C1R_host(h_imgII.ptr(), h_imgII.pitch(),\r
+ h_imgSII.ptr(), h_imgSII.pitch(),\r
+ h_norm.ptr(), h_norm.pitch(),\r
+ szNormRoi, this->rect,\r
+ this->scaleFactor);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ const Ncv64f relEPS = 0.005;\r
+ for (Ncv32u i=0; bLoopVirgin && i < h_norm.height(); i++)\r
+ {\r
+ for (Ncv32u j=0; bLoopVirgin && j < h_norm.width(); j++)\r
+ {\r
+ Ncv64f absErr = fabs(h_norm.ptr()[h_norm.stride()*i+j] - h_norm_d.ptr()[h_norm_d.stride()*i+j]);\r
+ Ncv64f relErr = absErr / h_norm.ptr()[h_norm.stride()*i+j];\r
+\r
+ if (relErr > relEPS)\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+bool TestRectStdDev::deinit()\r
+{\r
+ return true;\r
+}\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testrectstddev_h_\r
+#define _testrectstddev_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+class TestRectStdDev : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestRectStdDev(std::string testName, NCVTestSourceProvider<Ncv8u> &src,\r
+ Ncv32u width, Ncv32u height, NcvRect32u rect, Ncv32f scaleFactor,\r
+ NcvBool bTextureCache);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+ TestRectStdDev(const TestRectStdDev&);\r
+ TestRectStdDev& operator=(const TestRectStdDev&); \r
+\r
+ NCVTestSourceProvider<Ncv8u> &src;\r
+ NcvRect32u rect;\r
+ Ncv32u width;\r
+ Ncv32u height;\r
+ Ncv32f scaleFactor;\r
+\r
+ NcvBool bTextureCache;\r
+};\r
+\r
+#endif // _testrectstddev_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <math.h>\r
+\r
+#include "TestResize.h"\r
+\r
+\r
+template <class T>\r
+TestResize<T>::TestResize(std::string testName, NCVTestSourceProvider<T> &src,\r
+ Ncv32u width, Ncv32u height, Ncv32u scaleFactor, NcvBool bTextureCache)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ width(width),\r
+ height(height),\r
+ scaleFactor(scaleFactor),\r
+ bTextureCache(bTextureCache)\r
+{\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestResize<T>::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "sizeof(T)=" << sizeof(T) << std::endl;\r
+ strOut << "width=" << width << std::endl;\r
+ strOut << "scaleFactor=" << scaleFactor << std::endl;\r
+ strOut << "bTextureCache=" << bTextureCache << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestResize<T>::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestResize<T>::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ Ncv32s smallWidth = this->width / this->scaleFactor;\r
+ Ncv32s smallHeight = this->height / this->scaleFactor;\r
+ if (smallWidth == 0 || smallHeight == 0)\r
+ {\r
+ return true;\r
+ }\r
+\r
+ NcvSize32u srcSize(this->width, this->height);\r
+\r
+ NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img.isMemAllocated(), false);\r
+\r
+ NCVMatrixAlloc<T> d_small(*this->allocatorGPU.get(), smallWidth, smallHeight);\r
+ ncvAssertReturn(d_small.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_small(*this->allocatorCPU.get(), smallWidth, smallHeight);\r
+ ncvAssertReturn(h_small.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_small_d(*this->allocatorCPU.get(), smallWidth, smallHeight);\r
+ ncvAssertReturn(h_small_d.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
+ NCV_SKIP_COND_BEGIN\r
+ ncvAssertReturn(this->src.fill(h_img), false);\r
+ NCV_SKIP_COND_END\r
+\r
+ ncvStat = h_img.copySolid(d_img, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_BEGIN\r
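+    //nearest-neighbor downsampling by an integer factor on the device; the host path below repeats it for comparison\r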
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ ncvStat = nppiStDownsampleNearest_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),\r
+ (Ncv32u *)d_small.ptr(), d_small.pitch(),\r
+ srcSize, this->scaleFactor,\r
+ this->bTextureCache);\r
+ }\r
+ else if (sizeof(T) == sizeof(Ncv64u))\r
+ {\r
+ ncvStat = nppiStDownsampleNearest_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),\r
+ (Ncv64u *)d_small.ptr(), d_small.pitch(),\r
+ srcSize, this->scaleFactor,\r
+ this->bTextureCache);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);\r
+ }\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+ ncvStat = d_small.copySolid(h_small_d, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ ncvStat = nppiStDownsampleNearest_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),\r
+ (Ncv32u *)h_small.ptr(), h_small.pitch(),\r
+ srcSize, this->scaleFactor);\r
+ }\r
+ else if (sizeof(T) == sizeof(Ncv64u))\r
+ {\r
+ ncvStat = nppiStDownsampleNearest_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),\r
+ (Ncv64u *)h_small.ptr(), h_small.pitch(),\r
+ srcSize, this->scaleFactor);\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);\r
+ }\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ //const Ncv64f relEPS = 0.005;\r
+ for (Ncv32u i=0; bLoopVirgin && i < h_small.height(); i++)\r
+ {\r
+ for (Ncv32u j=0; bLoopVirgin && j < h_small.width(); j++)\r
+ {\r
+ if (h_small.ptr()[h_small.stride()*i+j] != h_small_d.ptr()[h_small_d.stride()*i+j])\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestResize<T>::deinit()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template class TestResize<Ncv32u>;\r
+template class TestResize<Ncv64u>;\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testresize_h_\r
+#define _testresize_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+template <class T>\r
+class TestResize : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestResize(std::string testName, NCVTestSourceProvider<T> &src,\r
+ Ncv32u width, Ncv32u height, Ncv32u scaleFactor, NcvBool bTextureCache);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+ TestResize(const TestResize&);\r
+ TestResize& operator=(const TestResize&); \r
+\r
+ NCVTestSourceProvider<T> &src;\r
+ Ncv32u width;\r
+ Ncv32u height;\r
+ Ncv32u scaleFactor;\r
+\r
+ NcvBool bTextureCache;\r
+};\r
+\r
+#endif // _testresize_h_\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+\r
+#include <math.h>\r
+\r
+#include "TestTranspose.h"\r
+\r
+\r
+template <class T>\r
+TestTranspose<T>::TestTranspose(std::string testName, NCVTestSourceProvider<T> &src,\r
+ Ncv32u width, Ncv32u height)\r
+ :\r
+ NCVTestProvider(testName),\r
+ src(src),\r
+ width(width),\r
+ height(height)\r
+{\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestTranspose<T>::toString(std::ofstream &strOut)\r
+{\r
+ strOut << "sizeof(T)=" << sizeof(T) << std::endl;\r
+ strOut << "width=" << width << std::endl;\r
+ return true;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestTranspose<T>::init()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestTranspose<T>::process()\r
+{\r
+ NCVStatus ncvStat;\r
+ bool rcode = false;\r
+\r
+ NcvSize32u srcSize(this->width, this->height);\r
+\r
+ NCVMatrixAlloc<T> d_img(*this->allocatorGPU.get(), this->width, this->height);\r
+ ncvAssertReturn(d_img.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_img(*this->allocatorCPU.get(), this->width, this->height);\r
+ ncvAssertReturn(h_img.isMemAllocated(), false);\r
+\r
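+    // destination matrices are allocated with width and height swapped, since transposition exchanges the two dimensions\r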
+ NCVMatrixAlloc<T> d_dst(*this->allocatorGPU.get(), this->height, this->width);\r
+ ncvAssertReturn(d_dst.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_dst(*this->allocatorCPU.get(), this->height, this->width);\r
+ ncvAssertReturn(h_dst.isMemAllocated(), false);\r
+ NCVMatrixAlloc<T> h_dst_d(*this->allocatorCPU.get(), this->height, this->width);\r
+ ncvAssertReturn(h_dst_d.isMemAllocated(), false);\r
+\r
+ NCV_SET_SKIP_COND(this->allocatorGPU.get()->isCounting());\r
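+    // the NCV_SKIP_COND blocks below are bypassed while the GPU allocator is only counting memory requirements\r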
+ NCV_SKIP_COND_BEGIN\r
+ ncvAssertReturn(this->src.fill(h_img), false);\r
+ NCV_SKIP_COND_END\r
+\r
+ ncvStat = h_img.copySolid(d_img, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_BEGIN\r
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ ncvStat = nppiStTranspose_32u_C1R((Ncv32u *)d_img.ptr(), d_img.pitch(),\r
+ (Ncv32u *)d_dst.ptr(), d_dst.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ }\r
+ else if (sizeof(T) == sizeof(Ncv64u))\r
+ {\r
+ ncvStat = nppiStTranspose_64u_C1R((Ncv64u *)d_img.ptr(), d_img.pitch(),\r
+ (Ncv64u *)d_dst.ptr(), d_dst.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect transpose test instance", false);\r
+ }\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+ ncvStat = d_dst.copySolid(h_dst_d, 0);\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ ncvStat = nppiStTranspose_32u_C1R_host((Ncv32u *)h_img.ptr(), h_img.pitch(),\r
+ (Ncv32u *)h_dst.ptr(), h_dst.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ }\r
+ else if (sizeof(T) == sizeof(Ncv64u))\r
+ {\r
+ ncvStat = nppiStTranspose_64u_C1R_host((Ncv64u *)h_img.ptr(), h_img.pitch(),\r
+ (Ncv64u *)h_dst.ptr(), h_dst.pitch(),\r
+ NcvSize32u(this->width, this->height));\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintReturn(false, "Incorrect downsample test instance", false);\r
+ }\r
+ ncvAssertReturn(ncvStat == NPPST_SUCCESS, false);\r
+ NCV_SKIP_COND_END\r
+\r
+ //bit-to-bit check\r
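+    // element-wise comparison of the CPU transpose (h_dst) with the GPU result (h_dst_d)\r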
+ bool bLoopVirgin = true;\r
+\r
+ NCV_SKIP_COND_BEGIN\r
+ //const Ncv64f relEPS = 0.005;\r
+ for (Ncv32u i=0; bLoopVirgin && i < this->width; i++)\r
+ {\r
+ for (Ncv32u j=0; bLoopVirgin && j < this->height; j++)\r
+ {\r
+ if (h_dst.ptr()[h_dst.stride()*i+j] != h_dst_d.ptr()[h_dst_d.stride()*i+j])\r
+ {\r
+ bLoopVirgin = false;\r
+ }\r
+ }\r
+ }\r
+ NCV_SKIP_COND_END\r
+\r
+ if (bLoopVirgin)\r
+ {\r
+ rcode = true;\r
+ }\r
+\r
+ return rcode;\r
+}\r
+\r
+\r
+template <class T>\r
+bool TestTranspose<T>::deinit()\r
+{\r
+ return true;\r
+}\r
+\r
+\r
+template class TestTranspose<Ncv32u>;\r
+template class TestTranspose<Ncv64u>;\r
--- /dev/null
+/*\r
+ * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+ *\r
+ * NVIDIA Corporation and its licensors retain all intellectual \r
+ * property and proprietary rights in and to this software and \r
+ * related documentation and any modifications thereto. \r
+ * Any use, reproduction, disclosure, or distribution of this \r
+ * software and related documentation without an express license \r
+ * agreement from NVIDIA Corporation is strictly prohibited.\r
+ */\r
+#ifndef _testtranspose_h_\r
+#define _testtranspose_h_\r
+\r
+#include "NCVTest.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+template <class T>\r
+class TestTranspose : public NCVTestProvider\r
+{\r
+public:\r
+\r
+ TestTranspose(std::string testName, NCVTestSourceProvider<T> &src,\r
+ Ncv32u width, Ncv32u height);\r
+\r
+ virtual bool init();\r
+ virtual bool process();\r
+ virtual bool deinit();\r
+ virtual bool toString(std::ofstream &strOut);\r
+\r
+private:\r
+\r
+ TestTranspose(const TestTranspose&);\r
+ TestTranspose& operator=(const TestTranspose&); \r
+\r
+ NCVTestSourceProvider<T> &src;\r
+ Ncv32u width;\r
+ Ncv32u height;\r
+};\r
+\r
+#endif // _testtranspose_h_\r
--- /dev/null
+#pragma warning (disable : 4408 4201 4100)\r
+ \r
+#include <cstdio>\r
+\r
+#include "NCV.hpp"\r
+#include "NCVHaarObjectDetection.hpp"\r
+\r
+#include "TestIntegralImage.h"\r
+#include "TestIntegralImageSquared.h"\r
+#include "TestRectStdDev.h"\r
+#include "TestResize.h"\r
+#include "TestCompact.h"\r
+#include "TestTranspose.h"\r
+\r
+#include "TestDrawRects.h"\r
+#include "TestHypothesesGrow.h"\r
+#include "TestHypothesesFilter.h"\r
+#include "TestHaarCascadeLoader.h"\r
+#include "TestHaarCascadeApplication.h"\r
+\r
+#include "NCVAutoTestLister.hpp"\r
+#include "NCVTestSourceProvider.hpp"\r
+\r
+\r
+template <class T_in, class T_out>\r
+void generateIntegralTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T_in> &src,\r
+ Ncv32u maxWidth, Ncv32u maxHeight)\r
+{\r
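+    // sweep width (height fixed at 2) and height (width fixed at 2) geometrically by ~1.2x, then add one 640x480 case\r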
+ for (Ncv32f _i=1.0; _i<maxWidth; _i*=1.2f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "LinIntImgW%dH%d", i, 2);\r
+ testLister.add(new TestIntegralImage<T_in, T_out>(testName, src, i, 2));\r
+ }\r
+ for (Ncv32f _i=1.0; _i<maxHeight; _i*=1.2f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "LinIntImgW%dH%d", 2, i);\r
+ testLister.add(new TestIntegralImage<T_in, T_out>(testName, src, 2, i));\r
+ }\r
+\r
+ //test VGA\r
+ testLister.add(new TestIntegralImage<T_in, T_out>("LinIntImg_VGA", src, 640, 480));\r
+\r
+ //TODO: add tests of various resolutions up to 4096x4096\r
+}\r
+\r
+\r
+void generateSquaredIntegralTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,\r
+ Ncv32u maxWidth, Ncv32u maxHeight)\r
+{\r
+ for (Ncv32f _i=1.0; _i<maxWidth; _i*=1.2f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "SqIntImgW%dH%d", i, 32);\r
+ testLister.add(new TestIntegralImageSquared(testName, src, i, 32));\r
+ }\r
+ for (Ncv32f _i=1.0; _i<maxHeight; _i*=1.2f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "SqIntImgW%dH%d", 32, i);\r
+ testLister.add(new TestIntegralImageSquared(testName, src, 32, i));\r
+ }\r
+\r
+ //test VGA\r
+ testLister.add(new TestIntegralImageSquared("SqLinIntImg_VGA", src, 640, 480));\r
+\r
+ //TODO: add tests of various resolutions up to 4096x4096\r
+}\r
+\r
+\r
+void generateRectStdDevTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,\r
+ Ncv32u maxWidth, Ncv32u maxHeight)\r
+{\r
+ NcvRect32u rect(1,1,18,18);\r
+\r
+ for (Ncv32f _i=32; _i<maxHeight/2 && _i < maxWidth/2; _i*=1.2f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "RectStdDevW%dH%d", i*2, i);\r
+ testLister.add(new TestRectStdDev(testName, src, i*2, i, rect, 1, true));\r
+ testLister.add(new TestRectStdDev(testName, src, i*2, i, rect, 1.5, false));\r
+ testLister.add(new TestRectStdDev(testName, src, i-1, i*2-1, rect, 1, false));\r
+ testLister.add(new TestRectStdDev(testName, src, i-1, i*2-1, rect, 2.5, true));\r
+ }\r
+\r
+ //test VGA\r
+ testLister.add(new TestRectStdDev("RectStdDev_VGA", src, 640, 480, rect, 1, true));\r
+\r
+ //TODO: add tests of various resolutions up to 4096x4096\r
+}\r
+\r
+\r
+template <class T>\r
+void generateResizeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src)\r
+{\r
+ //test VGA\r
+ for (Ncv32u i=1; i<480; i+=3)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "TestResize_VGA_s%d", i);\r
+ testLister.add(new TestResize<T>(testName, src, 640, 480, i, true));\r
+ testLister.add(new TestResize<T>(testName, src, 640, 480, i, false));\r
+ }\r
+\r
+ //test HD\r
+ for (Ncv32u i=1; i<1080; i+=5)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "TestResize_1080_s%d", i);\r
+ testLister.add(new TestResize<T>(testName, src, 1920, 1080, i, true));\r
+ testLister.add(new TestResize<T>(testName, src, 1920, 1080, i, false));\r
+ }\r
+\r
+ //TODO: add tests of various resolutions up to 4096x4096\r
+}\r
+\r
+\r
+void generateNPPSTVectorTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)\r
+{\r
+ //compaction\r
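+    // vector lengths grow geometrically; the later loops also sweep densely around 256, 256*256 and 256^3 elements,\r
+    // presumably to hit block- and grid-boundary cases of the compaction kernel\r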
+ for (Ncv32f _i=256.0; _i<maxLength; _i*=1.1f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "Compaction%d", i);\r
+ testLister.add(new TestCompact(testName, src, i, 0xFFFFFFFF, 30));\r
+ }\r
+ for (Ncv32u i=1; i<260; i++)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "Compaction%d", i);\r
+ testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 70));\r
+ testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 0));\r
+ testLister.add(new TestCompact(testName, src, i, 0xC001C0DE, 100));\r
+ }\r
+ for (Ncv32u i=256*256-256; i<256*256+257; i++)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "Compaction%d", i);\r
+ testLister.add(new TestCompact(testName, src, i, 0xFFFFFFFF, 40));\r
+ }\r
+ for (Ncv32u i=256*256*256-10; i<256*256*256+10; i++)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "Compaction%d", i);\r
+ testLister.add(new TestCompact(testName, src, i, 0x00000000, 2));\r
+ }\r
+}\r
+\r
+\r
+template <class T>\r
+void generateTransposeTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src)\r
+{\r
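+    // small near-square matrices, degenerate single-row matrices (height 1), plus full VGA and 1080p frames\r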
+ for (int i=2; i<64; i+=4)\r
+ {\r
+ for (int j=2; j<64; j+=4)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "TestTranspose_%dx%d", i, j);\r
+ testLister.add(new TestTranspose<T>(testName, src, i, j));\r
+ }\r
+ }\r
+\r
+ for (int i=1; i<128; i+=1)\r
+ {\r
+ for (int j=1; j<2; j+=1)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "TestTranspose_%dx%d", i, j);\r
+ testLister.add(new TestTranspose<T>(testName, src, i, j));\r
+ }\r
+ }\r
+\r
+ testLister.add(new TestTranspose<T>("TestTranspose_VGA", src, 640, 480));\r
+ testLister.add(new TestTranspose<T>("TestTranspose_HD1080", src, 1920, 1080));\r
+}\r
+\r
+\r
+template <class T>\r
+void generateDrawRectsTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<T> &src, NCVTestSourceProvider<Ncv32u> &src32u,\r
+ Ncv32u maxWidth, Ncv32u maxHeight)\r
+{\r
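+    // image sizes grow geometrically at a roughly constant aspect ratio; the rectangle count scales with the image area\r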
+ for (Ncv32f _i=16.0; _i<maxWidth; _i*=1.1f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ Ncv32u j = maxHeight * i / maxWidth;\r
+ if (!j) continue;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "DrawRectsW%dH%d", i, j);\r
+\r
+ if (sizeof(T) == sizeof(Ncv32u))\r
+ {\r
+ testLister.add(new TestDrawRects<T>(testName, src, src32u, i, j, i*j/1000+1, (T)0xFFFFFFFF));\r
+ }\r
+ else if (sizeof(T) == sizeof(Ncv8u))\r
+ {\r
+ testLister.add(new TestDrawRects<T>(testName, src, src32u, i, j, i*j/1000+1, (T)0xFF));\r
+ }\r
+ else\r
+ {\r
+ ncvAssertPrintCheck(false, "Attempted to instantiate non-existing DrawRects test suite");\r
+ }\r
+ }\r
+\r
+ //test VGA\r
+ testLister.add(new TestDrawRects<T>("DrawRects_VGA", src, src32u, 640, 480, 640*480/1000, (T)0xFF));\r
+\r
+ //TODO: add tests of various resolutions up to 4096x4096\r
+}\r
+\r
+\r
+void generateVectorTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)\r
+{\r
+ //growth\r
+ for (Ncv32f _i=10.0; _i<maxLength; _i*=1.1f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "VectorGrow%d", i);\r
+ testLister.add(new TestHypothesesGrow(testName, src, 20, 20, 2.2f, i, i/2, i, i/4));\r
+ testLister.add(new TestHypothesesGrow(testName, src, 10, 42, 1.2f, i, i, i, 0));\r
+ }\r
+ testLister.add(new TestHypothesesGrow("VectorGrow01b", src, 10, 42, 1.2f, 10, 0, 10, 1));\r
+ testLister.add(new TestHypothesesGrow("VectorGrow11b", src, 10, 42, 1.2f, 10, 1, 10, 1));\r
+ testLister.add(new TestHypothesesGrow("VectorGrow10b", src, 10, 42, 1.2f, 10, 1, 10, 0));\r
+ testLister.add(new TestHypothesesGrow("VectorGrow00b", src, 10, 42, 1.2f, 10, 0, 10, 0));\r
+}\r
+\r
+\r
+void generateHypothesesFiltrationTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv32u> &src, Ncv32u maxLength)\r
+{\r
+ for (Ncv32f _i=1.0; _i<maxLength; _i*=1.1f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "HypFilter%d", i);\r
+ testLister.add(new TestHypothesesFilter(testName, src, i, 3, 0.2f));\r
+ testLister.add(new TestHypothesesFilter(testName, src, i, 0, 0.2f));\r
+ testLister.add(new TestHypothesesFilter(testName, src, i, 1, 0.1f));\r
+ }\r
+}\r
+\r
+\r
+void generateHaarLoaderTests(NCVAutoTestLister &testLister)\r
+{\r
+ testLister.add(new TestHaarCascadeLoader("haarcascade_eye.xml", "haarcascade_eye.xml"));\r
+ testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt.xml", "haarcascade_frontalface_alt.xml"));\r
+ testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt2.xml", "haarcascade_frontalface_alt2.xml"));\r
+ testLister.add(new TestHaarCascadeLoader("haarcascade_frontalface_alt_tree.xml", "haarcascade_frontalface_alt_tree.xml"));\r
+ testLister.add(new TestHaarCascadeLoader("haarcascade_eye_tree_eyeglasses.xml", "haarcascade_eye_tree_eyeglasses.xml"));\r
+}\r
+\r
+\r
+void generateHaarApplicationTests(NCVAutoTestLister &testLister, NCVTestSourceProvider<Ncv8u> &src,\r
+ Ncv32u maxWidth, Ncv32u maxHeight)\r
+{\r
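+    // dense grid of small image sizes followed by a geometric sweep of larger ones, all using the frontal-face cascade\r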
+ for (Ncv32u i=20; i<512; i+=11)\r
+ {\r
+ for (Ncv32u j=20; j<128; j+=5)\r
+ {\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "HaarAppl%d_%d", i, j);\r
+ testLister.add(new TestHaarCascadeApplication(testName, src, "haarcascade_frontalface_alt.xml", j, i));\r
+ }\r
+ }\r
+ for (Ncv32f _i=20.0; _i<maxWidth; _i*=1.1f)\r
+ {\r
+ Ncv32u i = (Ncv32u)_i;\r
+ char testName[80];\r
+ sprintf_s(testName, sizeof(testName), "HaarAppl%d", i);\r
+ testLister.add(new TestHaarCascadeApplication(testName, src, "haarcascade_frontalface_alt.xml", i, i));\r
+ }\r
+}\r
+\r
+\r
+static void devNullOutput(const char *msg)\r
+{\r
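+    // intentionally empty: installed via ncvSetDebugOutputHandler() below to silence NCV debug messages during the run\r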
+}\r
+\r
+\r
+int main_nvidia()\r
+{\r
+ printf("Testing NVIDIA Computer Vision SDK\n");\r
+ printf("==================================\n");\r
+\r
+ ncvSetDebugOutputHandler(devNullOutput);\r
+\r
+ NCVAutoTestLister testListerII("NPPST Integral Image" );//,,true, false);\r
+ NCVAutoTestLister testListerSII("NPPST Squared Integral Image" );//,,true, false);\r
+ NCVAutoTestLister testListerRStdDev("NPPST RectStdDev" );//,,true, false);\r
+ NCVAutoTestLister testListerResize("NPPST Resize" );//,,true, false);\r
+ NCVAutoTestLister testListerNPPSTVectorOperations("NPPST Vector Operations" );//,,true, false);\r
+ NCVAutoTestLister testListerTranspose("NPPST Transpose" );//,,true, false);\r
+\r
+ NCVAutoTestLister testListerVectorOperations("Vector Operations" );//,,true, false);\r
+ NCVAutoTestLister testListerHaarLoader("Haar Cascade Loader" );//,,true, false);\r
+ NCVAutoTestLister testListerHaarAppl("Haar Cascade Application" );//,,true, false);\r
+ NCVAutoTestLister testListerHypFiltration("Hypotheses Filtration" );//,,true, false);\r
+ NCVAutoTestLister testListerVisualize("Visualization" );//,,true, false);\r
+\r
+ printf("Initializing data source providers\n");\r
+ NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 4096, 4096);\r
+ NCVTestSourceProvider<Ncv8u> testSrcRandom_8u(2010, 0, 255, 4096, 4096);\r
+    NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, 0xFFFFFFFFFFFFFFFFULL, 4096, 4096);\r
+ NCVTestSourceProvider<Ncv8u> testSrcFacesVGA_8u("../../data/group_1_640x480_VGA.pgm");\r
+ NCVTestSourceProvider<Ncv32f> testSrcRandom_32f(2010, -1.0f, 1.0f, 4096, 4096);\r
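+    // random providers are seeded with 2010 and serve frames up to 4096x4096; the 8-bit face image feeds the Haar application tests\r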
+\r
+ printf("Generating NPPST test suites\n");\r
+ generateIntegralTests<Ncv8u, Ncv32u>(testListerII, testSrcRandom_8u, 4096, 4096);\r
+ generateIntegralTests<Ncv32f, Ncv32f>(testListerII, testSrcRandom_32f, 4096, 4096);\r
+ generateSquaredIntegralTests(testListerSII, testSrcRandom_8u, 4096, 4096);\r
+ generateRectStdDevTests(testListerRStdDev, testSrcRandom_8u, 4096, 4096);\r
+ generateResizeTests(testListerResize, testSrcRandom_32u);\r
+ generateResizeTests(testListerResize, testSrcRandom_64u);\r
+ generateNPPSTVectorTests(testListerNPPSTVectorOperations, testSrcRandom_32u, 4096*4096);\r
+ generateTransposeTests(testListerTranspose, testSrcRandom_32u);\r
+ generateTransposeTests(testListerTranspose, testSrcRandom_64u);\r
+\r
+ printf("Generating NCV test suites\n");\r
+ generateDrawRectsTests(testListerVisualize, testSrcRandom_8u, testSrcRandom_32u, 4096, 4096);\r
+ generateDrawRectsTests(testListerVisualize, testSrcRandom_32u, testSrcRandom_32u, 4096, 4096);\r
+ generateVectorTests(testListerVectorOperations, testSrcRandom_32u, 4096*4096);\r
+ generateHypothesesFiltrationTests(testListerHypFiltration, testSrcRandom_32u, 1024);\r
+ generateHaarLoaderTests(testListerHaarLoader);\r
+ generateHaarApplicationTests(testListerHaarAppl, testSrcFacesVGA_8u, 1280, 720);\r
+\r
+ testListerII.invoke();\r
+ testListerSII.invoke();\r
+ testListerRStdDev.invoke();\r
+ testListerResize.invoke();\r
+ testListerNPPSTVectorOperations.invoke();\r
+ testListerTranspose.invoke();\r
+ testListerVisualize.invoke();\r
+ testListerVectorOperations.invoke();\r
+ testListerHypFiltration.invoke();\r
+ testListerHaarLoader.invoke();\r
+ testListerHaarAppl.invoke();\r
+\r
+ return 0;\r
+}\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// Intel License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000, Intel Corporation, all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+//\r
+// * The name of Intel Corporation may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#include "gputest.hpp"\r
+#include "cvconfig.h"\r
+\r
+class CV_NVidiaTestsCaller : public CvTest\r
+{\r
+public:\r
+ CV_NVidiaTestsCaller() : CvTest("GPU-NVidia", "NVidia") {}\r
+ virtual ~CV_NVidiaTestsCaller() {}\r
+\r
+protected:\r
+ \r
+ void run( int )\r
+ { \r
+#if defined(HAVE_CUDA)\r
+ int main_nvidia();\r
+ main_nvidia(); \r
+ ts->set_failed_test_info(CvTS::OK);\r
+#else\r
+ ts->set_failed_test_info(CvTS::SKIPPED);\r
+#endif\r
+ } \r
+} CV_NVidiaTestsCaller_test;
\ No newline at end of file