--- /dev/null
+/*\r
+* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.\r
+*\r
+* NOTICE TO USER:\r
+*\r
+* This source code is subject to NVIDIA ownership rights under U.S. and\r
+* international Copyright laws.\r
+*\r
+* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE\r
+* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR\r
+* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH\r
+* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF\r
+* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.\r
+* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,\r
+* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS\r
+* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE\r
+* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE\r
+* OR PERFORMANCE OF THIS SOURCE CODE.\r
+*\r
+* U.S. Government End Users. This source code is a "commercial item" as\r
+* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of\r
+* "commercial computer software" and "commercial computer software\r
+* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)\r
+* and is provided to the U.S. Government only as a commercial end item.\r
+* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through\r
+* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the\r
+* source code with only those rights set forth herein.\r
+*/\r
+#ifndef _npp_staging_h_\r
+#define _npp_staging_h_\r
+\r
+\r
+/**\r
+* \file npp_staging.h\r
+* NPP Staging Library (will become part of the next NPP release)\r
+*/\r
+\r
+\r
+#ifdef __cplusplus\r
+\r
+\r
+/** \defgroup ctassert Compile-time assert functionality\r
+* @{\r
+*/\r
+\r
+\r
+ /**\r
+ * Compile-time assert namespace\r
+ */\r
+ namespace NppStCTprep\r
+ {\r
+ template <bool x>\r
+ struct CT_ASSERT_FAILURE;\r
+\r
+ template <>\r
+ struct CT_ASSERT_FAILURE<true> {};\r
+\r
+ template <int x>\r
+ struct assertTest{};\r
+ }\r
+\r
+\r
+ #define NPPST_CT_PREP_PASTE_AUX(a,b) a##b ///< Concatenation indirection macro\r
+ #define NPPST_CT_PREP_PASTE(a,b) NPPST_CT_PREP_PASTE_AUX(a, b) ///< Concatenation macro\r
+\r
+\r
+ /**\r
+    * Performs a compile-time assertion of a condition at file scope\r
+ */\r
+ #define NPPST_CT_ASSERT(X) \\r
+ typedef NppStCTprep::assertTest<sizeof(NppStCTprep::CT_ASSERT_FAILURE< (bool)(X) >)> \\r
+ NPPST_CT_PREP_PASTE(__ct_assert_typedef_, __LINE__)\r
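+\r
+\r
+    /*\r
+    * Usage sketch (illustrative, not part of the original interface): the macro\r
+    * expands to a typedef, so it can appear at file or namespace scope with any\r
+    * compile-time constant condition, e.g.:\r
+    *\r
+    *     NPPST_CT_ASSERT(sizeof(void *) >= 4);\r
+    */\r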
+\r
+\r
+/*@}*/\r
+\r
+\r
+#endif\r
+\r
+\r
+/** \defgroup typedefs NPP Integral and compound types of guaranteed size\r
+ * @{\r
+ */\r
+\r
+\r
+typedef bool NppStBool; ///< Boolean type no larger than an integer\r
+typedef long long NppSt64s; ///< 64-bit signed integer\r
+typedef unsigned long long NppSt64u; ///< 64-bit unsigned integer\r
+typedef int NppSt32s; ///< 32-bit signed integer\r
+typedef unsigned int NppSt32u; ///< 32-bit unsigned integer\r
+typedef short NppSt16s; ///< 16-bit signed short\r
+typedef unsigned short NppSt16u; ///< 16-bit unsigned short\r
+typedef signed char NppSt8s; ///< 8-bit signed char\r
+typedef unsigned char NppSt8u; ///< 8-bit unsigned char\r
+typedef float NppSt32f; ///< 32-bit IEEE-754 (single precision) float\r
+typedef double NppSt64f; ///< 64-bit IEEE-754 (double precision) float\r
+\r
+\r
+/**\r
+ * 2D Rectangle, 8-bit unsigned fields\r
+ * This struct contains the position and size of a rectangle in 2D space\r
+ */\r
+struct NppStRect8u\r
+{\r
+ NppSt8u x; ///< x-coordinate of upper left corner\r
+ NppSt8u y; ///< y-coordinate of upper left corner\r
+ NppSt8u width; ///< Rectangle width\r
+ NppSt8u height; ///< Rectangle height\r
+#ifdef __cplusplus\r
+    NppStRect8u() : x(0), y(0), width(0), height(0) {}\r
+ NppStRect8u(NppSt8u x, NppSt8u y, NppSt8u width, NppSt8u height) : x(x), y(y), width(width), height(height) {}\r
+#endif\r
+};\r
+\r
+\r
+/**\r
+ * 2D Rectangle, 32-bit signed fields\r
+ * This struct contains the position and size of a rectangle in 2D space\r
+ */\r
+struct NppStRect32s\r
+{\r
+ NppSt32s x; ///< x-coordinate of upper left corner\r
+ NppSt32s y; ///< y-coordinate of upper left corner\r
+ NppSt32s width; ///< Rectangle width\r
+ NppSt32s height; ///< Rectangle height\r
+#ifdef __cplusplus\r
+    NppStRect32s() : x(0), y(0), width(0), height(0) {}\r
+ NppStRect32s(NppSt32s x, NppSt32s y, NppSt32s width, NppSt32s height) : x(x), y(y), width(width), height(height) {}\r
+#endif\r
+};\r
+\r
+\r
+/**\r
+ * 2D Rectangle, 32-bit unsigned fields\r
+ * This struct contains the position and size of a rectangle in 2D space\r
+ */\r
+struct NppStRect32u\r
+{\r
+ NppSt32u x; ///< x-coordinate of upper left corner\r
+ NppSt32u y; ///< y-coordinate of upper left corner\r
+ NppSt32u width; ///< Rectangle width\r
+ NppSt32u height; ///< Rectangle height\r
+#ifdef __cplusplus\r
+    NppStRect32u() : x(0), y(0), width(0), height(0) {}\r
+ NppStRect32u(NppSt32u x, NppSt32u y, NppSt32u width, NppSt32u height) : x(x), y(y), width(width), height(height) {}\r
+#endif\r
+};\r
+\r
+\r
+/**\r
+ * 2D Size, 32-bit signed fields\r
+ * This struct typically represents the size of a rectangular region in 2D space\r
+ */\r
+struct NppStSize32s\r
+{\r
+ NppSt32s width; ///< Rectangle width\r
+ NppSt32s height; ///< Rectangle height\r
+#ifdef __cplusplus\r
+    NppStSize32s() : width(0), height(0) {}\r
+ NppStSize32s(NppSt32s width, NppSt32s height) : width(width), height(height) {}\r
+#endif\r
+};\r
+\r
+\r
+/**\r
+ * 2D Size, 32-bit unsigned fields\r
+ * This struct typically represents the size of a rectangular region in 2D space\r
+ */\r
+struct NppStSize32u\r
+{\r
+ NppSt32u width; ///< Rectangle width\r
+ NppSt32u height; ///< Rectangle height\r
+#ifdef __cplusplus\r
+    NppStSize32u() : width(0), height(0) {}\r
+ NppStSize32u(NppSt32u width, NppSt32u height) : width(width), height(height) {}\r
+#endif\r
+};\r
+\r
+\r
+/**\r
+ * Error Status Codes\r
+ *\r
+ * Almost all NPP functions return error-status information using\r
+ * these return codes.\r
+ * Negative return codes indicate errors, positive return codes indicate\r
+ * warnings, a return code of 0 indicates success.\r
+ */\r
+enum NppStStatus\r
+{\r
+ //already present in NPP\r
+ /* NPP_SUCCESS = 0, ///< Successful operation (same as NPP_NO_ERROR)\r
+ NPP_ERROR = -1, ///< Unknown error\r
+ NPP_CUDA_KERNEL_EXECUTION_ERROR = -3, ///< CUDA kernel execution error\r
+ NPP_NULL_POINTER_ERROR = -4, ///< NULL pointer argument error\r
+ NPP_TEXTURE_BIND_ERROR = -24, ///< CUDA texture binding error or non-zero offset returned\r
+ NPP_MEMCPY_ERROR = -13, ///< CUDA memory copy error\r
+ NPP_MEM_ALLOC_ERR = -12, ///< CUDA memory allocation error\r
+ NPP_MEMFREE_ERR = -15, ///< CUDA memory deallocation error*/\r
+\r
+ //to be added\r
+ NPP_INVALID_ROI, ///< Invalid region of interest argument\r
+    NPP_INVALID_STEP,   ///< Invalid image line step argument (check sign, alignment, relation to image width)\r
+ NPP_INVALID_SCALE, ///< Invalid scale parameter passed\r
+ NPP_MEM_INSUFFICIENT_BUFFER, ///< Insufficient user-allocated buffer\r
+ NPP_MEM_RESIDENCE_ERROR, ///< Memory residence error detected (check if pointers should be device or pinned)\r
+ NPP_MEM_INTERNAL_ERROR, ///< Internal memory management error\r
+};\r
+\r
+\r
+/*@}*/\r
+\r
+\r
+#ifdef __cplusplus\r
+\r
+\r
+/** \defgroup ct_typesize_checks Client-side sizeof types compile-time check\r
+* @{\r
+*/\r
+ NPPST_CT_ASSERT(sizeof(NppStBool) <= 4);\r
+ NPPST_CT_ASSERT(sizeof(NppSt64s) == 8);\r
+ NPPST_CT_ASSERT(sizeof(NppSt64u) == 8);\r
+ NPPST_CT_ASSERT(sizeof(NppSt32s) == 4);\r
+ NPPST_CT_ASSERT(sizeof(NppSt32u) == 4);\r
+ NPPST_CT_ASSERT(sizeof(NppSt16s) == 2);\r
+ NPPST_CT_ASSERT(sizeof(NppSt16u) == 2);\r
+ NPPST_CT_ASSERT(sizeof(NppSt8s) == 1);\r
+ NPPST_CT_ASSERT(sizeof(NppSt8u) == 1);\r
+ NPPST_CT_ASSERT(sizeof(NppSt32f) == 4);\r
+ NPPST_CT_ASSERT(sizeof(NppSt64f) == 8);\r
+ NPPST_CT_ASSERT(sizeof(NppStRect8u) == sizeof(NppSt32u));\r
+ NPPST_CT_ASSERT(sizeof(NppStRect32s) == 4 * sizeof(NppSt32s));\r
+ NPPST_CT_ASSERT(sizeof(NppStRect32u) == 4 * sizeof(NppSt32u));\r
+ NPPST_CT_ASSERT(sizeof(NppStSize32u) == 2 * sizeof(NppSt32u));\r
+/*@}*/\r
+\r
+\r
+#endif\r
+\r
+\r
+#ifdef __cplusplus\r
+extern "C" {\r
+#endif\r
+\r
+\r
+/** \defgroup core_npp NPP Core\r
+ * Basic functions for CUDA stream management.\r
+ * WARNING: These functions could not be exported from the NPP_staging library, so they cannot be used\r
+ * @{\r
+ */\r
+\r
+\r
+/**\r
+ * Gets the active CUDA stream used by NPP (Not an API yet!)\r
+ * \return Current CUDA stream\r
+ */\r
+cudaStream_t nppStGetActiveCUDAstream();\r
+\r
+\r
+/**\r
+ * Sets the active CUDA stream used by NPP (Not an API yet!)\r
+ * \param cudaStream        [IN] CUDA stream to become current\r
+ * \return CUDA stream used before\r
+ */\r
+cudaStream_t nppStSetActiveCUDAstream(cudaStream_t cudaStream);\r
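+\r
+\r
+/*\r
+ * Illustrative usage sketch (not part of the original header): run NPP_staging work\r
+ * on a user-created CUDA stream and restore the previous stream afterwards. Error\r
+ * checking of the CUDA runtime calls is omitted for brevity.\r
+ *\r
+ *     cudaStream_t myStream;\r
+ *     cudaStreamCreate(&myStream);\r
+ *     cudaStream_t prevStream = nppStSetActiveCUDAstream(myStream);\r
+ *     // ... calls to NPP_staging functions here execute on myStream ...\r
+ *     nppStSetActiveCUDAstream(prevStream);\r
+ *     cudaStreamDestroy(myStream);\r
+ */\r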
+\r
+\r
+/*@}*/\r
+\r
+\r
+/** \defgroup nppi NPP Image Processing\r
+* @{\r
+*/\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel.\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param d_dst [OUT] Destination image pointer (CUDA device memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest in the source image\r
+ * \param scale [IN] Downsampling scale factor (positive integer)\r
+ * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStDownsampleNearest_32u_C1R(NppSt32u *d_src, NppSt32u srcStep,\r
+ NppSt32u *d_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale,\r
+ NppStBool readThruTexture);\r
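+\r
+\r
+/*\r
+ * Illustrative usage sketch (assumptions: line steps are given in bytes and the\r
+ * destination image holds srcRoi.width / scale by srcRoi.height / scale pixels;\r
+ * CUDA error checking is omitted):\r
+ *\r
+ *     NppStSize32u srcRoi(640, 480);\r
+ *     NppSt32u scale = 2;\r
+ *     NppSt32u *d_src, *d_dst;\r
+ *     size_t srcPitch, dstPitch;\r
+ *     cudaMallocPitch((void **)&d_src, &srcPitch, srcRoi.width * sizeof(NppSt32u), srcRoi.height);\r
+ *     cudaMallocPitch((void **)&d_dst, &dstPitch, (srcRoi.width / scale) * sizeof(NppSt32u), srcRoi.height / scale);\r
+ *     // ... upload source pixels into d_src ...\r
+ *     NppStStatus status = nppiStDownsampleNearest_32u_C1R(d_src, (NppSt32u)srcPitch,\r
+ *                                                          d_dst, (NppSt32u)dstPitch,\r
+ *                                                          srcRoi, scale, false);\r
+ */\r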
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NppStStatus nppiStDownsampleNearest_32s_C1R(NppSt32s *d_src, NppSt32u srcStep,\r
+ NppSt32s *d_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale,\r
+ NppStBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NppStStatus nppiStDownsampleNearest_32f_C1R(NppSt32f *d_src, NppSt32u srcStep,\r
+ NppSt32f *d_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale,\r
+ NppStBool readThruTexture);\r
+\r
+\r
+/**\r
+* Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel.\r
+* \see nppiStDownsampleNearest_32u_C1R\r
+*/\r
+NppStStatus nppiStDownsampleNearest_64u_C1R(NppSt64u *d_src, NppSt32u srcStep,\r
+ NppSt64u *d_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale,\r
+ NppStBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NppStStatus nppiStDownsampleNearest_64s_C1R(NppSt64s *d_src, NppSt32u srcStep,\r
+ NppSt64s *d_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale,\r
+ NppStBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel.\r
+ * \see nppiStDownsampleNearest_32u_C1R\r
+ */\r
+NppStStatus nppiStDownsampleNearest_64f_C1R(NppSt64f *d_src, NppSt32u srcStep,\r
+ NppSt64f *d_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale,\r
+ NppStBool readThruTexture);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit unsigned pixels, single channel. Host implementation.\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param h_dst [OUT] Destination image pointer (Host or pinned memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest in the source image\r
+ * \param scale [IN] Downsampling scale factor (positive integer)\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStDownsampleNearest_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStep,\r
+ NppSt32u *h_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale);\r
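+\r
+\r
+/*\r
+ * Illustrative host-side sketch (assumptions: line steps are in bytes and the\r
+ * destination holds srcRoi.width / scale by srcRoi.height / scale pixels;\r
+ * requires <vector>):\r
+ *\r
+ *     NppStSize32u srcRoi(640, 480);\r
+ *     NppSt32u scale = 2;\r
+ *     std::vector<NppSt32u> src(srcRoi.width * srcRoi.height);\r
+ *     std::vector<NppSt32u> dst((srcRoi.width / scale) * (srcRoi.height / scale));\r
+ *     NppStStatus status = nppiStDownsampleNearest_32u_C1R_host(\r
+ *         &src[0], srcRoi.width * sizeof(NppSt32u),\r
+ *         &dst[0], (srcRoi.width / scale) * sizeof(NppSt32u),\r
+ *         srcRoi, scale);\r
+ */\r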
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit signed pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NppStStatus nppiStDownsampleNearest_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStep,\r
+ NppSt32s *h_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 32-bit float pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NppStStatus nppiStDownsampleNearest_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStep,\r
+ NppSt32f *h_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit unsigned pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NppStStatus nppiStDownsampleNearest_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStep,\r
+ NppSt64u *h_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit signed pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NppStStatus nppiStDownsampleNearest_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStep,\r
+ NppSt64s *h_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale);\r
+\r
+\r
+/**\r
+ * Downsamples (decimates) an image using the nearest neighbor algorithm. 64-bit float pixels, single channel. Host implementation.\r
+ * \see nppiStDownsampleNearest_32u_C1R_host\r
+ */\r
+NppStStatus nppiStDownsampleNearest_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStep,\r
+ NppSt64f *h_dst, NppSt32u dstStep,\r
+ NppStSize32u srcRoi, NppSt32u scale);\r
+\r
+\r
+/**\r
+ * Computes standard deviation for each rectangular region of the input image using integral images.\r
+ *\r
+ * \param d_sum [IN] Integral image pointer (CUDA device memory)\r
+ * \param sumStep [IN] Integral image line step\r
+ * \param d_sqsum [IN] Squared integral image pointer (CUDA device memory)\r
+ * \param sqsumStep [IN] Squared integral image line step\r
+ * \param d_norm            [OUT] Stddev image pointer (CUDA device memory). Each pixel contains the stddev of the rect whose top-left corner lies at that pixel's position\r
+ * \param normStep [IN] Stddev image line step\r
+ * \param roi [IN] Region of interest in the source image\r
+ * \param rect [IN] Rectangular region to calculate stddev over\r
+ * \param scaleArea          [IN] Multiplication factor to account for decimated scale\r
+ * \param readThruTexture [IN] Performance hint to cache source in texture (true) or read directly (false)\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStRectStdDev_32f_C1R(NppSt32u *d_sum, NppSt32u sumStep,\r
+ NppSt64u *d_sqsum, NppSt32u sqsumStep,\r
+ NppSt32f *d_norm, NppSt32u normStep,\r
+ NppStSize32u roi, NppStRect32u rect,\r
+ NppSt32f scaleArea, NppStBool readThruTexture);\r
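+\r
+\r
+/*\r
+ * Illustrative pipeline sketch (assumptions: d_sum and d_sqsum were computed from the\r
+ * same 8-bit source of size roi with nppiStIntegral_8u32u_C1R and\r
+ * nppiStSqrIntegral_8u64u_C1R declared below, all line steps are in bytes, and\r
+ * scaleArea is 1.0f because the source has not been decimated):\r
+ *\r
+ *     NppStRect32u rect(0, 0, 8, 8);   // 8x8 window evaluated at each ROI pixel\r
+ *     NppStStatus status = nppiStRectStdDev_32f_C1R(d_sum, sumStep,\r
+ *                                                   d_sqsum, sqsumStep,\r
+ *                                                   d_norm, normStep,\r
+ *                                                   roi, rect, 1.0f, false);\r
+ */\r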
+\r
+\r
+/**\r
+ * Computes standard deviation for each rectangular region of the input image using integral images. Host implementation\r
+ *\r
+ * \param h_sum [IN] Integral image pointer (Host or pinned memory)\r
+ * \param sumStep [IN] Integral image line step\r
+ * \param h_sqsum [IN] Squared integral image pointer (Host or pinned memory)\r
+ * \param sqsumStep [IN] Squared integral image line step\r
+ * \param h_norm            [OUT] Stddev image pointer (Host or pinned memory). Each pixel contains the stddev of the rect whose top-left corner lies at that pixel's position\r
+ * \param normStep [IN] Stddev image line step\r
+ * \param roi [IN] Region of interest in the source image\r
+ * \param rect [IN] Rectangular region to calculate stddev over\r
+ * \param scaleArea         [IN] Multiplication factor to account for decimated scale\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStRectStdDev_32f_C1R_host(NppSt32u *h_sum, NppSt32u sumStep,\r
+ NppSt64u *h_sqsum, NppSt32u sqsumStep,\r
+ NppSt32f *h_norm, NppSt32u normStep,\r
+ NppStSize32u roi, NppStRect32u rect,\r
+ NppSt32f scaleArea);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit unsigned pixels, single channel\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStride [IN] Source image line step\r
+ * \param d_dst [OUT] Destination image pointer (CUDA device memory)\r
+ * \param dstStride [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest of the source image\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStTranspose_32u_C1R(NppSt32u *d_src, NppSt32u srcStride,\r
+ NppSt32u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
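+\r
+\r
+/*\r
+ * Illustrative sketch (assumptions: strides are in bytes and the destination image\r
+ * provides at least srcRoi.height columns and srcRoi.width rows):\r
+ *\r
+ *     NppStSize32u srcRoi(640, 480);\r
+ *     NppStStatus status = nppiStTranspose_32u_C1R(d_src, srcStride,\r
+ *                                                  d_dst, dstStride, srcRoi);\r
+ */\r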
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit signed pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NppStStatus nppiStTranspose_32s_C1R(NppSt32s *d_src, NppSt32u srcStride,\r
+ NppSt32s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit float pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NppStStatus nppiStTranspose_32f_C1R(NppSt32f *d_src, NppSt32u srcStride,\r
+ NppSt32f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit unsigned pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NppStStatus nppiStTranspose_64u_C1R(NppSt64u *d_src, NppSt32u srcStride,\r
+ NppSt64u *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit signed pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NppStStatus nppiStTranspose_64s_C1R(NppSt64s *d_src, NppSt32u srcStride,\r
+ NppSt64s *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit float pixels, single channel\r
+ * \see nppiStTranspose_32u_C1R\r
+ */\r
+NppStStatus nppiStTranspose_64f_C1R(NppSt64f *d_src, NppSt32u srcStride,\r
+ NppSt64f *d_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit unsigned pixels, single channel. Host implementation\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStride [IN] Source image line step\r
+ * \param h_dst [OUT] Destination image pointer (Host or pinned memory)\r
+ * \param dstStride [IN] Destination image line step\r
+ * \param srcRoi [IN] Region of interest of the source image\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStTranspose_32u_C1R_host(NppSt32u *h_src, NppSt32u srcStride,\r
+ NppSt32u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit signed pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NppStStatus nppiStTranspose_32s_C1R_host(NppSt32s *h_src, NppSt32u srcStride,\r
+ NppSt32s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 32-bit float pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NppStStatus nppiStTranspose_32f_C1R_host(NppSt32f *h_src, NppSt32u srcStride,\r
+ NppSt32f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit unsigned pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NppStStatus nppiStTranspose_64u_C1R_host(NppSt64u *h_src, NppSt32u srcStride,\r
+ NppSt64u *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit signed pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NppStStatus nppiStTranspose_64s_C1R_host(NppSt64s *h_src, NppSt32u srcStride,\r
+ NppSt64s *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Transposes an image. 64-bit float pixels, single channel. Host implementation\r
+ * \see nppiStTranspose_32u_C1R_host\r
+ */\r
+NppStStatus nppiStTranspose_64f_C1R_host(NppSt64f *h_src, NppSt32u srcStride,\r
+ NppSt64f *h_dst, NppSt32u dstStride, NppStSize32u srcRoi);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for integral image creation\r
+ *\r
+ * \param roiSize [IN] Size of the input image\r
+ * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStIntegralGetSize_8u32u(NppStSize32u roiSize, NppSt32u *pBufsize);\r
+\r
+\r
+/**\r
+ * Creates an integral image representation for the input image\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param d_dst [OUT] Destination integral image pointer (CUDA device memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
+ * \param bufSize [IN] Size of the pBuffer in bytes\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStIntegral_8u32u_C1R(NppSt8u *d_src, NppSt32u srcStep,\r
+ NppSt32u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,\r
+ NppSt8u *pBuffer, NppSt32u bufSize);\r
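+\r
+\r
+/*\r
+ * Illustrative usage sketch (assumptions: roiSize describes the 8-bit source image,\r
+ * the destination integral image is one pixel wider and taller than the source,\r
+ * and steps are in bytes; CUDA error checking is omitted):\r
+ *\r
+ *     NppSt32u bufSize;\r
+ *     nppiStIntegralGetSize_8u32u(roiSize, &bufSize);\r
+ *     NppSt8u *d_buffer;\r
+ *     cudaMalloc((void **)&d_buffer, bufSize);\r
+ *     NppStStatus status = nppiStIntegral_8u32u_C1R(d_src, srcStep,\r
+ *                                                   d_dst, dstStep, roiSize,\r
+ *                                                   d_buffer, bufSize);\r
+ *     cudaFree(d_buffer);\r
+ */\r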
+\r
+\r
+/**\r
+ * Creates an integral image representation for the input image. Host implementation\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param h_dst [OUT] Destination integral image pointer (Host or pinned memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStIntegral_8u32u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,\r
+ NppSt32u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for squared integral image creation\r
+ *\r
+ * \param roiSize [IN] Size of the input image\r
+ * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStSqrIntegralGetSize_8u64u(NppStSize32u roiSize, NppSt32u *pBufsize);\r
+\r
+\r
+/**\r
+ * Creates a squared integral image representation for the input image\r
+ *\r
+ * \param d_src [IN] Source image pointer (CUDA device memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param d_dst [OUT] Destination squared integral image pointer (CUDA device memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
+ * \param bufSize [IN] Size of the pBuffer in bytes\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStSqrIntegral_8u64u_C1R(NppSt8u *d_src, NppSt32u srcStep,\r
+ NppSt64u *d_dst, NppSt32u dstStep, NppStSize32u roiSize,\r
+ NppSt8u *pBuffer, NppSt32u bufSize);\r
+\r
+\r
+/**\r
+ * Creates a squared integral image representation for the input image. Host implementation\r
+ *\r
+ * \param h_src [IN] Source image pointer (Host or pinned memory)\r
+ * \param srcStep [IN] Source image line step\r
+ * \param h_dst [OUT] Destination squared integral image pointer (Host or pinned memory)\r
+ * \param dstStep [IN] Destination image line step\r
+ * \param roiSize [IN] Region of interest of the source image\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppiStSqrIntegral_8u64u_C1R_host(NppSt8u *h_src, NppSt32u srcStep,\r
+ NppSt64u *h_dst, NppSt32u dstStep, NppStSize32u roiSize);\r
+\r
+\r
+/*@}*/\r
+\r
+\r
+/** \defgroup npps NPP Signal Processing\r
+* @{\r
+*/\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit unsigned values\r
+ *\r
+ * \param srcLen [IN] Length of the input vector in elements\r
+ * \param pBufsize [OUT] Pointer to host variable that returns the size of the temporary buffer (in bytes)\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppsStCompactGetSize_32u(NppSt32u srcLen, NppSt32u *pBufsize);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit signed values\r
+ * \see nppsStCompactGetSize_32u\r
+ */\r
+NppStStatus nppsStCompactGetSize_32s(NppSt32u srcLen, NppSt32u *pBufsize);\r
+\r
+\r
+/**\r
+ * Calculates the size of the temporary buffer for vector compaction. 32-bit float values\r
+ * \see nppsStCompactGetSize_32u\r
+ */\r
+NppStStatus nppsStCompactGetSize_32f(NppSt32u srcLen, NppSt32u *pBufsize);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit unsigned values\r
+ *\r
+ * \param d_src [IN] Source vector pointer (CUDA device memory)\r
+ * \param srcLen [IN] Source vector length\r
+ * \param d_dst [OUT] Destination vector pointer (CUDA device memory)\r
+ * \param p_dstLen [OUT] Pointer to the destination vector length (Pinned memory or NULL)\r
+ * \param elemRemove [IN] The value to be removed\r
+ * \param pBuffer [IN] Pointer to the pre-allocated temporary buffer (CUDA device memory)\r
+ * \param bufSize [IN] Size of the pBuffer in bytes\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppsStCompact_32u(NppSt32u *d_src, NppSt32u srcLen,\r
+ NppSt32u *d_dst, NppSt32u *p_dstLen,\r
+ NppSt32u elemRemove,\r
+ NppSt8u *pBuffer, NppSt32u bufSize);\r
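+\r
+\r
+/*\r
+ * Illustrative usage sketch (assumptions: d_src holds srcLen values in device memory,\r
+ * d_dst has room for srcLen elements, and p_dstLen points to pinned host memory so\r
+ * the resulting length can be read back; CUDA error checking is omitted):\r
+ *\r
+ *     NppSt32u bufSize;\r
+ *     nppsStCompactGetSize_32u(srcLen, &bufSize);\r
+ *     NppSt8u *d_buffer;\r
+ *     cudaMalloc((void **)&d_buffer, bufSize);\r
+ *     NppSt32u *p_dstLen;\r
+ *     cudaMallocHost((void **)&p_dstLen, sizeof(NppSt32u));\r
+ *     NppSt32u elemRemove = 0;         // remove all zero-valued elements\r
+ *     NppStStatus status = nppsStCompact_32u(d_src, srcLen, d_dst, p_dstLen,\r
+ *                                            elemRemove, d_buffer, bufSize);\r
+ *     cudaFree(d_buffer);\r
+ *     cudaFreeHost(p_dstLen);\r
+ */\r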
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit signed values\r
+ * \see nppsStCompact_32u\r
+ */\r
+NppStStatus nppsStCompact_32s(NppSt32s *d_src, NppSt32u srcLen,\r
+ NppSt32s *d_dst, NppSt32u *p_dstLen,\r
+ NppSt32s elemRemove,\r
+ NppSt8u *pBuffer, NppSt32u bufSize);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit float values\r
+ * \see nppsStCompact_32u\r
+ */\r
+NppStStatus nppsStCompact_32f(NppSt32f *d_src, NppSt32u srcLen,\r
+ NppSt32f *d_dst, NppSt32u *p_dstLen,\r
+ NppSt32f elemRemove,\r
+ NppSt8u *pBuffer, NppSt32u bufSize);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit unsigned values. Host implementation\r
+ *\r
+ * \param h_src             [IN] Source vector pointer (Host or pinned memory)\r
+ * \param srcLen            [IN] Source vector length\r
+ * \param h_dst             [OUT] Destination vector pointer (Host or pinned memory)\r
+ * \param dstLen [OUT] Pointer to the destination vector length (can be NULL)\r
+ * \param elemRemove [IN] The value to be removed\r
+ *\r
+ * \return NPP status code\r
+ */\r
+NppStStatus nppsStCompact_32u_host(NppSt32u *h_src, NppSt32u srcLen,\r
+ NppSt32u *h_dst, NppSt32u *dstLen, NppSt32u elemRemove);\r
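+\r
+\r
+/*\r
+ * Illustrative host-side sketch: remove every occurrence of 0 from a small vector.\r
+ * Assuming the surviving elements are written to the destination in order, the\r
+ * expected result here is {1, 2, 3} with dstLen == 3.\r
+ *\r
+ *     NppSt32u src[5] = {1, 0, 2, 0, 3};\r
+ *     NppSt32u dst[5];\r
+ *     NppSt32u dstLen = 0;\r
+ *     NppStStatus status = nppsStCompact_32u_host(src, 5, dst, &dstLen, 0);\r
+ */\r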
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit signed values. Host implementation\r
+ * \see nppsStCompact_32u_host\r
+ */\r
+NppStStatus nppsStCompact_32s_host(NppSt32s *h_src, NppSt32u srcLen,\r
+ NppSt32s *h_dst, NppSt32u *dstLen, NppSt32s elemRemove);\r
+\r
+\r
+/**\r
+ * Compacts the input vector by removing elements of specified value. 32-bit float values. Host implementation\r
+ * \see nppsStCompact_32u_host\r
+ */\r
+NppStStatus nppsStCompact_32f_host(NppSt32f *h_src, NppSt32u srcLen,\r
+ NppSt32f *h_dst, NppSt32u *dstLen, NppSt32f elemRemove);\r
+\r
+\r
+/*@}*/\r
+\r
+\r
+#ifdef __cplusplus\r
+}\r
+#endif\r
+\r
+\r
+#endif // _npp_staging_h_\r