typedef void (*SplitFunction)(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream);\r
\r
//------------------------------------------------------------\r
- // Merge\r
-\r
- template <typename T> \r
- static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)\r
- {\r
- dim3 blockDim(32, 8);\r
- dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));\r
- mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(\r
- src[0].ptr, src[0].step, \r
- src[1].ptr, src[1].step,\r
- dst.rows, dst.cols, dst.ptr, dst.step);\r
- if (stream == 0)\r
- cudaSafeCall(cudaThreadSynchronize());\r
- }\r
-\r
-\r
- template <typename T> \r
- static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)\r
- {\r
- dim3 blockDim(32, 8);\r
- dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));\r
- mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(\r
- src[0].ptr, src[0].step, \r
- src[1].ptr, src[1].step,\r
- src[2].ptr, src[2].step,\r
- dst.rows, dst.cols, dst.ptr, dst.step);\r
- if (stream == 0)\r
- cudaSafeCall(cudaThreadSynchronize());\r
- }\r
-\r
-\r
- template <typename T> \r
- static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)\r
- {\r
- dim3 blockDim(32, 8);\r
- dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));\r
- mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(\r
- src[0].ptr, src[0].step, \r
- src[1].ptr, src[1].step,\r
- src[2].ptr, src[2].step,\r
- src[3].ptr, src[3].step,\r
- dst.rows, dst.cols, dst.ptr, dst.step);\r
- if (stream == 0)\r
- cudaSafeCall(cudaThreadSynchronize());\r
- }\r
-\r
-\r
- extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst, \r
- int total_channels, int elem_size, \r
- const cudaStream_t& stream) \r
- {\r
- static MergeFunction merge_func_tbl[] = \r
- {\r
- mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,\r
- mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,\r
- mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,\r
- };\r
-\r
- int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);\r
- MergeFunction merge_func = merge_func_tbl[merge_func_id];\r
-\r
- if (merge_func == 0)\r
- cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);\r
-\r
- merge_func(src, dst, stream);\r
- }\r
-\r
+ // Merge \r
\r
template <typename T>\r
__global__ void mergeC2_(const uchar* src0, size_t src0_step, \r
}\r
}\r
\r
- //------------------------------------------------------------\r
- // Split\r
-\r
\r
- template <typename T> \r
- static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)\r
+ // Host-side launcher for the mergeC2_<T> kernel.\r
+ //   src    - array of source planes; src[0] and src[1] are read here\r
+ //   dst    - destination image; its rows/cols size the launch grid\r
+ //   stream - stream the kernel is enqueued on\r
+ // Uses 32x8 thread blocks and a grid derived via divUp from dst.cols / dst.rows.\r
+ // When stream == 0 the call blocks until the device has finished.\r
+ template <typename T>\r
+ static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)\r
{\r
dim3 blockDim(32, 8);\r
- dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));\r
- splitC2_<T><<<gridDim, blockDim, 0, stream>>>(\r
- src.ptr, src.step, src.rows, src.cols,\r
- dst[0].ptr, dst[0].step, \r
- dst[1].ptr, dst[1].step);\r
+ dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));\r
+ mergeC2_<T><<<gridDim, blockDim, 0, stream>>>(\r
+ src[0].ptr, src[0].step,\r
+ src[1].ptr, src[1].step,\r
+ dst.rows, dst.cols, dst.ptr, dst.step);\r
+ // A kernel launch yields no status itself; query it explicitly so a failed\r
+ // launch is reported even when a non-default stream skips the sync below.\r
+ cudaSafeCall(cudaGetLastError());\r
if (stream == 0)\r
cudaSafeCall(cudaThreadSynchronize());\r
}\r
\r
\r
- template <typename T> \r
- static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)\r
+ // Host-side launcher for the mergeC3_<T> kernel.\r
+ //   src    - array of source planes; src[0], src[1] and src[2] are read here\r
+ //   dst    - destination image; its rows/cols size the launch grid\r
+ //   stream - stream the kernel is enqueued on\r
+ // Uses 32x8 thread blocks and a grid derived via divUp from dst.cols / dst.rows.\r
+ // When stream == 0 the call blocks until the device has finished.\r
+ template <typename T>\r
+ static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)\r
{\r
dim3 blockDim(32, 8);\r
- dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));\r
- splitC3_<T><<<gridDim, blockDim, 0, stream>>>(\r
- src.ptr, src.step, src.rows, src.cols,\r
- dst[0].ptr, dst[0].step, \r
- dst[1].ptr, dst[1].step,\r
- dst[2].ptr, dst[2].step); \r
+ dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));\r
+ mergeC3_<T><<<gridDim, blockDim, 0, stream>>>(\r
+ src[0].ptr, src[0].step,\r
+ src[1].ptr, src[1].step,\r
+ src[2].ptr, src[2].step,\r
+ dst.rows, dst.cols, dst.ptr, dst.step);\r
+ // A kernel launch yields no status itself; query it explicitly so a failed\r
+ // launch is reported even when a non-default stream skips the sync below.\r
+ cudaSafeCall(cudaGetLastError());\r
if (stream == 0)\r
cudaSafeCall(cudaThreadSynchronize());\r
}\r
\r
\r
- template <typename T> \r
- static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)\r
+ // Host-side launcher for the mergeC4_<T> kernel.\r
+ //   src    - array of source planes; src[0] through src[3] are read here\r
+ //   dst    - destination image; its rows/cols size the launch grid\r
+ //   stream - stream the kernel is enqueued on\r
+ // Uses 32x8 thread blocks and a grid derived via divUp from dst.cols / dst.rows.\r
+ // When stream == 0 the call blocks until the device has finished.\r
+ template <typename T>\r
+ static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream)\r
{\r
dim3 blockDim(32, 8);\r
- dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));\r
- splitC4_<T><<<gridDim, blockDim, 0, stream>>>(\r
- src.ptr, src.step, src.rows, src.cols,\r
- dst[0].ptr, dst[0].step, \r
- dst[1].ptr, dst[1].step,\r
- dst[2].ptr, dst[2].step,\r
- dst[3].ptr, dst[3].step);\r
+ dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y));\r
+ mergeC4_<T><<<gridDim, blockDim, 0, stream>>>(\r
+ src[0].ptr, src[0].step,\r
+ src[1].ptr, src[1].step,\r
+ src[2].ptr, src[2].step,\r
+ src[3].ptr, src[3].step,\r
+ dst.rows, dst.cols, dst.ptr, dst.step);\r
+ // A kernel launch yields no status itself; query it explicitly so a failed\r
+ // launch is reported even when a non-default stream skips the sync below.\r
+ cudaSafeCall(cudaGetLastError());\r
if (stream == 0)\r
cudaSafeCall(cudaThreadSynchronize());\r
}\r
\r
\r
- extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst, \r
- int num_channels, int elem_size1, \r
- const cudaStream_t& stream) \r
+ // C-linkage entry point that picks the merge launcher matching the channel\r
+ // count and element size, then invokes it with (src, dst, stream).\r
+ //   src            - array of source planes, forwarded to the launcher\r
+ //   dst            - destination image, forwarded to the launcher\r
+ //   total_channels - selects the table row; rows exist for 2, 3 and 4\r
+ //   elem_size      - selects the table column via elem_size >> 1\r
+ //   stream         - forwarded to the launcher\r
+ // Unsupported combinations are reported through cv::gpu::error.\r
+ extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst,\r
+ int total_channels, int elem_size,\r
+ const cudaStream_t& stream)\r
{\r
- static SplitFunction split_func_tbl[] = \r
+ // Three rows (2, 3, 4 channels) of five columns each; column 3 is empty.\r
+ static MergeFunction merge_func_tbl[] =\r
{\r
- splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,\r
- splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,\r
- splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,\r
+ mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,\r
+ mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,\r
+ mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,\r
};\r
\r
- int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);\r
- SplitFunction split_func = split_func_tbl[split_func_id];\r
+ // Only index the table for inputs it actually covers; anything else would\r
+ // read outside the array or alias another type's slot. Unsupported input\r
+ // leaves merge_func at 0 so the existing error path below reports it.\r
+ const bool channels_ok = total_channels >= 2 && total_channels <= 4;\r
+ const bool elem_size_ok = elem_size == 1 || elem_size == 2 || elem_size == 4 || elem_size == 8;\r
+ MergeFunction merge_func = 0;\r
+ if (channels_ok && elem_size_ok)\r
+ {\r
+ int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);\r
+ merge_func = merge_func_tbl[merge_func_id];\r
+ }\r
\r
- if (split_func == 0)\r
+ // NOTE(review): presumably cv::gpu::error does not return - confirm.\r
+ if (merge_func == 0)\r
cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);\r
\r
- split_func(src, dst, stream);\r
+ merge_func(src, dst, stream);\r
}\r
\r
\r
+\r
+ //------------------------------------------------------------\r
+ // Split\r
+\r
+\r
template <typename T>\r
__global__ void splitC2_(const uchar* src, size_t src_step, \r
int rows, int cols,\r
}\r
}\r
\r
-}}} // namespace cv::gpu::split_merge
\ No newline at end of file
+ // Host-side launcher for the splitC2_<T> kernel.\r
+ //   src    - source image; its rows/cols size the launch grid\r
+ //   dst    - array of destination planes; dst[0] and dst[1] are passed on\r
+ //   stream - stream the kernel is enqueued on\r
+ // Uses 32x8 thread blocks and a grid derived via divUp from src.cols / src.rows.\r
+ // When stream == 0 the call blocks until the device has finished.\r
+ template <typename T>\r
+ static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)\r
+ {\r
+ dim3 blockDim(32, 8);\r
+ dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));\r
+ splitC2_<T><<<gridDim, blockDim, 0, stream>>>(\r
+ src.ptr, src.step, src.rows, src.cols,\r
+ dst[0].ptr, dst[0].step,\r
+ dst[1].ptr, dst[1].step);\r
+ // A kernel launch yields no status itself; query it explicitly so a failed\r
+ // launch is reported even when a non-default stream skips the sync below.\r
+ cudaSafeCall(cudaGetLastError());\r
+ if (stream == 0)\r
+ cudaSafeCall(cudaThreadSynchronize());\r
+ }\r
+\r
+\r
+ // Host-side launcher for the splitC3_<T> kernel.\r
+ //   src    - source image; its rows/cols size the launch grid\r
+ //   dst    - array of destination planes; dst[0], dst[1] and dst[2] are passed on\r
+ //   stream - stream the kernel is enqueued on\r
+ // Uses 32x8 thread blocks and a grid derived via divUp from src.cols / src.rows.\r
+ // When stream == 0 the call blocks until the device has finished.\r
+ template <typename T>\r
+ static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)\r
+ {\r
+ dim3 blockDim(32, 8);\r
+ dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));\r
+ splitC3_<T><<<gridDim, blockDim, 0, stream>>>(\r
+ src.ptr, src.step, src.rows, src.cols,\r
+ dst[0].ptr, dst[0].step,\r
+ dst[1].ptr, dst[1].step,\r
+ dst[2].ptr, dst[2].step);\r
+ // A kernel launch yields no status itself; query it explicitly so a failed\r
+ // launch is reported even when a non-default stream skips the sync below.\r
+ cudaSafeCall(cudaGetLastError());\r
+ if (stream == 0)\r
+ cudaSafeCall(cudaThreadSynchronize());\r
+ }\r
+\r
+\r
+ // Host-side launcher for the splitC4_<T> kernel.\r
+ //   src    - source image; its rows/cols size the launch grid\r
+ //   dst    - array of destination planes; dst[0] through dst[3] are passed on\r
+ //   stream - stream the kernel is enqueued on\r
+ // Uses 32x8 thread blocks and a grid derived via divUp from src.cols / src.rows.\r
+ // When stream == 0 the call blocks until the device has finished.\r
+ template <typename T>\r
+ static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream)\r
+ {\r
+ dim3 blockDim(32, 8);\r
+ dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y));\r
+ splitC4_<T><<<gridDim, blockDim, 0, stream>>>(\r
+ src.ptr, src.step, src.rows, src.cols,\r
+ dst[0].ptr, dst[0].step,\r
+ dst[1].ptr, dst[1].step,\r
+ dst[2].ptr, dst[2].step,\r
+ dst[3].ptr, dst[3].step);\r
+ // A kernel launch yields no status itself; query it explicitly so a failed\r
+ // launch is reported even when a non-default stream skips the sync below.\r
+ cudaSafeCall(cudaGetLastError());\r
+ if (stream == 0)\r
+ cudaSafeCall(cudaThreadSynchronize());\r
+ }\r
+\r
+\r
+ // C-linkage entry point that picks the split launcher matching the channel\r
+ // count and element size, then invokes it with (src, dst, stream).\r
+ //   src          - source image, forwarded to the launcher\r
+ //   dst          - array of destination planes, forwarded to the launcher\r
+ //   num_channels - selects the table row; rows exist for 2, 3 and 4\r
+ //   elem_size1   - selects the table column via elem_size1 >> 1\r
+ //   stream       - forwarded to the launcher\r
+ // Unsupported combinations are reported through cv::gpu::error.\r
+ extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst,\r
+ int num_channels, int elem_size1,\r
+ const cudaStream_t& stream)\r
+ {\r
+ // Three rows (2, 3, 4 channels) of five columns each; column 3 is empty.\r
+ static SplitFunction split_func_tbl[] =\r
+ {\r
+ splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,\r
+ splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,\r
+ splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,\r
+ };\r
+\r
+ // Only index the table for inputs it actually covers; anything else would\r
+ // read outside the array or alias another type's slot. Unsupported input\r
+ // leaves split_func at 0 so the error path below reports it.\r
+ const bool channels_ok = num_channels >= 2 && num_channels <= 4;\r
+ const bool elem_size_ok = elem_size1 == 1 || elem_size1 == 2 || elem_size1 == 4 || elem_size1 == 8;\r
+ SplitFunction split_func = 0;\r
+ if (channels_ok && elem_size_ok)\r
+ {\r
+ int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);\r
+ split_func = split_func_tbl[split_func_id];\r
+ }\r
+\r
+ // NOTE(review): presumably cv::gpu::error does not return - confirm.\r
+ if (split_func == 0)\r
+ cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__);\r
+\r
+ split_func(src, dst, stream);\r
+ }\r
+\r
+}}} // namespace cv::gpu::split_merge\r