From b0461db43f9c1561f27951f002e2f1fd68509b37 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 2 Aug 2012 15:56:59 +0400 Subject: [PATCH] added missed device synchronization --- modules/gpu/src/color.cpp | 116 +++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 48 deletions(-) diff --git a/modules/gpu/src/color.cpp b/modules/gpu/src/color.cpp index faebca6..d5615ab 100644 --- a/modules/gpu/src/color.cpp +++ b/modules/gpu/src/color.cpp @@ -1153,7 +1153,7 @@ namespace funcs[dcn == 4][src.channels() == 4][src.depth()](src, dst, StreamAccessor::getStream(stream)); } - void bgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void bgr_to_lab(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; @@ -1169,13 +1169,17 @@ namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; oSizeROI.height = src.rows; nppSafeCall( nppiBGRToLab_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1185,7 +1189,7 @@ namespace bgr_to_lab(dst, dst, -1, stream); } - void lab_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void lab_to_bgr(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; @@ -1201,13 +1205,17 @@ namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; oSizeROI.height = src.rows; nppSafeCall( nppiLabToBGR_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1217,7 +1225,7 @@ namespace bgr_to_rgb(dst, dst, -1, stream); } - void rgb_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void rgb_to_luv(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; @@ -1233,7 +1241,8 @@ namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; @@ -1243,6 +1252,9 @@ namespace nppSafeCall( nppiRGBToLUV_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); else nppSafeCall( nppiRGBToLUV_8u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1252,7 +1264,7 @@ namespace rgb_to_luv(dst, dst, -1, stream); } - void luv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream) + void luv_to_rgb(const GpuMat& src, GpuMat& dst, int dcn, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; @@ -1268,7 +1280,8 @@ namespace dst.create(src.size(), CV_MAKETYPE(src.depth(), dcn)); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; @@ -1278,6 +1291,9 @@ namespace nppSafeCall( nppiLUVToRGB_8u_C3R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); else nppSafeCall( nppiLUVToRGB_8u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1287,7 +1303,7 @@ namespace bgr_to_rgb(dst, dst, -1, stream); } - void rgba_to_mbgra(const GpuMat& src, GpuMat& dst, int, Stream& stream) + void rgba_to_mbgra(const GpuMat& src, GpuMat& dst, int, Stream& st) { #if (CUDA_VERSION < 5000) (void)src; @@ -1299,7 +1315,8 @@ namespace dst.create(src.size(), src.type()); - NppStreamHandler h(StreamAccessor::getStream(stream)); + cudaStream_t stream = StreamAccessor::getStream(st); + NppStreamHandler h(stream); NppiSize oSizeROI; oSizeROI.width = src.cols; @@ -1309,6 +1326,9 @@ namespace nppSafeCall( nppiAlphaPremul_8u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); else nppSafeCall( nppiAlphaPremul_16u_AC4R(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), oSizeROI) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); #endif } @@ -1474,57 +1494,57 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream 0, // CV_BayerGR2GRAY = 89 //YUV 4:2:0 formats family - 0, // COLOR_YUV2RGB_NV12 = 90, - 0, // COLOR_YUV2BGR_NV12 = 91, - 0, // COLOR_YUV2RGB_NV21 = 92, - 0, // COLOR_YUV2BGR_NV21 = 93, + 0, // CV_YUV2RGB_NV12 = 90, + 0, // CV_YUV2BGR_NV12 = 91, + 0, // CV_YUV2RGB_NV21 = 92, + 0, // CV_YUV2BGR_NV21 = 93, - 0, // COLOR_YUV2RGBA_NV12 = 94, - 0, // COLOR_YUV2BGRA_NV12 = 95, - 0, // COLOR_YUV2RGBA_NV21 = 96, - 0, // COLOR_YUV2BGRA_NV21 = 97, + 0, // CV_YUV2RGBA_NV12 = 94, + 0, // CV_YUV2BGRA_NV12 = 95, + 0, // CV_YUV2RGBA_NV21 = 96, + 0, // CV_YUV2BGRA_NV21 = 97, - 0, // COLOR_YUV2RGB_YV12 = 98, - 0, // COLOR_YUV2BGR_YV12 = 99, - 0, // COLOR_YUV2RGB_IYUV = 100, - 0, // COLOR_YUV2BGR_IYUV = 101, + 0, // CV_YUV2RGB_YV12 = 98, + 0, // CV_YUV2BGR_YV12 = 99, + 0, // CV_YUV2RGB_IYUV = 100, + 0, // CV_YUV2BGR_IYUV = 101, - 0, // COLOR_YUV2RGBA_YV12 = 102, - 0, // COLOR_YUV2BGRA_YV12 = 103, - 0, // COLOR_YUV2RGBA_IYUV = 104, - 0, // COLOR_YUV2BGRA_IYUV = 105, + 0, // CV_YUV2RGBA_YV12 = 102, + 0, // CV_YUV2BGRA_YV12 = 103, + 0, // CV_YUV2RGBA_IYUV = 104, + 0, // CV_YUV2BGRA_IYUV = 105, - 0, // COLOR_YUV2GRAY_420 = 106, + 0, // CV_YUV2GRAY_420 = 106, //YUV 4:2:2 formats family - 0, // COLOR_YUV2RGB_UYVY = 107, - 0, // COLOR_YUV2BGR_UYVY = 108, - 0, // //COLOR_YUV2RGB_VYUY = 109, - 0, // //COLOR_YUV2BGR_VYUY = 110, + 0, // CV_YUV2RGB_UYVY = 107, + 0, // CV_YUV2BGR_UYVY = 108, + 0, // //CV_YUV2RGB_VYUY = 109, + 0, // //CV_YUV2BGR_VYUY = 110, - 0, // COLOR_YUV2RGBA_UYVY = 111, - 0, // COLOR_YUV2BGRA_UYVY = 112, - 0, // //COLOR_YUV2RGBA_VYUY = 113, - 0, // //COLOR_YUV2BGRA_VYUY = 114, + 0, // CV_YUV2RGBA_UYVY = 111, + 0, // CV_YUV2BGRA_UYVY = 112, + 0, // //CV_YUV2RGBA_VYUY = 113, + 0, // //CV_YUV2BGRA_VYUY = 114, - 0, // COLOR_YUV2RGB_YUY2 = 115, - 0, // COLOR_YUV2BGR_YUY2 = 116, - 0, // COLOR_YUV2RGB_YVYU = 117, - 0, // COLOR_YUV2BGR_YVYU = 118, + 0, // CV_YUV2RGB_YUY2 = 115, + 0, // CV_YUV2BGR_YUY2 = 116, + 0, // CV_YUV2RGB_YVYU = 117, + 0, // CV_YUV2BGR_YVYU = 118, - 0, // COLOR_YUV2RGBA_YUY2 = 119, - 0, // COLOR_YUV2BGRA_YUY2 = 120, - 0, // COLOR_YUV2RGBA_YVYU = 121, - 0, // COLOR_YUV2BGRA_YVYU = 122, + 0, // CV_YUV2RGBA_YUY2 = 119, + 0, // CV_YUV2BGRA_YUY2 = 120, + 0, // CV_YUV2RGBA_YVYU = 121, + 0, // CV_YUV2BGRA_YVYU = 122, - 0, // COLOR_YUV2GRAY_UYVY = 123, - 0, // COLOR_YUV2GRAY_YUY2 = 124, + 0, // CV_YUV2GRAY_UYVY = 123, + 0, // CV_YUV2GRAY_YUY2 = 124, // alpha premultiplication - rgba_to_mbgra, // COLOR_RGBA2mRGBA = 125, - 0, // COLOR_mRGBA2RGBA = 126, + rgba_to_mbgra, // CV_RGBA2mRGBA = 125, + 0, // CV_mRGBA2RGBA = 126, - 0, // COLOR_COLORCVT_MAX = 127 + 0, // CV_COLORCVT_MAX = 127 }; CV_Assert(code < 128); -- 2.7.4