From: Vladislav Vinogradov Date: Tue, 26 Feb 2013 09:50:33 +0000 (+0400) Subject: fixed gpu bitwise operations with scalars X-Git-Tag: accepted/2.0/20130307.220821~33^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4f7cfbc26ed42a881f34a77de0e19e3a4579334c;p=profile%2Fivi%2Fopencv.git fixed gpu bitwise operations with scalars --- diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 27fb61f..5165b35 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -2284,15 +2284,18 @@ namespace arithm template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); - template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); + template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); + template void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); - template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); + template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); + template void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); - template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); + template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); + template void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream); } ////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index 3d6cde3..eedb313 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -2280,11 +2280,11 @@ namespace { typedef void (*bit_scalar_func_t)(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream); - template struct BitScalar + template struct BitScalar { static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream) { - func(src, static_cast(sc.val[0]), dst, stream); + func(src, saturate_cast(sc.val[0]), dst, stream); } }; @@ -2292,14 +2292,12 @@ namespace { static void call(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream) { - Scalar_ isc = sc; - unsigned int packedVal = 0; - packedVal |= (isc.val[0] & 0xffff); - packedVal |= (isc.val[1] & 0xffff) << 8; - packedVal |= (isc.val[2] & 0xffff) << 16; - packedVal |= (isc.val[3] & 0xffff) << 24; + packedVal |= (saturate_cast(sc.val[0]) & 0xffff); + packedVal |= (saturate_cast(sc.val[1]) & 0xffff) << 8; + packedVal |= (saturate_cast(sc.val[2]) & 0xffff) << 16; + packedVal |= (saturate_cast(sc.val[3]) & 0xffff) << 24; func(src, packedVal, dst, stream); } @@ -2330,7 +2328,7 @@ namespace oSizeROI.width = src.cols; oSizeROI.height = src.rows; - const npp_t pConstants[] = {static_cast(sc.val[0]), static_cast(sc.val[1]), static_cast(sc.val[2]), static_cast(sc.val[3])}; + const npp_t pConstants[] = {saturate_cast(sc.val[0]), saturate_cast(sc.val[1]), saturate_cast(sc.val[2]), saturate_cast(sc.val[3])}; nppSafeCall( func(src.ptr(), static_cast(src.step), pConstants, dst.ptr(), static_cast(dst.step), oSizeROI) ); @@ -2350,7 +2348,7 @@ namespace oSizeROI.width = src.cols; oSizeROI.height = src.rows; - nppSafeCall( func(src.ptr(), static_cast(src.step), static_cast(sc.val[0]), dst.ptr(), static_cast(dst.step), oSizeROI) ); + nppSafeCall( func(src.ptr(), static_cast(src.step), saturate_cast(sc.val[0]), dst.ptr(), static_cast(dst.step), oSizeROI) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); @@ -2365,11 +2363,11 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); static const func_t funcs[5][4] = { - {BitScalar< bitScalarAnd >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, {0,0,0,0}, - {BitScalar< bitScalarAnd >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, {0,0,0,0}, - {BitScalar< bitScalarAnd >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} }; const int depth = src.depth(); @@ -2390,11 +2388,11 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); static const func_t funcs[5][4] = { - {BitScalar< bitScalarOr >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, {0,0,0,0}, - {BitScalar< bitScalarOr >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, {0,0,0,0}, - {BitScalar< bitScalarOr >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} }; const int depth = src.depth(); @@ -2415,11 +2413,11 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); static const func_t funcs[5][4] = { - {BitScalar< bitScalarXor >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, {0,0,0,0}, - {BitScalar< bitScalarXor >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, {0,0,0,0}, - {BitScalar< bitScalarXor >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} }; const int depth = src.depth(); diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index 736256c..affc306 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -1873,7 +1873,7 @@ PARAM_TEST_CASE(Bitwise_Scalar, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channel cv::gpu::setDevice(devInfo.deviceID()); src = randomMat(size, CV_MAKE_TYPE(depth, channels)); - cv::Scalar_ ival = randomScalar(0.0, 255.0); + cv::Scalar_ ival = randomScalar(0.0, std::numeric_limits::max()); val = ival; } };