From: Vladislav Vinogradov
Date: Wed, 6 May 2015 10:28:08 +0000 (+0300)
Subject: Bug #4315 : fix CUDA bitwise operations with mask
X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~1314^2~212^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d87c30dc8470e39048cc01f75f26eb7bd3d6ce9f;p=platform%2Fupstream%2Fopencv.git

Bug #4315 : fix CUDA bitwise operations with mask
---

diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu
index 876d4ad..f606f0c 100644
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -1896,53 +1896,53 @@ namespace cv { namespace gpu { namespace device
 
 namespace arithm
 {
-    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
     {
         if (mask.data)
-            transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
+            transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), SingleMaskChannels(mask, num_channels), stream);
         else
             transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
     }
 
-    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
     {
         if (mask.data)
-            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
+            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), SingleMaskChannels(mask, num_channels), stream);
         else
             transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
     }
 
-    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
     {
         if (mask.data)
-            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
+            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), SingleMaskChannels(mask, num_channels), stream);
         else
             transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
     }
 
-    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream)
     {
         if (mask.data)
-            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
+            transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), SingleMaskChannels(mask, num_channels), stream);
         else
             transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
     }
 
-    template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
-    template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
 
-    template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
 
-    template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp
index 356b50a..354d614 100644
--- a/modules/gpu/src/element_operations.cpp
+++ b/modules/gpu/src/element_operations.cpp
@@ -1955,7 +1955,7 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre
 
 namespace arithm
 {
-    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
 }
 
 void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& s)
@@ -1964,39 +1964,73 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
 
     const int depth = src.depth();
 
-    CV_Assert( depth <= CV_64F );
+    CV_Assert( depth < CV_32F );
     CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
 
     dst.create(src.size(), src.type());
 
     cudaStream_t stream = StreamAccessor::getStream(s);
 
-    const int bcols = (int) (src.cols * src.elemSize());
-
-    if ((bcols & 3) == 0)
+    if (mask.empty())
     {
-        const int vcols = bcols >> 2;
+        const int bcols = (int) (src.cols * src.elemSize());
+        bool aligned =
+            isAligned(src.data, sizeof(unsigned int)) &&
+            isAligned(dst.data, sizeof(unsigned int));
 
-        bitMatNot<uint>(
-            PtrStepSzb(src.rows, vcols, src.data, src.step),
-            PtrStepSzb(src.rows, vcols, dst.data, dst.step),
-            mask, stream);
-    }
-    else if ((bcols & 1) == 0)
-    {
-        const int vcols = bcols >> 1;
+        if (aligned && (bcols & 3) == 0)
+        {
+            const int vcols = bcols >> 2;
 
-        bitMatNot<ushort>(
-            PtrStepSzb(src.rows, vcols, src.data, src.step),
-            PtrStepSzb(src.rows, vcols, dst.data, dst.step),
-            mask, stream);
+            bitMatNot<uint>(
+                PtrStepSzb(src.rows, vcols, src.data, src.step),
+                PtrStepSzb(src.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else if (aligned && (bcols & 1) == 0)
+        {
+            const int vcols = bcols >> 1;
+
+            bitMatNot<ushort>(
+                PtrStepSzb(src.rows, vcols, src.data, src.step),
+                PtrStepSzb(src.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else
+        {
+            bitMatNot<uchar>(
+                PtrStepSzb(src.rows, bcols, src.data, src.step),
+                PtrStepSzb(src.rows, bcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
     }
     else
     {
-        bitMatNot<uchar>(
-            PtrStepSzb(src.rows, bcols, src.data, src.step),
-            PtrStepSzb(src.rows, bcols, dst.data, dst.step),
-            mask, stream);
+        const int elem_size = src.elemSize1();
+        const int num_channels = src.channels();
+        const int bcols = src.cols * num_channels;
+
+        if (elem_size == 1)
+        {
+            bitMatNot<uchar>(
+                PtrStepSzb(src.rows, bcols, src.data, src.step),
+                PtrStepSzb(src.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 2)
+        {
+            bitMatNot<ushort>(
+                PtrStepSzb(src.rows, bcols, src.data, src.step),
+                PtrStepSzb(src.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 4)
+        {
+            bitMatNot<uint>(
+                PtrStepSzb(src.rows, bcols, src.data, src.step),
+                PtrStepSzb(src.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
     }
 }
 
@@ -2005,9 +2039,9 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
 
 namespace arithm
 {
-    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+    template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
+    template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, int num_channels, cudaStream_t stream);
 }
 
 void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s)
@@ -2016,7 +2050,7 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
 
     const int depth = src1.depth();
 
-    CV_Assert( depth <= CV_64F );
+    CV_Assert( depth < CV_32F );
     CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
     CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
 
@@ -2024,36 +2058,73 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
 
     cudaStream_t stream = StreamAccessor::getStream(s);
 
-    const int bcols = (int) (src1.cols * src1.elemSize());
-
-    if ((bcols & 3) == 0)
+    if (mask.empty())
    {
-        const int vcols = bcols >> 2;
+        const int bcols = (int) (src1.cols * src1.elemSize());
+        bool aligned =
+            isAligned(src1.data, sizeof(unsigned int)) &&
+            isAligned(src2.data, sizeof(unsigned int)) &&
+            isAligned(dst.data, sizeof(unsigned int));
 
-        bitMatAnd<uint>(
-            PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
-            mask, stream);
-    }
-    else if ((bcols & 1) == 0)
-    {
-        const int vcols = bcols >> 1;
+        if (aligned && (bcols & 3) == 0)
+        {
+            const int vcols = bcols >> 2;
 
-        bitMatAnd<ushort>(
-            PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
-            mask, stream);
+            bitMatAnd<uint>(
+                PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else if (aligned && (bcols & 1) == 0)
+        {
+            const int vcols = bcols >> 1;
+
+            bitMatAnd<ushort>(
+                PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else
+        {
+            bitMatAnd<uchar>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
     }
     else
     {
+        const int elem_size = src1.elemSize1();
+        const int num_channels = src1.channels();
+        const int bcols = src1.cols * num_channels;
 
-        bitMatAnd<uchar>(
-            PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
-            mask, stream);
+        if (elem_size == 1)
+        {
+            bitMatAnd<uchar>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 2)
+        {
+            bitMatAnd<ushort>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 4)
+        {
+            bitMatAnd<uint>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
     }
 }
 
@@ -2063,7 +2134,7 @@ void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, co
 
     const int depth = src1.depth();
 
-    CV_Assert( depth <= CV_64F );
+    CV_Assert( depth < CV_32F );
     CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
     CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
 
@@ -2071,36 +2142,73 @@ void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, co
 
     cudaStream_t stream = StreamAccessor::getStream(s);
 
-    const int bcols = (int) (src1.cols * src1.elemSize());
-
-    if ((bcols & 3) == 0)
+    if (mask.empty())
     {
-        const int vcols = bcols >> 2;
+        const int bcols = (int) (src1.cols * src1.elemSize());
+        bool aligned =
+            isAligned(src1.data, sizeof(unsigned int)) &&
+            isAligned(src2.data, sizeof(unsigned int)) &&
+            isAligned(dst.data, sizeof(unsigned int));
 
-        bitMatOr<uint>(
-            PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
-            mask, stream);
-    }
-    else if ((bcols & 1) == 0)
-    {
-        const int vcols = bcols >> 1;
+        if (aligned && (bcols & 3) == 0)
+        {
+            const int vcols = bcols >> 2;
 
-        bitMatOr<ushort>(
-            PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
-            mask, stream);
+            bitMatOr<uint>(
+                PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else if (aligned && (bcols & 1) == 0)
+        {
+            const int vcols = bcols >> 1;
+
+            bitMatOr<ushort>(
+                PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else
+        {
+            bitMatOr<uchar>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
     }
     else
     {
+        const int elem_size = src1.elemSize1();
+        const int num_channels = src1.channels();
+        const int bcols = src1.cols * num_channels;
 
-        bitMatOr<uchar>(
-            PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
-            mask, stream);
+        if (elem_size == 1)
+        {
+            bitMatOr<uchar>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 2)
+        {
+            bitMatOr<ushort>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 4)
+        {
+            bitMatOr<uint>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
     }
 }
 
@@ -2110,7 +2218,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
 
     const int depth = src1.depth();
 
-    CV_Assert( depth <= CV_64F );
+    CV_Assert( depth < CV_32F );
     CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() );
     CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) );
 
@@ -2118,36 +2226,73 @@ void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c
 
     cudaStream_t stream = StreamAccessor::getStream(s);
 
-    const int bcols = (int) (src1.cols * src1.elemSize());
-
-    if ((bcols & 3) == 0)
+    if (mask.empty())
     {
-        const int vcols = bcols >> 2;
+        const int bcols = (int) (src1.cols * src1.elemSize());
+        bool aligned =
+            isAligned(src1.data, sizeof(unsigned int)) &&
+            isAligned(src2.data, sizeof(unsigned int)) &&
+            isAligned(dst.data, sizeof(unsigned int));
 
-        bitMatXor<uint>(
-            PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
-            mask, stream);
-    }
-    else if ((bcols & 1) == 0)
-    {
-        const int vcols = bcols >> 1;
+        if (aligned && (bcols & 3) == 0)
+        {
+            const int vcols = bcols >> 2;
 
-        bitMatXor<ushort>(
-            PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
-            mask, stream);
+            bitMatXor<uint>(
+                PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else if (aligned && (bcols & 1) == 0)
+        {
+            const int vcols = bcols >> 1;
+
+            bitMatXor<ushort>(
+                PtrStepSzb(src1.rows, vcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, vcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, vcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
+        else
+        {
+            bitMatXor<uchar>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                PtrStepb(), 1, stream);
+        }
     }
     else
     {
+        const int elem_size = src1.elemSize1();
+        const int num_channels = src1.channels();
+        const int bcols = src1.cols * num_channels;
 
-        bitMatXor<uchar>(
-            PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
-            PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
-            PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
-            mask, stream);
+        if (elem_size == 1)
+        {
+            bitMatXor<uchar>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 2)
+        {
+            bitMatXor<ushort>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
+        else if (elem_size == 4)
+        {
+            bitMatXor<uint>(
+                PtrStepSzb(src1.rows, bcols, src1.data, src1.step),
+                PtrStepSzb(src1.rows, bcols, src2.data, src2.step),
+                PtrStepSzb(src1.rows, bcols, dst.data, dst.step),
+                mask, num_channels, stream);
+        }
     }
 }
diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp
index 7ceeaed..b8b83ef 100644
--- a/modules/gpu/test/test_core.cpp
+++ b/modules/gpu/test/test_core.cpp
@@ -1785,72 +1785,95 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Compare_Scalar, testing::Combine(
 //////////////////////////////////////////////////////////////////////////////
 // Bitwise_Array
 
-PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType)
+PARAM_TEST_CASE(Bitwise_Array, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
 {
     cv::gpu::DeviceInfo devInfo;
     cv::Size size;
     int type;
+    bool useRoi;
 
     cv::Mat src1;
     cv::Mat src2;
 
+    cv::Mat mask;
+
     virtual void SetUp()
     {
         devInfo = GET_PARAM(0);
         size = GET_PARAM(1);
         type = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
 
         cv::gpu::setDevice(devInfo.deviceID());
 
         src1 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
         src2 = randomMat(size, type, 0.0, std::numeric_limits<int>::max());
+
+        mask = randomMat(size, CV_8UC1, 0.0, 2.0);
     }
 };
 
 GPU_TEST_P(Bitwise_Array, Not)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_not(loadMat(src1), dst);
+    cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::gpu::bitwise_not(loadMat(src1, useRoi), dst_nomask);
+    cv::gpu::bitwise_not(loadMat(src1, useRoi), dst_mask, loadMat(mask, useRoi));
 
-    cv::Mat dst_gold = ~src1;
+    cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::bitwise_not(src1, dst_gold_nomask);
+    cv::bitwise_not(src1, dst_gold_mask, mask);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
 }
 
 GPU_TEST_P(Bitwise_Array, Or)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_or(loadMat(src1), loadMat(src2), dst);
+    cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::gpu::bitwise_or(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_nomask);
+    cv::gpu::bitwise_or(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_mask, loadMat(mask, useRoi));
 
-    cv::Mat dst_gold = src1 | src2;
+    cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::bitwise_or(src1, src2, dst_gold_nomask);
+    cv::bitwise_or(src1, src2, dst_gold_mask, mask);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
 }
 
 GPU_TEST_P(Bitwise_Array, And)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_and(loadMat(src1), loadMat(src2), dst);
+    cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::gpu::bitwise_and(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_nomask);
+    cv::gpu::bitwise_and(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_mask, loadMat(mask, useRoi));
 
-    cv::Mat dst_gold = src1 & src2;
+    cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::bitwise_and(src1, src2, dst_gold_nomask);
+    cv::bitwise_and(src1, src2, dst_gold_mask, mask);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
 }
 
 GPU_TEST_P(Bitwise_Array, Xor)
 {
-    cv::gpu::GpuMat dst;
-    cv::gpu::bitwise_xor(loadMat(src1), loadMat(src2), dst);
+    cv::gpu::GpuMat dst_nomask, dst_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::gpu::bitwise_xor(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_nomask);
+    cv::gpu::bitwise_xor(loadMat(src1, useRoi), loadMat(src2, useRoi), dst_mask, loadMat(mask, useRoi));
 
-    cv::Mat dst_gold = src1 ^ src2;
+    cv::Mat dst_gold_nomask, dst_gold_mask(src1.size(), src1.type(), cv::Scalar::all(0));
+    cv::bitwise_xor(src1, src2, dst_gold_nomask);
+    cv::bitwise_xor(src1, src2, dst_gold_mask, mask);
 
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_nomask, dst_nomask, 0.0);
+    EXPECT_MAT_NEAR(dst_gold_mask, dst_mask, 0.0);
 }
 
 INSTANTIATE_TEST_CASE_P(GPU_Core, Bitwise_Array, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
-    TYPES(CV_8U, CV_32S, 1, 4)));
+    TYPES(CV_8U, CV_32S, 1, 4),
+    WHOLE_SUBMAT));
 
 //////////////////////////////////////////////////////////////////////////////
 // Bitwise_Scalar
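---

Note on the fix: the old code always took the vectorized path, packing 2 or 4 bytes per
thread, and passed the user mask straight into transform(). The mask was therefore
indexed by packed byte/word column rather than by pixel, and for multi-channel images
the channel count was ignored entirely, so masked bitwise operations consulted the
wrong mask cells. With this patch the vectorized path is used only when there is no
mask (and the pointers are word-aligned); masked calls run element-wise and wrap the
CV_8UC1 mask in SingleMaskChannels, which maps a flattened element column back to its
pixel. The tightened CV_Assert( depth < CV_32F ) matches the new dispatch, which only
instantiates 1-, 2- and 4-byte integer element types. The functor below is a minimal
sketch of that indexing idea under a hypothetical name; the real SingleMaskChannels
lives in the gpu module's device utility headers and may differ in detail.

#include <opencv2/core/cuda_devptrs.hpp> // cv::gpu::PtrStepb (OpenCV 2.4)

// Sketch: one CV_8UC1 mask byte gates all channels of its pixel.
struct MaskChannelsSketch
{
    cv::gpu::PtrStepb mask; // single-channel 8-bit mask, one byte per pixel
    int channels;           // channel count of the image being transformed

    __host__ __device__ MaskChannelsSketch(cv::gpu::PtrStepb m, int c)
        : mask(m), channels(c) {}

    // transform() evaluates this per (row, flattened column); nonzero means "write".
    __device__ __forceinline__ bool operator()(int y, int x) const
    {
        return mask.ptr(y)[x / channels] != 0;
    }
};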
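A quick host-side check of the behavior the new tests cover (a standalone sketch, not
part of the patch; assumes OpenCV 2.4 built with CUDA and linked against opencv_gpu):

#include <opencv2/core/core.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <iostream>

int main()
{
    cv::Mat src(480, 640, CV_8UC3), mask(480, 640, CV_8UC1);
    cv::randu(src, 0, 256);
    cv::randu(mask, 0, 2); // random 0/1 mask, one byte per pixel

    // CPU reference: only mask != 0 pixels are written, the rest stay zero.
    cv::Mat gold = cv::Mat::zeros(src.size(), src.type());
    cv::bitwise_not(src, gold, mask);

    // GPU version: before this fix, the masked multi-channel case gave wrong results.
    cv::gpu::GpuMat d_src(src), d_mask(mask);
    cv::gpu::GpuMat d_dst(src.size(), src.type(), cv::Scalar::all(0));
    cv::gpu::bitwise_not(d_src, d_dst, d_mask);

    cv::Mat dst(d_dst); // download
    bool ok = cv::countNonZero(gold.reshape(1) != dst.reshape(1)) == 0;
    std::cout << (ok ? "OK" : "MISMATCH") << std::endl;
    return ok ? 0 : 1;
}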