From: Vladislav Vinogradov Date: Tue, 30 Dec 2014 12:36:58 +0000 (+0300) Subject: removed buffered versions of histogram functions X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~2746^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=220d937d9a27951a3d66e2c8daaa1399b12d83fc;p=platform%2Fupstream%2Fopencv.git removed buffered versions of histogram functions used BufferPool mechanism instead --- diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp index a97388b..9fff4ee 100644 --- a/modules/core/include/opencv2/core/private.cuda.hpp +++ b/modules/core/include/opencv2/core/private.cuda.hpp @@ -130,6 +130,12 @@ namespace cv { namespace cuda class NppStreamHandler { public: + inline explicit NppStreamHandler(Stream& newStream) + { + oldStream = nppGetStream(); + nppSetStream(StreamAccessor::getStream(newStream)); + } + inline explicit NppStreamHandler(cudaStream_t newStream) { oldStream = nppGetStream(); diff --git a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp index 1ec288f..7aa74aa 100644 --- a/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp +++ b/modules/cudaimgproc/include/opencv2/cudaimgproc.hpp @@ -205,19 +205,11 @@ CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stre @param src Source image with CV_8UC1 type. @param dst Destination image. -@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes). @param stream Stream for the asynchronous version. @sa equalizeHist */ -CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null()); - -/** @overload */ -static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) -{ - GpuMat buf; - cuda::equalizeHist(src, dst, buf, stream); -} +CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); /** @brief Base class for Contrast Limited Adaptive Histogram Equalization. : */ @@ -259,27 +251,11 @@ a four-channel image, all channels are processed separately. @param histSize Size of the histogram. @param lowerLevel Lower boundary of lowest-level bin. @param upperLevel Upper boundary of highest-level bin. -@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes). @param stream Stream for the asynchronous version. */ -CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); - +CV_EXPORTS void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()); /** @overload */ -static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null()) -{ - GpuMat buf; - cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); -} - -/** @overload */ -CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); - -/** @overload */ -static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()) -{ - GpuMat buf; - cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream); -} +CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null()); /** @brief Calculates a histogram with bins determined by the levels array. @@ -287,27 +263,11 @@ static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int For a four-channel image, all channels are processed separately. @param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type. @param levels Number of levels in the histogram. -@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes). @param stream Stream for the asynchronous version. */ -CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null()); - -/** @overload */ -static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null()) -{ - GpuMat buf; - cuda::histRange(src, hist, levels, buf, stream); -} - -/** @overload */ -CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null()); - +CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null()); /** @overload */ -static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null()) -{ - GpuMat buf; - cuda::histRange(src, hist, levels, buf, stream); -} +CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null()); //! @} cudaimgproc_hist diff --git a/modules/cudaimgproc/perf/perf_histogram.cpp b/modules/cudaimgproc/perf/perf_histogram.cpp index 0e02039..c638ce0 100644 --- a/modules/cudaimgproc/perf/perf_histogram.cpp +++ b/modules/cudaimgproc/perf/perf_histogram.cpp @@ -63,9 +63,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC1, { const cv::cuda::GpuMat d_src(src); cv::cuda::GpuMat dst; - cv::cuda::GpuMat d_buf; - TEST_CYCLE() cv::cuda::histEven(d_src, dst, d_buf, 30, 0, 180); + TEST_CYCLE() cv::cuda::histEven(d_src, dst, 30, 0, 180); CUDA_SANITY_CHECK(dst); } @@ -106,9 +105,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC4, { const cv::cuda::GpuMat d_src(src); cv::cuda::GpuMat d_hist[4]; - cv::cuda::GpuMat d_buf; - TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel); + TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, histSize, lowerLevel, upperLevel); cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3; d_hist[0].download(cpu_hist0); @@ -167,9 +165,8 @@ PERF_TEST_P(Sz, EqualizeHist, { const cv::cuda::GpuMat d_src(src); cv::cuda::GpuMat dst; - cv::cuda::GpuMat d_buf; - TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst, d_buf); + TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst); CUDA_SANITY_CHECK(dst); } diff --git a/modules/cudaimgproc/src/histogram.cpp b/modules/cudaimgproc/src/histogram.cpp index d63e57d..a965242 100644 --- a/modules/cudaimgproc/src/histogram.cpp +++ b/modules/cudaimgproc/src/histogram.cpp @@ -49,7 +49,7 @@ using namespace cv::cuda; void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::cuda::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); } +void cv::cuda::equalizeHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); } cv::Ptr cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr(); } @@ -93,7 +93,7 @@ namespace hist void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream); } -void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream) +void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream) { GpuMat src = _src.getGpuMat(); @@ -107,8 +107,8 @@ void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray size_t bufSize = intBufSize + 2 * 256 * sizeof(int); - ensureSizeIsEnough(1, static_cast(bufSize), CV_8UC1, _buf); - GpuMat buf = _buf.getGpuMat(); + BufferPool pool(_stream); + GpuMat buf = pool.getBuffer(1, static_cast(bufSize), CV_8UC1); GpuMat hist(1, 256, CV_32SC1, buf.data); GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int)); @@ -288,7 +288,7 @@ namespace { typedef typename NppHistogramEvenFuncC1::src_t src_t; - static void hist(const GpuMat& src, OutputArray _hist, InputOutputArray _buf, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream) + static void hist(const GpuMat& src, OutputArray _hist, int histSize, int lowerLevel, int upperLevel, Stream& stream) { const int levels = histSize + 1; @@ -302,15 +302,15 @@ namespace int buf_size; get_buf_size(sz, levels, &buf_size); - ensureSizeIsEnough(1, buf_size, CV_8UC1, _buf); - GpuMat buf = _buf.getGpuMat(); + BufferPool pool(stream); + GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1); NppStreamHandler h(stream); nppSafeCall( func(src.ptr(), static_cast(src.step), sz, hist.ptr(), levels, lowerLevel, upperLevel, buf.ptr()) ); - if (stream == 0) + if (!stream) cudaSafeCall( cudaDeviceSynchronize() ); } }; @@ -319,7 +319,7 @@ namespace { typedef typename NppHistogramEvenFuncC4::src_t src_t; - static void hist(const GpuMat& src, GpuMat hist[4],InputOutputArray _buf, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream) + static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream) { int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1}; hist[0].create(1, histSize[0], CV_32S); @@ -336,14 +336,14 @@ namespace int buf_size; get_buf_size(sz, levels, &buf_size); - ensureSizeIsEnough(1, buf_size, CV_8U, _buf); - GpuMat buf = _buf.getGpuMat(); + BufferPool pool(stream); + GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1); NppStreamHandler h(stream); nppSafeCall( func(src.ptr(), static_cast(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr()) ); - if (stream == 0) + if (!stream) cudaSafeCall( cudaDeviceSynchronize() ); } }; @@ -392,7 +392,7 @@ namespace typedef typename NppHistogramRangeFuncC1::level_t level_t; enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; - static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, InputOutputArray _buf, cudaStream_t stream) + static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, Stream& stream) { CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 ); @@ -406,8 +406,8 @@ namespace int buf_size; get_buf_size(sz, levels.cols, &buf_size); - ensureSizeIsEnough(1, buf_size, CV_8U, _buf); - GpuMat buf = _buf.getGpuMat(); + BufferPool pool(stream); + GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1); NppStreamHandler h(stream); @@ -424,7 +424,7 @@ namespace typedef typename NppHistogramRangeFuncC1::level_t level_t; enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1::LEVEL_TYPE_CODE}; - static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4],InputOutputArray _buf, cudaStream_t stream) + static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream) { CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 ); CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 ); @@ -447,8 +447,8 @@ namespace int buf_size; get_buf_size(sz, nLevels, &buf_size); - ensureSizeIsEnough(1, buf_size, CV_8U, _buf); - GpuMat buf = _buf.getGpuMat(); + BufferPool pool(stream); + GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1); NppStreamHandler h(stream); @@ -493,9 +493,9 @@ namespace } } -void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream) +void cv::cuda::histEven(InputArray _src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream) { - typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream); + typedef void (*hist_t)(const GpuMat& src, OutputArray hist, int levels, int lowerLevel, int upperLevel, Stream& stream); static const hist_t hist_callers[] = { NppHistogramEvenC1::hist, @@ -514,12 +514,12 @@ void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 ); - hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); + hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream); } -void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream) +void cv::cuda::histEven(InputArray _src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream) { - typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream); + typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4], Stream& stream); static const hist_t hist_callers[] = { NppHistogramEvenC4::hist, @@ -532,12 +532,12 @@ void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, i CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 ); - hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream)); + hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream); } -void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream) +void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, Stream& stream) { - typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream); + typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, Stream& stream); static const hist_t hist_callers[] = { NppHistogramRangeC1::hist, @@ -553,12 +553,12 @@ void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 ); - hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); + hist_callers[src.depth()](src, hist, levels, stream); } -void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream) +void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], Stream& stream) { - typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream); + typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream); static const hist_t hist_callers[] = { NppHistogramRangeC4::hist, @@ -573,7 +573,7 @@ void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4] CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 ); - hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream)); + hist_callers[src.depth()](src, hist, levels, stream); } #endif /* !defined (HAVE_CUDA) */ diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index af3f874..2e7faa3 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -1053,12 +1053,11 @@ TEST(equalizeHist) cuda::GpuMat d_src(src); cuda::GpuMat d_dst; - cuda::GpuMat d_buf; - cuda::equalizeHist(d_src, d_dst, d_buf); + cuda::equalizeHist(d_src, d_dst); CUDA_ON; - cuda::equalizeHist(d_src, d_dst, d_buf); + cuda::equalizeHist(d_src, d_dst); CUDA_OFF; } }