From 1feb5b4d024134ac50f948367e3b65b74f56e946 Mon Sep 17 00:00:00 2001 From: Andrey Morozov Date: Wed, 8 Sep 2010 15:13:23 +0000 Subject: [PATCH] added async version of postfilter_textureness and prefilter_xsobel, modified async test and added test for async version of stereobm --- modules/gpu/src/cuda/stereobm.cu | 27 +++++++--- modules/gpu/src/stereobm_gpu.cpp | 10 ++-- tests/gpu/src/operator_async_call.cpp | 6 +-- tests/gpu/src/stereo_bm_async.cpp | 97 +++++++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 14 deletions(-) create mode 100644 tests/gpu/src/stereo_bm_async.cpp diff --git a/modules/gpu/src/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu index 73a70b8..5685c54 100644 --- a/modules/gpu/src/cuda/stereobm.cu +++ b/modules/gpu/src/cuda/stereobm.cu @@ -410,7 +410,7 @@ extern "C" __global__ void prefilter_kernel(unsigned char *output, size_t step, namespace cv { namespace gpu { namespace bm { - extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap) + extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap, const cudaStream_t & stream) { cudaChannelFormatDesc desc = cudaCreateChannelDesc(); cudaSafeCall( cudaBindTexture2D( 0, stereobm_gpu::texForSobel, input.ptr, desc, input.cols, input.rows, input.step ) ); @@ -421,10 +421,18 @@ namespace cv { namespace gpu { namespace bm grid.x = divUp(input.cols, threads.x); grid.y = divUp(input.rows, threads.y); - stereobm_gpu::prefilter_kernel<<>>(output.ptr, output.step, output.cols, output.rows, prefilterCap); - cudaSafeCall( cudaThreadSynchronize() ); + if (stream == 0) + { + stereobm_gpu::prefilter_kernel<<>>(output.ptr, output.step, output.cols, output.rows, prefilterCap); + cudaSafeCall( cudaThreadSynchronize() ); + } + else + { + stereobm_gpu::prefilter_kernel<<>>(output.ptr, output.step, output.cols, output.rows, prefilterCap); + } cudaSafeCall( cudaUnbindTexture (stereobm_gpu::texForSobel ) ); + } }}} @@ -532,7 +540,7 @@ extern "C" __global__ void textureness_kernel(unsigned char *disp, size_t disp_s namespace cv { namespace gpu { namespace bm { - extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avgTexturenessThreshold, const DevMem2D& disp) + extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avgTexturenessThreshold, const DevMem2D& disp, const cudaStream_t & stream) { avgTexturenessThreshold *= winsz * winsz; @@ -551,8 +559,15 @@ namespace cv { namespace gpu { namespace bm size_t smem_size = (threads.x + threads.x + (winsz/2) * 2 ) * sizeof(float); - stereobm_gpu::textureness_kernel<<>>(disp.ptr, disp.step, winsz, avgTexturenessThreshold, disp.cols, disp.rows); - cudaSafeCall( cudaThreadSynchronize() ); + if (stream == 0) + { + stereobm_gpu::textureness_kernel<<>>(disp.ptr, disp.step, winsz, avgTexturenessThreshold, disp.cols, disp.rows); + cudaSafeCall( cudaThreadSynchronize() ); + } + else + { + stereobm_gpu::textureness_kernel<<>>(disp.ptr, disp.step, winsz, avgTexturenessThreshold, disp.cols, disp.rows); + } cudaSafeCall( cudaUnbindTexture (stereobm_gpu::texForTF) ); } diff --git a/modules/gpu/src/stereobm_gpu.cpp b/modules/gpu/src/stereobm_gpu.cpp index 1c5716f..706e4ef 100644 --- a/modules/gpu/src/stereobm_gpu.cpp +++ b/modules/gpu/src/stereobm_gpu.cpp @@ -62,8 +62,8 @@ namespace cv { namespace gpu { //extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_& minSSD_buf); extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_& minSSD_buf, const cudaStream_t & stream); - extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap = 31); - extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avergeTexThreshold, const DevMem2D& disp); + extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap /*= 31*/, const cudaStream_t & stream); + extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avgTexturenessThreshold, const DevMem2D& disp, const cudaStream_t & stream); } }} @@ -115,8 +115,8 @@ static void stereo_bm_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& ri leBuf.create( left.size(), left.type()); riBuf.create(right.size(), right.type()); - bm::prefilter_xsobel( left, leBuf); - bm::prefilter_xsobel(right, riBuf); + bm::prefilter_xsobel( left, leBuf, 31, stream); + bm::prefilter_xsobel(right, riBuf, 31, stream); le_for_bm = leBuf; ri_for_bm = riBuf; @@ -125,7 +125,7 @@ static void stereo_bm_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& ri bm::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream); if (avergeTexThreshold) - bm::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity); + bm::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity, stream); } diff --git a/tests/gpu/src/operator_async_call.cpp b/tests/gpu/src/operator_async_call.cpp index 6033e67..1ecddc6 100644 --- a/tests/gpu/src/operator_async_call.cpp +++ b/tests/gpu/src/operator_async_call.cpp @@ -99,9 +99,9 @@ bool CV_GpuMatAsyncCallTest::compare_matrix(cv::Mat & cpumat) //int64 time = getTickCount(); Stream stream; - stream.enqueueCopy(gmat0, gmat1); - stream.enqueueCopy(gmat0, gmat2); - stream.enqueueCopy(gmat0, gmat3); + stream.enqueueMemSet(gmat0, cv::Scalar::all(1), gmat1); + stream.enqueueMemSet(gmat0, cv::Scalar::all(1), gmat2); + stream.enqueueMemSet(gmat0, cv::Scalar::all(1), gmat3); stream.waitForCompletion(); //int64 time1 = getTickCount(); diff --git a/tests/gpu/src/stereo_bm_async.cpp b/tests/gpu/src/stereo_bm_async.cpp new file mode 100644 index 0000000..fdbe951 --- /dev/null +++ b/tests/gpu/src/stereo_bm_async.cpp @@ -0,0 +1,97 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "gputest.hpp" +#include "highgui.h" +#include "cv.h" + + +using namespace cv; +using namespace std; + +class CV_GpuMatAsyncCallStereoBMTest : public CvTest +{ + public: + CV_GpuMatAsyncCallStereoBMTest(); + ~CV_GpuMatAsyncCallStereoBMTest(); + + protected: + void run(int); +}; + +CV_GpuMatAsyncCallStereoBMTest::CV_GpuMatAsyncCallStereoBMTest(): CvTest( "GPU-MatAsyncCallStereoBM", "asyncStereoBM" ) +{} + +CV_GpuMatAsyncCallStereoBMTest::~CV_GpuMatAsyncCallStereoBMTest() {} + +void CV_GpuMatAsyncCallStereoBMTest::run( int /* start_from */) +{ + cv::Mat img_l = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-L.png", 0); + cv::Mat img_r = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-R.png", 0); + cv::Mat img_reference = cv::imread(std::string(ts->get_data_path()) + "stereobm/aloe-disp.png", 0); + + if (img_l.empty() || img_r.empty() || img_reference.empty()) + { + ts->set_failed_test_info(CvTS::FAIL_MISSING_TEST_DATA); + return; + } + + cv::gpu::GpuMat disp; + cv::gpu::StereoBM_GPU bm(0, 128, 19); + + cv::gpu::Stream stream; + + for (size_t i = 0; i < 50; i++) + { + bm(cv::gpu::GpuMat(img_l), cv::gpu::GpuMat(img_r), disp, stream); + } + + stream.waitForCompletion(); + disp.convertTo(disp, img_reference.type()); + double norm = cv::norm(disp, img_reference, cv::NORM_INF); + + if (norm >= 100) + ts->printf(CvTS::CONSOLE, "\nStereoBM norm = %f\n", norm); + ts->set_failed_test_info((norm < 100) ? CvTS::OK : CvTS::FAIL_GENERIC); + +} + +CV_GpuMatAsyncCallStereoBMTest CV_GpuMatAsyncCallStereoBMTest_test; -- 2.7.4