From 0898c3c651ef6196a2287d61ef9d6644b5653743 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Thu, 11 Oct 2012 18:24:48 +0400 Subject: [PATCH] kernel policy --- modules/gpu/src/cuda/isf-sc.cu | 62 +++++++++++------------------------------ modules/gpu/src/icf.hpp | 27 ++++++++++++++++++ modules/gpu/src/softcascade.cpp | 44 ++++++----------------------- 3 files changed, 52 insertions(+), 81 deletions(-) diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index f74673c..74e47ba 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -303,21 +303,16 @@ namespace icf { } #endif - void detect(const PtrStepSzb& roi, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, - const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, - PtrStepSz objects, PtrStepSzi counter, const int downscales) + template<> + void CascadeInvoker::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, + PtrStepSz objects, PtrStepSzi counter, const int downscales, const int scale) const { int fw = 160; int fh = 120; dim3 block(32, 8); - dim3 grid(fw, fh / 8, downscales); + dim3 grid(fw, fh / 8, (scale == -1) ? downscales : 1); - const Level* l = (const Level*)levels.ptr(); - const Octave* oct = ((const Octave*)octaves.ptr()); - const float* st = (const float*)stages.ptr(); - const Node* nd = (const Node*)nodes.ptr(); - const float* lf = (const float*)leaves.ptr(); uint* ctr = (uint*)counter.ptr(); Detection* det = (Detection*)objects.ptr(); uint max_det = objects.cols / sizeof(Detection); @@ -328,44 +323,21 @@ namespace icf { cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc(); cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step)); - test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr, 0); - cudaSafeCall( cudaGetLastError()); - - grid = dim3(fw, fh / 8, 47 - downscales); - test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr, downscales); - cudaSafeCall( cudaGetLastError()); - cudaSafeCall( cudaDeviceSynchronize()); - } - - void detectAtScale(const int scale, const PtrStepSzb& roi, const PtrStepSzb& levels, const PtrStepSzb& octaves, - const PtrStepSzf& stages, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, - PtrStepSz objects, PtrStepSzi counter, const int downscales) - { - int fw = 160; - int fh = 120; - - dim3 block(32, 8); - dim3 grid(fw, fh / 8, 1); - - const Level* l = (const Level*)levels.ptr(); - const Octave* oct = ((const Octave*)octaves.ptr()); - const float* st = (const float*)stages.ptr(); - const Node* nd = (const Node*)nodes.ptr(); - const float* lf = (const float*)leaves.ptr(); - uint* ctr = (uint*)counter.ptr(); - Detection* det = (Detection*)objects.ptr(); - uint max_det = objects.cols / sizeof(Detection); - - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step)); - - cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc(); - cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step)); + if (scale == -1) + { + test_kernel_warp<<>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, 0); + cudaSafeCall( cudaGetLastError()); - if (scale >= downscales) - test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr, scale); + grid = dim3(fw, fh / 8, 47 - downscales); + test_kernel_warp<<>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, downscales); + } else - test_kernel_warp<<>>(l, oct, st, nd, lf, det, max_det, ctr, scale); + { + if (scale >= downscales) + test_kernel_warp<<>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale); + else + test_kernel_warp<<>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale); + } cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaDeviceSynchronize()); diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp index a103341..06c8114 100644 --- a/modules/gpu/src/icf.hpp +++ b/modules/gpu/src/icf.hpp @@ -124,6 +124,33 @@ struct __align__(16) Detection : x(_x), y(_y), w(_w), h(_h), confidence(c), kind(0) {}; }; +struct CascadePolicy +{ + enum {STA_X = 32, STA_Y = 8}; +}; + +template +struct CascadeInvoker +{ + CascadeInvoker(): levels(0), octaves(0), stages(0), nodes(0), leaves(0) {} + CascadeInvoker(const PtrStepSzb& _levels, const PtrStepSzb& _octaves, const PtrStepSzf& _stages, + const PtrStepSzb& _nodes, const PtrStepSzf& _leaves) + : levels((const Level*)_levels.ptr()), octaves((const Octave*)_octaves.ptr()), stages((const float*)_stages.ptr()), + nodes((const Node*)_nodes.ptr()), leaves((const float*)_leaves.ptr()) + {} + + const Level* levels; + const Octave* octaves; + + const float* stages; + + const Node* nodes; + const float* leaves; + + void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz objects, + PtrStepSzi counter, const int downscales, const int csale = -1) const; +}; + } }}} diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp index c93949f..f25c5a3 100644 --- a/modules/gpu/src/softcascade.cpp +++ b/modules/gpu/src/softcascade.cpp @@ -69,29 +69,6 @@ namespace cv { namespace gpu { namespace device { namespace icf { void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, const int fw, const int fh, const int bins); - - void detect(const PtrStepSzb& rois, - const PtrStepSzb& levels, - const PtrStepSzb& octaves, - const PtrStepSzf& stages, - const PtrStepSzb& nodes, - const PtrStepSzf& leaves, - const PtrStepSzi& hogluv, - PtrStepSz objects, - PtrStepSzi counter, - const int downscales); - - void detectAtScale(const int scale, - const PtrStepSzb& rois, - const PtrStepSzb& levels, - const PtrStepSzb& octaves, - const PtrStepSzf& stages, - const PtrStepSzb& nodes, - const PtrStepSzf& leaves, - const PtrStepSzi& hogluv, - PtrStepSz objects, - PtrStepSzi counter, - const int downscales); } namespace imgproc { @@ -150,6 +127,8 @@ struct cv::gpu::SoftCascade::Filds std::vector scales; + device::icf::CascadeInvoker invoker; + static const int shrinkage = 4; enum { BOOST = 0 }; @@ -166,17 +145,11 @@ struct cv::gpu::SoftCascade::Filds }; bool fill(const FileNode &root, const float mins, const float maxs); - void detect(const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, cudaStream_t stream) const - { - cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int)); - device::icf::detect(roi, levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter, downscales); - } - - void detectAtScale(int scale, const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, cudaStream_t stream) const + void detect(int scale, const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, cudaStream_t stream) const { cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int)); - device::icf::detectAtScale(scale, roi, levels, octaves, stages, nodes, leaves, hogluv, objects, - detCounter, downscales); + // device::icf::CascadeInvoker invoker(levels, octaves, stages, nodes, leaves); + invoker(roi, hogluv, objects, detCounter, downscales, scale); } void preprocess(const cv::gpu::GpuMat& colored) @@ -439,6 +412,8 @@ bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, c calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES); CV_Assert(!levels.empty()); + invoker = device::icf::CascadeInvoker(levels, octaves, stages, nodes, leaves); + return true; } @@ -569,10 +544,7 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& flds.preprocess(colored); - if (specificScale == -1) - flds.detect(rois,objects, 0); - else - flds.detectAtScale(specificScale, rois, objects, 0); + flds.detect(specificScale, rois, objects, 0); cv::Mat out(flds.detCounter); int ndetections = *(out.data); -- 2.7.4