From 5d15e4ea58f8aa591d9be9d64ae4d22936fe0b88 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Fri, 21 Sep 2012 16:12:18 +0400 Subject: [PATCH] CUDA kernels interface --- modules/gpu/src/cuda/isf-sc.cu | 83 +++++++++++++++++++++++++++++++++++++++-- modules/gpu/src/icf.hpp | 27 +++++++++++--- modules/gpu/src/softcascade.cpp | 14 ++++--- 3 files changed, 109 insertions(+), 15 deletions(-) diff --git a/modules/gpu/src/cuda/isf-sc.cu b/modules/gpu/src/cuda/isf-sc.cu index a6418c1..89a74ee 100644 --- a/modules/gpu/src/cuda/isf-sc.cu +++ b/modules/gpu/src/cuda/isf-sc.cu @@ -42,13 +42,90 @@ #include -void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv) const +namespace cv { namespace gpu { + + + namespace device { + +__global__ void rgb2grayluv(const uchar3* __restrict__ rgb, uchar* __restrict__ hog, + const int rgbPitch, const int hogPitch) +{ +} + +__global__ void gray2hog(const uchar* __restrict__ gray, uchar* __restrict__ hog, + const int pitch) +{ +} + +__global__ void decimate(const uchar* __restrict__ hogluv, uchar* __restrict__ shrank, + const int inPitch, const int outPitch ) +{ +} + +__global__ void intRow(const uchar* __restrict__ hogluv, ushort* __restrict__ sum, + const int inPitch, const int outPitch) +{ + +} + +__global__ void intCol(ushort* __restrict__ sum, const int pitch) +{ + +} + + +__global__ void detect(const cv::gpu::icf::Cascade cascade, const uchar* __restrict__ hogluv, const int pitch) +{ + cascade.detectAt(); +} + +} + +void __device icf::Cascade::detectAt() const +{ + +} + +void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const { // detection kernel + } -void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz& image) +void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz& rgb, cudaStream_t stream) { // color convertin kernel + dim3 block(32, 8); + dim3 grid(FRAME_WIDTH / 32, FRAME_HEIGHT / 8); + + uchar * channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_BINS); + device::rgb2grayluv<<>>((uchar3*)rgb.ptr(), channels, rgb.step, dmem.step); + cudaSafeCall( cudaGetLastError()); + // hog calculation kernel -} \ No newline at end of file + channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_LUV_BINS); + device::gray2hog<<>>(channels, (uchar*)dmem.ptr(), dmem.step); + cudaSafeCall( cudaGetLastError() ); + + const int shrWidth = FRAME_WIDTH / shrinkage; + const int shrHeight = FRAME_HEIGHT / shrinkage; + + // decimate kernel + grid = dim3(shrWidth / 32, shrHeight / 8); + device::decimate<<>>((uchar*)dmem.ptr(), (uchar*)shrunk.ptr(), dmem.step, shrunk.step); + cudaSafeCall( cudaGetLastError() ); + + // integrate rows + block = dim3(shrWidth, 1); + grid = dim3(shrHeight * HOG_LUV_BINS, 1); + device::intRow<<>>((uchar*)shrunk.ptr(), (ushort*)hogluv.ptr(), shrunk.step, hogluv.step); + cudaSafeCall( cudaGetLastError() ); + + // integrate cols + block = dim3(128, 1); + grid = dim3(shrWidth * HOG_LUV_BINS, 1); + device::intCol<<>>((ushort*)hogluv.ptr(), hogluv.step); + cudaSafeCall( cudaGetLastError() ); +} + +}} \ No newline at end of file diff --git a/modules/gpu/src/icf.hpp b/modules/gpu/src/icf.hpp index 7183fc0..8b075be 100644 --- a/modules/gpu/src/icf.hpp +++ b/modules/gpu/src/icf.hpp @@ -46,17 +46,19 @@ #define __OPENCV_ICF_HPP__ #if defined __CUDACC__ -# define __hd__ __host__ __device__ __forceinline__ +# define __device __device__ __forceinline__ #else -# define __hd__ +# define __device #endif -namespace icf { +namespace cv { namespace gpu { namespace icf { using cv::gpu::PtrStepSzb; using cv::gpu::PtrStepSzf; +typedef unsigned char uchar; + struct Cascade { Cascade() {} @@ -64,7 +66,8 @@ struct Cascade const cv::gpu::PtrStepSzf& lvs, const cv::gpu::PtrStepSzb& fts, const cv::gpu::PtrStepSzb& lls) : octaves(octs), stages(sts), nodes(nds), leaves(lvs), features(fts), levels(lls) {} - void detect(const cv::gpu::PtrStepSzb& hogluv) const; + void detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const; + void __device detectAt() const; PtrStepSzb octaves; PtrStepSzf stages; @@ -83,12 +86,24 @@ struct ChannelStorage const cv::gpu::PtrStepSzb& itg, const int s) : dmem (buff), shrunk(shr), hogluv(itg), shrinkage(s) {} - void frame(const cv::gpu::PtrStepSz& image); + void frame(const cv::gpu::PtrStepSz& rgb, cudaStream_t stream); PtrStepSzb dmem; PtrStepSzb shrunk; PtrStepSzb hogluv; + enum + { + FRAME_WIDTH = 640, + FRAME_HEIGHT = 480, + TOTAL_SCALES = 55, + CLASSIFIERS = 5, + ORIG_OBJECT_WIDTH = 64, + ORIG_OBJECT_HEIGHT = 128, + HOG_BINS = 6, + HOG_LUV_BINS = 10 + }; + int shrinkage; }; @@ -143,6 +158,6 @@ struct __align__(8) Level //is actually 24 bytes objSize.y = round(oct.size.y * relScale); } }; -} +}}} #endif \ No newline at end of file diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp index fb36efd..b2419c1 100644 --- a/modules/gpu/src/softcascade.cpp +++ b/modules/gpu/src/softcascade.cpp @@ -100,9 +100,9 @@ struct cv::gpu::SoftCascade::Filds }; bool fill(const FileNode &root, const float mins, const float maxs); - void detect() const + void detect(cudaStream_t stream) const { - cascade.detect(hogluv); + cascade.detect(hogluv, stream); } private: @@ -394,18 +394,20 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c } void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& image, const GpuMat& /*rois*/, - GpuMat& /*objects*/, const int /*rejectfactor*/, Stream /*stream*/) + GpuMat& /*objects*/, const int /*rejectfactor*/, Stream s) { // only color images are supperted - CV_Assert(image.type() == CV_8UC4); + CV_Assert(image.type() == CV_8UC3); // only this window size allowed CV_Assert(image.cols == 640 && image.rows == 480); Filds& flds = *filds; - flds.storage.frame(image); - flds.detect(); + cudaStream_t stream = StreamAccessor::getStream(s); + + flds.storage.frame(image, stream); + flds.detect(stream); } #endif \ No newline at end of file -- 2.7.4