#include <icf.hpp>
-void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv) const
+namespace cv { namespace gpu {
+
+
+ namespace device {
+
+__global__ void rgb2grayluv(const uchar3* __restrict__ rgb, uchar* __restrict__ hog,
+ const int rgbPitch, const int hogPitch)
+{
+}
+
+__global__ void gray2hog(const uchar* __restrict__ gray, uchar* __restrict__ hog,
+ const int pitch)
+{
+}
+
+__global__ void decimate(const uchar* __restrict__ hogluv, uchar* __restrict__ shrank,
+ const int inPitch, const int outPitch )
+{
+}
+
+__global__ void intRow(const uchar* __restrict__ hogluv, ushort* __restrict__ sum,
+ const int inPitch, const int outPitch)
+{
+
+}
+
+__global__ void intCol(ushort* __restrict__ sum, const int pitch)
+{
+
+}
+
+
+__global__ void detect(const cv::gpu::icf::Cascade cascade, const uchar* __restrict__ hogluv, const int pitch)
+{
+ cascade.detectAt();
+}
+
+}
+
+void __device icf::Cascade::detectAt() const
+{
+
+}
+
+void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const
{
// detection kernel
+
}
-void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar4>& image)
+void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStream_t stream)
{
// color convertin kernel
+ dim3 block(32, 8);
+ dim3 grid(FRAME_WIDTH / 32, FRAME_HEIGHT / 8);
+
+ uchar * channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_BINS);
+ device::rgb2grayluv<<<grid, block, 0, stream>>>((uchar3*)rgb.ptr(), channels, rgb.step, dmem.step);
+ cudaSafeCall( cudaGetLastError());
+
// hog calculation kernel
-}
\ No newline at end of file
+ channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_LUV_BINS);
+ device::gray2hog<<<grid, block, 0, stream>>>(channels, (uchar*)dmem.ptr(), dmem.step);
+ cudaSafeCall( cudaGetLastError() );
+
+ const int shrWidth = FRAME_WIDTH / shrinkage;
+ const int shrHeight = FRAME_HEIGHT / shrinkage;
+
+ // decimate kernel
+ grid = dim3(shrWidth / 32, shrHeight / 8);
+ device::decimate<<<grid, block, 0, stream>>>((uchar*)dmem.ptr(), (uchar*)shrunk.ptr(), dmem.step, shrunk.step);
+ cudaSafeCall( cudaGetLastError() );
+
+ // integrate rows
+ block = dim3(shrWidth, 1);
+ grid = dim3(shrHeight * HOG_LUV_BINS, 1);
+ device::intRow<<<grid, block, 0, stream>>>((uchar*)shrunk.ptr(), (ushort*)hogluv.ptr(), shrunk.step, hogluv.step);
+ cudaSafeCall( cudaGetLastError() );
+
+ // integrate cols
+ block = dim3(128, 1);
+ grid = dim3(shrWidth * HOG_LUV_BINS, 1);
+ device::intCol<<<grid, block, 0, stream>>>((ushort*)hogluv.ptr(), hogluv.step);
+ cudaSafeCall( cudaGetLastError() );
+}
+
+}}
\ No newline at end of file
#define __OPENCV_ICF_HPP__
#if defined __CUDACC__
-# define __hd__ __host__ __device__ __forceinline__
+# define __device __device__ __forceinline__
#else
-# define __hd__
+# define __device
#endif
-namespace icf {
+namespace cv { namespace gpu { namespace icf {
using cv::gpu::PtrStepSzb;
using cv::gpu::PtrStepSzf;
+typedef unsigned char uchar;
+
struct Cascade
{
Cascade() {}
const cv::gpu::PtrStepSzf& lvs, const cv::gpu::PtrStepSzb& fts, const cv::gpu::PtrStepSzb& lls)
: octaves(octs), stages(sts), nodes(nds), leaves(lvs), features(fts), levels(lls) {}
- void detect(const cv::gpu::PtrStepSzb& hogluv) const;
+ void detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const;
+ void __device detectAt() const;
PtrStepSzb octaves;
PtrStepSzf stages;
const cv::gpu::PtrStepSzb& itg, const int s)
: dmem (buff), shrunk(shr), hogluv(itg), shrinkage(s) {}
- void frame(const cv::gpu::PtrStepSz<uchar4>& image);
+ void frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStream_t stream);
PtrStepSzb dmem;
PtrStepSzb shrunk;
PtrStepSzb hogluv;
+ enum
+ {
+ FRAME_WIDTH = 640,
+ FRAME_HEIGHT = 480,
+ TOTAL_SCALES = 55,
+ CLASSIFIERS = 5,
+ ORIG_OBJECT_WIDTH = 64,
+ ORIG_OBJECT_HEIGHT = 128,
+ HOG_BINS = 6,
+ HOG_LUV_BINS = 10
+ };
+
int shrinkage;
};
objSize.y = round(oct.size.y * relScale);
}
};
-}
+}}}
#endif
\ No newline at end of file
};
bool fill(const FileNode &root, const float mins, const float maxs);
- void detect() const
+ void detect(cudaStream_t stream) const
{
- cascade.detect(hogluv);
+ cascade.detect(hogluv, stream);
}
private:
}
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& image, const GpuMat& /*rois*/,
- GpuMat& /*objects*/, const int /*rejectfactor*/, Stream /*stream*/)
+ GpuMat& /*objects*/, const int /*rejectfactor*/, Stream s)
{
// only color images are supperted
- CV_Assert(image.type() == CV_8UC4);
+ CV_Assert(image.type() == CV_8UC3);
// only this window size allowed
CV_Assert(image.cols == 640 && image.rows == 480);
Filds& flds = *filds;
- flds.storage.frame(image);
- flds.detect();
+ cudaStream_t stream = StreamAccessor::getStream(s);
+
+ flds.storage.frame(image, stream);
+ flds.detect(stream);
}
#endif
\ No newline at end of file