}
#endif
- void detect(const PtrStepSzb& roi, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
- const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
- PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales)
+ template<>
+ void CascadeInvoker<CascadePolicy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
+ PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const int scale) const
{
int fw = 160;
int fh = 120;
dim3 block(32, 8);
- dim3 grid(fw, fh / 8, downscales);
+ dim3 grid(fw, fh / 8, (scale == -1) ? downscales : 1);
- const Level* l = (const Level*)levels.ptr();
- const Octave* oct = ((const Octave*)octaves.ptr());
- const float* st = (const float*)stages.ptr();
- const Node* nd = (const Node*)nodes.ptr();
- const float* lf = (const float*)leaves.ptr();
uint* ctr = (uint*)counter.ptr();
Detection* det = (Detection*)objects.ptr();
uint max_det = objects.cols / sizeof(Detection);
cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
- test_kernel_warp<false><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
- cudaSafeCall( cudaGetLastError());
-
- grid = dim3(fw, fh / 8, 47 - downscales);
- test_kernel_warp<true><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, downscales);
- cudaSafeCall( cudaGetLastError());
- cudaSafeCall( cudaDeviceSynchronize());
- }
-
- void detectAtScale(const int scale, const PtrStepSzb& roi, const PtrStepSzb& levels, const PtrStepSzb& octaves,
- const PtrStepSzf& stages, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
- PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales)
- {
- int fw = 160;
- int fh = 120;
-
- dim3 block(32, 8);
- dim3 grid(fw, fh / 8, 1);
-
- const Level* l = (const Level*)levels.ptr();
- const Octave* oct = ((const Octave*)octaves.ptr());
- const float* st = (const float*)stages.ptr();
- const Node* nd = (const Node*)nodes.ptr();
- const float* lf = (const float*)leaves.ptr();
- uint* ctr = (uint*)counter.ptr();
- Detection* det = (Detection*)objects.ptr();
- uint max_det = objects.cols / sizeof(Detection);
-
- cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
- cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
-
- cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
- cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
+ if (scale == -1)
+ {
+ test_kernel_warp<false><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, 0);
+ cudaSafeCall( cudaGetLastError());
- if (scale >= downscales)
- test_kernel_warp<true><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
+ grid = dim3(fw, fh / 8, 47 - downscales);
+ test_kernel_warp<true><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, downscales);
+ }
else
- test_kernel_warp<false><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
+ {
+ if (scale >= downscales)
+ test_kernel_warp<true><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale);
+ else
+ test_kernel_warp<false><<<grid, block>>>(levels, octaves, stages, nodes, leaves, det, max_det, ctr, scale);
+ }
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize());
: x(_x), y(_y), w(_w), h(_h), confidence(c), kind(0) {};
};
+struct CascadePolicy
+{
+ enum {STA_X = 32, STA_Y = 8};
+};
+
+template<typename Policy>
+struct CascadeInvoker
+{
+ CascadeInvoker(): levels(0), octaves(0), stages(0), nodes(0), leaves(0) {}
+ CascadeInvoker(const PtrStepSzb& _levels, const PtrStepSzb& _octaves, const PtrStepSzf& _stages,
+ const PtrStepSzb& _nodes, const PtrStepSzf& _leaves)
+ : levels((const Level*)_levels.ptr()), octaves((const Octave*)_octaves.ptr()), stages((const float*)_stages.ptr()),
+ nodes((const Node*)_nodes.ptr()), leaves((const float*)_leaves.ptr())
+ {}
+
+ const Level* levels;
+ const Octave* octaves;
+
+ const float* stages;
+
+ const Node* nodes;
+ const float* leaves;
+
+ void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
+ PtrStepSzi counter, const int downscales, const int csale = -1) const;
+};
+
}
}}}
namespace icf {
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins);
-
- void detect(const PtrStepSzb& rois,
- const PtrStepSzb& levels,
- const PtrStepSzb& octaves,
- const PtrStepSzf& stages,
- const PtrStepSzb& nodes,
- const PtrStepSzf& leaves,
- const PtrStepSzi& hogluv,
- PtrStepSz<uchar4> objects,
- PtrStepSzi counter,
- const int downscales);
-
- void detectAtScale(const int scale,
- const PtrStepSzb& rois,
- const PtrStepSzb& levels,
- const PtrStepSzb& octaves,
- const PtrStepSzf& stages,
- const PtrStepSzb& nodes,
- const PtrStepSzf& leaves,
- const PtrStepSzi& hogluv,
- PtrStepSz<uchar4> objects,
- PtrStepSzi counter,
- const int downscales);
}
namespace imgproc
{
std::vector<float> scales;
+ device::icf::CascadeInvoker<device::icf::CascadePolicy> invoker;
+
static const int shrinkage = 4;
enum { BOOST = 0 };
};
bool fill(const FileNode &root, const float mins, const float maxs);
- void detect(const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, cudaStream_t stream) const
- {
- cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
- device::icf::detect(roi, levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter, downscales);
- }
-
- void detectAtScale(int scale, const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, cudaStream_t stream) const
+ void detect(int scale, const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, cudaStream_t stream) const
{
cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
- device::icf::detectAtScale(scale, roi, levels, octaves, stages, nodes, leaves, hogluv, objects,
- detCounter, downscales);
+ // device::icf::CascadeInvoker<device::icf::CascadePolicy> invoker(levels, octaves, stages, nodes, leaves);
+ invoker(roi, hogluv, objects, detCounter, downscales, scale);
}
void preprocess(const cv::gpu::GpuMat& colored)
calcLevels(voctaves, FRAME_WIDTH, FRAME_HEIGHT, TOTAL_SCALES);
CV_Assert(!levels.empty());
+ invoker = device::icf::CascadeInvoker<device::icf::CascadePolicy>(levels, octaves, stages, nodes, leaves);
+
return true;
}
flds.preprocess(colored);
- if (specificScale == -1)
- flds.detect(rois,objects, 0);
- else
- flds.detectAtScale(specificScale, rois, objects, 0);
+ flds.detect(specificScale, rois, objects, 0);
cv::Mat out(flds.detCounter);
int ndetections = *(out.data);