return (w < 0 || h < 0)? 0.f : (float)(w * h);
}
- __global__ void overlap(const uint* n, const Detection* detections, uchar* overlaps)
+ texture<uint4, cudaTextureType2D, cudaReadModeElementType> tdetections;
+
+ __global__ void overlap(const uint* n, uchar* overlaps)
{
const int idx = threadIdx.x;
const int total = *n;
- for (int i = idx; i < total; i += 192)
+ for (int i = idx + 1; i < total; i += 192)
{
- const Detection& a = detections[i];
+ const uint4 _a = tex2D(tdetections, i, 0);
+ const Detection& a = *((Detection*)(&_a));
bool excluded = false;
for (int j = i + 1; j < total; ++j)
{
- const Detection& b = detections[j];
+ const uint4 _b = tex2D(tdetections, j, 0);
+ const Detection& b = *((Detection*)(&_b));
float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h);
if (ovl > 0.65f)
}
}
- __global__ void collect(const uint* n, const Detection* detections, uchar* overlaps)
+ __global__ void collect(const uint* n, uchar* overlaps, uint* ctr, uint4* suppressed)
{
const int idx = threadIdx.x;
const int total = *n;
{
if (!overlaps[i])
{
- const Detection& det = detections[i];
- // printf("%d: %d %d %d %d %f\n", i, det.x, det.y, det.w, det.h, det.confidence );
+ int oidx = atomicInc(ctr, 50);
+ suppressed[oidx] = tex2D(tdetections, i + 1, 0);
}
}
}
- void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections)
+ void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed)
{
int block = 192;
int grid = 1;
- overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
- collect<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0));
+ cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint4>();
+ size_t offset;
+ cudaSafeCall( cudaBindTexture2D(&offset, tdetections, objects.data, desc, objects.cols / sizeof(uint4), objects.rows, objects.step));
+
+ overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0));
+ collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1);
+
// if (!stream)
{
cudaSafeCall( cudaGetLastError());
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins, cudaStream_t stream);
- void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections);
+ void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed);
}
namespace imgproc {
hogluv.setTo(cv::Scalar::all(0));
overlaps.create(1, 5000, CV_8UC1);
+ suppressed.create(1, sizeof(Detection) * 51, CV_8UC1);
return true;
}
{
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
overlaps.setTo(0);
- device::icf::suppress(objects, overlaps, ndetections);
+ suppressed.setTo(0);
+
+ device::icf::suppress(objects, overlaps, ndetections, suppressed);
// std::cout << cv::Mat(overlaps) << std::endl;
}
// used for area overlap computing during
GpuMat overlaps;
+ // used for suppression
+ GpuMat suppressed;
+
// Cascade from xml
GpuMat octaves;
GpuMat stages;
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const
{
CV_Assert(fields);
-
const GpuMat colored = image.getGpuMat();
// only color images are supperted
colored.copyTo(flds.hogluv);
}
+ GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
flds.detect(rois, tmp, objects, stream);
- // if (rejCriteria != NO_REJECT)
- flds.suppress(tmp, objects);
+ if (rejCriteria != NO_REJECT)
+ {
+ flds.suppress(tmp, objects);
+ flds.suppressed.copyTo(spr);
+ }
}
void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const