{
if (runOnGpu)
{
- cv::Mat cpu = readImage (GetParam().second);
+ cv::Mat cpu = readImage (GET_PARAM(1));
ASSERT_FALSE(cpu.empty());
cv::gpu::GpuMat colored(cpu);
cv::gpu::SoftCascade cascade;
- ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath(GetParam().first)));
+ ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath(GET_PARAM(0))));
- cv::gpu::GpuMat objectBoxes(1, 16384, CV_8UC1), rois(cascade.getRoiSize(), CV_8UC1);
-
- rois.setTo(0);
- cv::gpu::GpuMat sub(rois, cv::Rect(rois.cols / 4, rois.rows / 4,rois.cols / 2, rois.rows / 2));
- sub.setTo(cv::Scalar::all(1));
- cascade.detectMultiScale(colored, rois, objectBoxes);
+ cv::gpu::GpuMat objectBoxes(1, 16384, CV_8UC1), rois(cascade.getRoiSize(), CV_8UC1), trois;
+ rois.setTo(1);
+ cv::gpu::transpose(rois, trois);
+ cascade.detectMultiScale(colored, trois, objectBoxes);
TEST_CYCLE()
{
- cascade.detectMultiScale(colored, rois, objectBoxes);
+ cascade.detectMultiScale(colored, trois, objectBoxes);
}
- } else
+ }
+ else
{
- cv::Mat colored = readImage(GetParam().second);
+ cv::Mat colored = readImage(GET_PARAM(1));
ASSERT_FALSE(colored.empty());
cv::SoftCascade cascade;
- ASSERT_TRUE(cascade.load(getDataPath(GetParam().first)));
+ ASSERT_TRUE(cascade.load(getDataPath(GET_PARAM(0))));
+
+ std::vector<cv::Rect> rois;
- std::vector<cv::Rect> rois, objectBoxes;
+ typedef cv::SoftCascade::Detection Detection;
+ std::vector<Detection>objectBoxes;
cascade.detectMultiScale(colored, rois, objectBoxes);
TEST_CYCLE()
sub.setTo(1);
}
+ cv::gpu::GpuMat trois;
+ cv::gpu::transpose(rois, trois);
+
cv::gpu::GpuMat curr = objectBoxes;
- cascade.detectMultiScale(colored, rois, curr);
+ cascade.detectMultiScale(colored, trois, curr);
TEST_CYCLE()
{
curr = objectBoxes;
- cascade.detectMultiScale(colored, rois, curr);
+ cascade.detectMultiScale(colored, trois, curr);
}
}
else
sub.setTo(1);
cv::gpu::GpuMat curr = objectBoxes;
- cascade.detectMultiScale(colored, rois, curr);
+ cv::gpu::GpuMat trois;
+ cv::gpu::transpose(rois, trois);
+
+ cascade.detectMultiScale(colored, trois, curr);
TEST_CYCLE()
{
cv::Mat img = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
- if (PERF_RUN_GPU())
+ if (runOnGpu)
{
cv::gpu::CascadeClassifier_GPU d_cascade;
ASSERT_TRUE(d_cascade.load(perf::TestBase::getDataPath(GetParam().second)));
}
texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv;
- texture<char, cudaTextureType2D, cudaReadModeElementType> troi;
template<bool isUp>
__device__ __forceinline__ float rescale(const Level& level, Node& node)
float relScale = level.relScale;
float farea = scaledRect.z * scaledRect.w;
- dprintf("%d: feature %d box %d %d %d %d\n",threadIdx.x, (node.threshold >> 28), scaledRect.x, scaledRect.y,
- scaledRect.z, scaledRect.w);
- dprintf("%d: rescale: %f [%f %f] selected %f\n",threadIdx.x, level.relScale, level.scaling[0], level.scaling[1],
- level.scaling[(node.threshold >> 28) > 6]);
-
// rescale
scaledRect.x = __float2int_rn(relScale * scaledRect.x);
scaledRect.y = __float2int_rn(relScale * scaledRect.y);
const float expected_new_area = farea * relScale * relScale;
float approx = __fdividef(sarea, expected_new_area);
- dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
- scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
-
- float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx;
-
- rootThreshold *= level.scaling[(node.threshold >> 28) > 6];
-
- dprintf("%d: approximation %f %d -> %f %f\n",threadIdx.x, approx, (node.threshold & 0x0FFFFFFFU), rootThreshold,
- level.scaling[(node.threshold >> 28) > 6]);
+ float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx * level.scaling[(node.threshold >> 28) > 6];
return rootThreshold;
}
template<bool isUp>
__device__ __forceinline__ int get(int x, int y, uchar4 area)
{
-
- dprintf("%d: feature box %d %d %d %d\n",threadIdx.x, area.x, area.y, area.z, area.w);
- dprintf("%d: extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",threadIdx.x,
- x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
- x + area.x, y + area.w);
- dprintf("%d: at point %d %d with offset %d\n", x, y, 0);
-
int a = tex2D(thogluv, x + area.x, y + area.y);
int b = tex2D(thogluv, x + area.z, y + area.y);
int c = tex2D(thogluv, x + area.z, y + area.w);
int d = tex2D(thogluv, x + area.x, y + area.w);
- dprintf("%d retruved integral values: %d %d %d %d\n",threadIdx.x, a, b, c, d);
-
return (a - b + c - d);
}
template<>
__device__ __forceinline__ int get<true>(int x, int y, uchar4 area)
{
-
- dprintf("%d: feature box %d %d %d %d\n",threadIdx.x, area.x, area.y, area.z, area.w);
- dprintf("%d: extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",threadIdx.x,
- x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
- x + area.x, y + area.w);
- dprintf("%d: at point %d %d with offset %d\n", x, y, 0);
-
x += area.x;
y += area.y;
int a = tex2D(thogluv, x, y);
int c = tex2D(thogluv, x + area.z, y + area.w);
int d = tex2D(thogluv, x, y + area.w);
- dprintf("%d retruved integral values: %d %d %d %d\n",threadIdx.x, a, b, c, d);
-
return (a - b + c - d);
}
+ texture<float2, cudaTextureType2D, cudaReadModeElementType> troi;
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
template<bool isUp>
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x;
+ __shared__ volatile char roiCache[8];
+
+ if (!threadIdx.y && !threadIdx.x)
+ {
+ ((float2*)roiCache)[threadIdx.x] = tex2D(troi, blockIdx.y, x);
+ }
+
+ __syncthreads();
+
+ if (!roiCache[threadIdx.y]) return;
+
Level level = levels[downscales + blockIdx.z];
if(x >= level.workRect.x || y >= level.workRect.y) return;
- if (!tex2D(troi, x, y)) return;
-
Octave octave = octaves[level.octave];
int st = octave.index * octave.stages;
const int stEnd = st + 1024;
// if (blockIdx.z != 31) return;
if(x >= level.workRect.x || y >= level.workRect.y) return;
- int roi = tex2D(troi, x, y);
- printf("%d\n", roi);
- if (!roi) return;
+ // int roi = tex2D(troi, x, y);
+ // printf("%d\n", roi);
+ // if (!roi) return;
Octave octave = octaves[level.octave];
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
- cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<char>();
- cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols, roi.rows, roi.step));
+ cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
+ cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
test_kernel_warp<false><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
- cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<char>();
- cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols, roi.rows, roi.step));
+ cudaChannelFormatDesc desc_roi = cudaCreateChannelDesc<float2>();
+ cudaSafeCall( cudaBindTexture2D(0, troi, roi.data, desc_roi, roi.cols / 8, roi.rows, roi.step));
if (scale >= downscales)
test_kernel_warp<true><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
using cv::gpu::GpuMat;
// show detection results on input image with cv::imshow
-//#define SHOW_DETECTIONS
+#define SHOW_DETECTIONS
#if defined SHOW_DETECTIONS
# define SHOW(res) \
cv::gpu::SoftCascade cascade;
ASSERT_TRUE(cascade.load(cvtest::TS::ptr()->get_data_path() + GET_PARAM(0)));
- GpuMat colored(coloredCpu), objectBoxes(1, 16384, CV_8UC1), rois(cascade.getRoiSize(), CV_8UC1);
+ GpuMat colored(coloredCpu), objectBoxes(1, 16384, CV_8UC1), rois(cascade.getRoiSize(), CV_8UC1), trois;
rois.setTo(0);
int nroi = GET_PARAM(2);
+ cv::Mat result(coloredCpu);
cv::RNG rng;
for (int i = 0; i < nroi; ++i)
{
cv::Rect r = getFromTable(rng(10));
GpuMat sub(rois, r);
sub.setTo(1);
+ r.x *= 4; r.y *= 4; r.width *= 4; r.height *= 4;
+ cv::rectangle(result, r, cv::Scalar(0, 0, 255, 255), 1);
}
- cascade.detectMultiScale(colored, rois, objectBoxes);
+ cv::gpu::transpose(rois, trois);
+
+ cascade.detectMultiScale(colored, trois, objectBoxes);
///
cv::Mat dt(objectBoxes);
typedef cv::gpu::SoftCascade::Detection detection_t;
detection_t* dts = (detection_t*)dt.data;
- cv::Mat result(coloredCpu);
printTotal(std::cout, dt.cols);
for (int i = 0; i < (int)(dt.cols / sizeof(detection_t)); ++i)
GpuMat colored(coloredCpu), objectBoxes(1, 100 * sizeof(detection_t), CV_8UC1), rois(cascade.getRoiSize(), CV_8UC1);
rois.setTo(1);
+ cv::gpu::GpuMat trois;
+ cv::gpu::transpose(rois, trois);
+
int level = GET_PARAM(2);
- cascade.detectMultiScale(colored, rois, objectBoxes, 1, level);
+ cascade.detectMultiScale(colored, trois, objectBoxes, 1, level);
cv::Mat dt(objectBoxes);
GpuMat sub(rois, cv::Rect(rois.cols / 4, rois.rows / 4,rois.cols / 2, rois.rows / 2));
sub.setTo(cv::Scalar::all(1));
- cascade.detectMultiScale(colored, rois, objectBoxes);
+ cv::gpu::GpuMat trois;
+ cv::gpu::transpose(rois, trois);
+
+ cascade.detectMultiScale(colored, trois, objectBoxes);
}
#endif
\ No newline at end of file