\r
Ncv32u bufSize;\r
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );\r
- // printf("HERE!!!!!!!%d\n", bufSize);\r
integralBuffer.create(1, bufSize, CV_8UC1);\r
}\r
}\r
int step,\r
int subsetSize,\r
DevMem2D_<int4> objects,\r
- unsigned int* classified,\r
- const int maxX);\r
+ unsigned int* classified);\r
\r
int connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);\r
void bindIntegral(DevMem2Di integral);\r
\r
double factor = 1;\r
\r
- for (; processingRectSize.width / step >= 256;)\r
+ for (; ;)\r
{\r
- // std::cout << "IN FIXED: factor " << factor << " size " << processingRectSize.width << " " << processingRectSize.height << std::endl;\r
- // if (factor > 2.0) break;\r
if (processingRectSize.width <= 0 || processingRectSize.height <= 0 )\r
break;\r
\r
step = (factor <= 2.) + 1;\r
\r
cv::gpu::device::lbp::classifyStumpFixed(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,\r
- processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, factor, step, subsetSize, candidates, dclassified, processingRectSize.width);\r
-\r
- factor *= scaleFactor;\r
- windowSize = cv::Size(cvRound(NxM.width * factor), cvRound(NxM.height * factor));\r
- scaledImageSize = cv::Size(cvRound( image.cols / factor ), cvRound( image.rows / factor ));\r
- processingRectSize = cv::Size(scaledImageSize.width - NxM.width + 1, scaledImageSize.height - NxM.height + 1 );\r
- }\r
-\r
- for (; /*processingRectSize.width / step >= 128*/;)\r
- {\r
- // std::cout << "In FLOATING: factor " << factor << " size " << processingRectSize.width << " " << processingRectSize.height << std::endl;\r
- // if (factor > 2.0) break;\r
- if (processingRectSize.width <= 0 || processingRectSize.height <= 0 )\r
- break;\r
-\r
- if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )\r
- break;\r
-\r
- // if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )\r
- // continue;\r
-\r
- GpuMat scaledImg(resuzeBuffer, cv::Rect(0, 0, scaledImageSize.width, scaledImageSize.height));\r
- GpuMat scaledIntegral(integral, cv::Rect(0, 0, scaledImageSize.width + 1, scaledImageSize.height + 1));\r
- GpuMat currBuff = integralBuffer;\r
-\r
- cv::gpu::resize(image, scaledImg, scaledImageSize, 0, 0, CV_INTER_LINEAR);\r
- cv::gpu::integralBuffered(scaledImg, scaledIntegral, currBuff);\r
-\r
- step = (factor <= 2.) + 1;\r
-\r
- cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,\r
processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, factor, step, subsetSize, candidates, dclassified);\r
\r
factor *= scaleFactor;\r
if (groupThreshold <= 0 || objects.empty())\r
return 0;\r
cudaMemcpy(classified, dclassified, sizeof(int), cudaMemcpyDeviceToHost);\r
- // std::cout << "!!! CLASSIFIED " << *classified << std::endl;\r
cv::gpu::device::lbp::connectedConmonents(candidates, *classified, objects, groupThreshold, grouping_eps, dclassified);\r
cudaMemcpy(classified, dclassified, sizeof(int), cudaMemcpyDeviceToHost);\r
cudaSafeCall( cudaDeviceSynchronize() );\r
classifier(y, x, objects, maxN, n);
}
// Kernel: runs the LBP stump classifier once per thread over a 1D launch.
//
// Revised (+) form: each thread builds a flat index from
// blockIdx.x * blockDim.x + threadIdx.x, splits it into a (row, column) pair
// using maxX as the number of columns per row, and scales both coordinates by
// classifier.step before invoking the classifier functor.
// Removed (-) form: derived x from threadIdx.x and y from blockIdx.x with a
// `lines` factor, and returned early when x >= maxX.
//
//   classifier - functor invoked as classifier(row, col, objects, maxN, n)
//   objects    - forwarded unchanged to the classifier
//   maxN       - forwarded unchanged to the classifier
//   n          - forwarded unchanged to the classifier
//   maxX       - divisor used to unflatten the thread index into (y, x)
//
// NOTE(review): the revised form has no guard on ftid (the removed form had
// `if (x >= maxX) return;`). Threads in the last block whose index lies past
// the intended work amount still call the classifier, so this is only safe if
// the classifier bounds-checks internally - TODO confirm.
// NOTE(review): maxX == 0 would make `ftid / maxX` a division by zero - verify
// that the caller never passes 0.
- __global__ void lbp_classify_stump(const Classifier classifier, DevMem2D_<int4> objects, const unsigned int maxN, unsigned int* n, int lines, int maxX)
+ __global__ void lbp_classify_stump(const Classifier classifier, DevMem2D_<int4> objects, const unsigned int maxN, unsigned int* n, int maxX)
{
- int x = threadIdx.x * lines * classifier.step;
- if (x >= maxX) return;
// Flat global thread index across the whole 1D grid.
+ int ftid = blockIdx.x * blockDim.x + threadIdx.x;
// Row = quotient, column = remainder of the flat index with respect to maxX.
+ int y = ftid / maxX;
+ int x = ftid - y * maxX;
- int y = blockIdx.x * classifier.step / lines;
-
- classifier(y, x, objects, maxN, n);
// Convert grid coordinates to stepped coordinates before classifying.
+ classifier(y * classifier.step, x * classifier.step, objects, maxN, n);
}
template<typename Pr>
}
// Host-side launcher for lbp_classify_stump.
//
// Builds a Classifier from the raw cascade buffers (stages, nodes, leaves,
// subsets, features) plus nstages, clWidth, clHeight, scale, step and
// subsetSize, then launches the kernel with THREADS_BLOCK (256) threads per
// block. objects.cols is passed as the kernel's maxN, and `classified` as its
// counter pointer.
//
// Revised (+) form: the grid is sized from a flattened work amount,
// ceil(workHeight / step) * ceil(workWidth / step), divided into blocks with
// divUp (presumably a ceiling division - helper is defined outside this view).
// The former trailing `maxX` parameter is dropped from the signature.
//
// NOTE(review): work_amount assumes ceil(workWidth / step) columns per row, but
// the kernel is given `workWidth >> 1` as its row width. The two differ e.g.
// for step == 1 with any positive workWidth, and for step == 2 with an odd
// workWidth - confirm which row width is intended.
// NOTE(review): `workWidth >> 1` is 0 when workWidth <= 1, and the kernel
// divides by that value - verify that such sizes cannot reach this call.
// NOTE(review): no launch error check (e.g. cudaGetLastError) is visible in
// this function - confirm that the caller handles it.
void classifyStumpFixed(const DevMem2Db& mstages, const int nstages, const DevMem2Di& mnodes, const DevMem2Df& mleaves, const DevMem2Di& msubsets, const DevMem2Db& mfeatures,
- const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, unsigned int* classified,
- int maxX)
+ const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, unsigned int* classified)
{
// Threads per block used for the launch below.
const int THREADS_BLOCK = 256;
- int blocks = ceilf(workHeight / (float)step);
- int threads = ceilf(workWidth / (float)step);
// Total number of stepped window positions, then the block count covering them.
+ int work_amount = ceilf(workHeight / (float)step) * ceilf(workWidth / (float)step);
+ int blocks = divUp(work_amount, THREADS_BLOCK);
// Device-side classifier state; the raw buffer pointers are reinterpreted as
// the structure types the classifier expects.
Classifier clr((Stage*)(mstages.ptr()), (ClNode*)(mnodes.ptr()), mleaves.ptr(), msubsets.ptr(), (uchar4*)(mfeatures.ptr()), nstages, clWidth, clHeight, scale, step, subsetSize);
- int lines = divUp(threads, THREADS_BLOCK);
- lbp_classify_stump<<<blocks * lines, THREADS_BLOCK>>>(clr, objects, objects.cols, classified, lines, maxX);
// 1D launch; the last argument becomes the kernel's maxX (row width divisor).
+ lbp_classify_stump<<<blocks, THREADS_BLOCK>>>(clr, objects, objects.cols, classified, workWidth >> 1);
}
int connectedConmonents(DevMem2D_<int4> candidates, int ncandidates, DevMem2D_<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)