pwin = &sum.at<int>(pt);
return true;
}
+
+
+void LBPEvaluator::getUMats(std::vector<UMat>& bufs)
+{
+ bufs.clear();
+ bufs.push_back(usum);
+ bufs.push_back(ufbuf);
+}
//---------------------------------------------- HOGEvaluator ---------------------------------------
bool HOGEvaluator::Feature :: read( const FileNode& node )
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, Size sumSize0 )
{
- const int VECTOR_SIZE = 1;
- Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
- if( haar.empty() )
- return false;
-
- haar->setImage(_image, data.origWinSize, sumSize0);
-
- if( cascadeKernel.empty() )
- {
- cascadeKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc,
- format("-D VECTOR_SIZE=%d", VECTOR_SIZE));
- if( cascadeKernel.empty() )
- return false;
- }
-
+ int featureType = getFeatureType();
+ std::vector<UMat> bufs;
+ size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep };
+ bool ok = false;
+
if( ustages.empty() )
{
copyVectorToUMat(data.stages, ustages);
copyVectorToUMat(data.stumps, ustumps);
+ if( !data.subsets.empty() )
+ copyVectorToUMat(data.subsets, usubsets);
}
- std::vector<UMat> bufs;
- haar->getUMats(bufs);
- CV_Assert(bufs.size() == 3);
-
- Rect normrect = haar->getNormRect();
-
- //processingRectSize = Size(yStep, yStep);
- size_t globalsize[] = { (processingRectSize.width/yStep + VECTOR_SIZE-1)/VECTOR_SIZE, processingRectSize.height/yStep };
-
- cascadeKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
- ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
- ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
-
- // cascade classifier
- (int)data.stages.size(),
- ocl::KernelArg::PtrReadOnly(ustages),
- ocl::KernelArg::PtrReadOnly(ustumps),
+ if( featureType == FeatureEvaluator::HAAR )
+ {
+ Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
+ if( haar.empty() )
+ return false;
- ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
- processingRectSize,
- yStep, (float)factor,
- normrect, data.origWinSize, MAX_FACES);
- bool ok = cascadeKernel.run(2, globalsize, 0, true);
+ haar->setImage(_image, data.origWinSize, sumSize0);
+ if( haarKernel.empty() )
+ {
+ haarKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
+ if( haarKernel.empty() )
+ return false;
+ }
+
+ haar->getUMats(bufs);
+ Rect normrect = haar->getNormRect();
+
+ haarKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
+ ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
+ ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
+
+ // cascade classifier
+ (int)data.stages.size(),
+ ocl::KernelArg::PtrReadOnly(ustages),
+ ocl::KernelArg::PtrReadOnly(ustumps),
+
+ ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
+ processingRectSize,
+ yStep, (float)factor,
+ normrect, data.origWinSize, MAX_FACES);
+ ok = haarKernel.run(2, globalsize, 0, true);
+ }
+ else if( featureType == FeatureEvaluator::LBP )
+ {
+ Ptr<LBPEvaluator> lbp = featureEvaluator.dynamicCast<LBPEvaluator>();
+ if( lbp.empty() )
+ return false;
+
+ lbp->setImage(_image, data.origWinSize, sumSize0);
+ if( lbpKernel.empty() )
+ {
+ lbpKernel.create("runLBPClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
+ if( lbpKernel.empty() )
+ return false;
+ }
+
+ lbp->getUMats(bufs);
+
+ int subsetSize = (data.ncategories + 31)/32;
+ lbpKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
+ ocl::KernelArg::PtrReadOnly(bufs[1]), // optfeatures
+
+ // cascade classifier
+ (int)data.stages.size(),
+ ocl::KernelArg::PtrReadOnly(ustages),
+ ocl::KernelArg::PtrReadOnly(ustumps),
+ ocl::KernelArg::PtrReadOnly(usubsets),
+ subsetSize,
+
+ ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
+ processingRectSize,
+ yStep, (float)factor,
+ data.origWinSize, MAX_FACES);
+ ok = lbpKernel.run(2, globalsize, 0, true);
+ }
//CV_Assert(ok);
return ok;
}
double scaleFactor, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels )
{
+ int featureType = getFeatureType();
Size imgsz = _image.size();
int imgtype = _image.type();
maxObjectSize = imgsz;
bool use_ocl = ocl::useOpenCL() &&
- getFeatureType() == FeatureEvaluator::HAAR &&
+ (featureType == FeatureEvaluator::HAAR ||
+ featureType == FeatureEvaluator::LBP) &&
!isOldFormatCascade() &&
data.isStumpBased() &&
maskGenerator.empty() &&
bool CascadeClassifierImpl::read_(const FileNode& root)
{
tryOpenCL = true;
- cascadeKernel = ocl::Kernel();
+ haarKernel = ocl::Kernel();
+ lbpKernel = ocl::Kernel();
ustages.release();
ustumps.release();
if( !data.read(root) )
}
}
-#if 0
+
__kernel void runLBPClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
- int4 normrect, int2 windowsize, int maxFaces)
+ int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
- sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int stageIdx;
__global const Stump* stump = stumps;
+ __global const int* p = sum + mad24(iy, sumstep, ix);
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f;
- for( i = 0; i < ntrees; i++, stump++ )
+ for( i = 0; i < ntrees; i++, stump++, bitsets += bitsetSize )
{
float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
+ #define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
+ ((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
+ int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
- int4 ofs = f->ofs[0];
- sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
- ofs = f->ofs[1];
- sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
- if( weight.z > 0 )
- {
- ofs = f->ofs[2];
- sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
- }
+ int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
+ idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
+ idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
- s += (sval < st.y*nf) ? st.z : st.w;
+ mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
+ mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
+ mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
+ mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
+ mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
+
+ s += (bitsets[idx] & (1 << mask)) ? st.z : st.w;
}
if( s < stages[stageIdx].threshold )
- break;
+ break;
}
if( stageIdx == nstages )
}
}
}
-#endif
+