{
_svmDetector.getMat().convertTo(svmDetector, CV_32F);
- std::vector<float> detector;
- _svmDetector.getMat().copyTo(detector);
-
- std::vector<float> detector_reordered(detector.size());
+ Mat detector_reordered(1, (int)svmDetector.size(), CV_32FC1);
size_t block_hist_size = getBlockHistogramSize(blockSize, cellSize, nbins);
cv::Size blocks_per_img = numPartsWithin(winSize, blockSize, blockStride);
for (int i = 0; i < blocks_per_img.height; ++i)
for (int j = 0; j < blocks_per_img.width; ++j)
{
- const float *src = &detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
- float *dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
+ const float *src = &svmDetector[0] + (j * blocks_per_img.height + i) * block_hist_size;
+ float *dst = (float*)detector_reordered.data + (i * blocks_per_img.width + j) * block_hist_size;
for (size_t k = 0; k < block_hist_size; ++k)
dst[k] = src[k];
}
- Mat(detector_reordered).convertTo(oclSvmDetector, CV_32F);
+ detector_reordered.copyTo(oclSvmDetector);
CV_Assert(checkDetectorSize());
}
static bool ocl_compute_hists(int nbins, int block_stride_x, int block_stride_y, int height, int width,
UMat grad, UMat qangle, UMat gauss_w_lut, UMat block_hists, size_t block_hist_size)
{
+ ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc);
+ if(k.empty())
+ return false;
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
if(is_cpu)
opts = "-D CPU ";
else
- opts = cv::format("-D WAVE_SIZE=%d", 32);
-
- ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
+ opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
+ k.create("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, UMat block_hists, float threshold)
{
- bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
- cv::String opts;
- if(is_cpu)
- opts = "-D CPU ";
- else
- opts = cv::format("-D WAVE_SIZE=%d", 32);
-
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
/ block_stride_x;
size_t localThreads[3] = { 1, 1, 1 };
int idx = 0;
+ bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
+ cv::String opts;
ocl::Kernel k;
if ( nbins == 9 )
{
+ k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+ if(k.empty())
+ return false;
+ if(is_cpu)
+ opts = "-D CPU ";
+ else
+ opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
+
int blocks_in_group = NTHREADS / block_hist_size;
nthreads = blocks_in_group * block_hist_size;
int num_groups = (img_block_width * img_block_height + blocks_in_group - 1)/blocks_in_group;
}
else
{
+ k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+ if(k.empty())
+ return false;
+ if(is_cpu)
+ opts = "-D CPU ";
+ else
+ opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
+
nthreads = power_2up(block_hist_size);
globalThreads[0] = img_block_width * nthreads;
globalThreads[1] = img_block_height;
padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
Size paddedImgSize(imgSize.width + padding.width*2, imgSize.height + padding.height*2);
- if(ocl::useOpenCL() && _img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat() &&
+ CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat(),
ocl_compute(_img, winStride, descriptors, DESCR_FORMAT_COL_BY_COL))
- return;
Mat img = _img.getMat();
HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width,
- const UMat& block_hists, const std::vector<float>& _detector,
+ const UMat& block_hists, UMat detector,
float free_coef, float threshold, UMat& labels, Size descr_size, int block_hist_size)
{
int nthreads;
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
- if(is_cpu)
- opts = "-D CPU ";
- else
- opts = cv::format("-D WAVE_SIZE=%d", 32);
ocl::Kernel k;
int idx = 0;
{
case 180:
nthreads = 180;
+ k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+ if(k.empty())
+ return false;
+ if(is_cpu)
+ opts = "-D CPU ";
+ else
+ opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
idx = k.set(idx, descr_size.width);
idx = k.set(idx, descr_size.height);
break;
+
case 252:
nthreads = 256;
+ k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+ if(k.empty())
+ return false;
+ if(is_cpu)
+ opts = "-D CPU ";
+ else
+ opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
idx = k.set(idx, descr_size.width);
idx = k.set(idx, descr_size.height);
break;
+
default:
nthreads = 256;
+ k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
+ if(k.empty())
+ return false;
+ if(is_cpu)
+ opts = "-D CPU ";
+ else
+ opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
size_t localThreads[3] = { nthreads, 1, 1 };
- UMat detector(_detector, true);
-
idx = k.set(idx, block_hist_size);
idx = k.set(idx, img_win_width);
idx = k.set(idx, img_block_width);
return k.run(2, globalThreads, localThreads, false);
}
-bool HOGDescriptor::ocl_detect(const UMat& img, std::vector<Point> &hits,
+bool HOGDescriptor::ocl_detect(InputArray img, std::vector<Point> &hits,
double hit_threshold, Size win_stride) const
{
hits.clear();
{
std::vector<Rect> all_candidates;
std::vector<Point> locations;
- UMat img = _img.getUMat(), image_scale;
- image_scale.create(img.size(), img.type());
+ UMat image_scale;
+ Size imgSize = _img.size();
+ image_scale.create(imgSize, _img.type());
for (size_t i = 0; i<level_scale.size() ; i++)
{
double scale = level_scale[i];
- Size effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
- if (effect_size == img.size())
+ Size effect_size = Size(cvRound(imgSize.width / scale), cvRound(imgSize.height / scale));
+ if (effect_size == imgSize)
{
- if(!ocl_detect(img, locations, hit_threshold, win_stride)) return false;
+ if(!ocl_detect(_img, locations, hit_threshold, win_stride)) return false;
}
else
{
- resize(img, image_scale, effect_size);
+ resize(_img, image_scale, effect_size);
if(!ocl_detect(image_scale, locations, hit_threshold, win_stride)) return false;
}
Size scaled_win_size(cvRound(winSize.width * scale),
levels = std::max(levels, 1);
levelScale.resize(levels);
- if(ocl::useOpenCL() && _img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 &&
- winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat() &&
- ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold))
- return;
+ if(winStride == Size())
+ winStride = blockStride;
+
+ CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 &&
+ winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat(),
+ ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold));
std::vector<Rect> allCandidates;
std::vector<double> tempScales;