From: yao Date: Wed, 8 Aug 2012 07:55:39 +0000 (+0800) Subject: add SURF and HOG to ocl module X-Git-Tag: accepted/2.0/20130307.220821~364^2~272^2~17 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=64e9cf5d75f9e9190601753dd93c198be4604d9e;p=profile%2Fivi%2Fopencv.git add SURF and HOG to ocl module --- diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 3bc6729..c5075d8 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -12,6 +12,7 @@ // // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -924,6 +925,154 @@ namespace cv const oclMat& src3, double beta, oclMat& dst, int flags = 0); #endif + //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// + struct CV_EXPORTS HOGDescriptor + { + enum { DEFAULT_WIN_SIGMA = -1 }; + enum { DEFAULT_NLEVELS = 64 }; + enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; + + HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), + Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), + int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA, + double threshold_L2hys=0.2, bool gamma_correction=true, + int nlevels=DEFAULT_NLEVELS); + + size_t getDescriptorSize() const; + size_t getBlockHistogramSize() const; + + void setSVMDetector(const vector& detector); + + static vector getDefaultPeopleDetector(); + static vector getPeopleDetector48x96(); + static vector getPeopleDetector64x128(); + + void detect(const oclMat& img, vector& found_locations, + double hit_threshold=0, Size win_stride=Size(), + Size padding=Size()); + + void detectMultiScale(const oclMat& img, vector& found_locations, + double hit_threshold=0, Size win_stride=Size(), + Size padding=Size(), double scale0=1.05, + int group_threshold=2); + + void getDescriptors(const oclMat& img, Size win_stride, + oclMat& descriptors, + int descr_format=DESCR_FORMAT_COL_BY_COL); + + Size win_size; + Size block_size; + Size block_stride; + Size cell_size; + int nbins; + double win_sigma; + double threshold_L2hys; + bool gamma_correction; + int nlevels; + + protected: + void computeBlockHistograms(const oclMat& img); + void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle); + + double getWinSigma() const; + bool checkDetectorSize() const; + + static int numPartsWithin(int size, int part_size, int stride); + static Size numPartsWithin(Size size, Size part_size, Size stride); + + // Coefficients of the separating plane + float free_coef; + oclMat detector; + + // Results of the last classification step + oclMat labels; + Mat labels_host; + + // Results of the last histogram evaluation step + oclMat block_hists; + + // Gradients conputation results + oclMat grad, qangle; + + std::vector image_scales; + }; + + //! Speeded up robust features, port from GPU module. + ////////////////////////////////// SURF ////////////////////////////////////////// + class CV_EXPORTS SURF_OCL + { + public: + enum KeypointLayout + { + X_ROW = 0, + Y_ROW, + LAPLACIAN_ROW, + OCTAVE_ROW, + SIZE_ROW, + ANGLE_ROW, + HESSIAN_ROW, + ROWS_COUNT + }; + + //! the default constructor + SURF_OCL(); + //! the full constructor taking all the necessary parameters + explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4, + int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false); + + //! returns the descriptor size in float's (64 or 128) + int descriptorSize() const; + + //! upload host keypoints to device memory + void uploadKeypoints(const vector& keypoints, oclMat& keypointsocl); + //! download keypoints from device to host memory + void downloadKeypoints(const oclMat& keypointsocl, vector& keypoints); + + //! download descriptors from device to host memory + void downloadDescriptors(const oclMat& descriptorsocl, vector& descriptors); + + //! finds the keypoints using fast hessian detector used in SURF + //! supports CV_8UC1 images + //! keypoints will have nFeature cols and 6 rows + //! keypoints.ptr(X_ROW)[i] will contain x coordinate of i'th feature + //! keypoints.ptr(Y_ROW)[i] will contain y coordinate of i'th feature + //! keypoints.ptr(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature + //! keypoints.ptr(OCTAVE_ROW)[i] will contain octave of i'th feature + //! keypoints.ptr(SIZE_ROW)[i] will contain size of i'th feature + //! keypoints.ptr(ANGLE_ROW)[i] will contain orientation of i'th feature + //! keypoints.ptr(HESSIAN_ROW)[i] will contain response of i'th feature + void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints); + //! finds the keypoints and computes their descriptors. + //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction + void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors, + bool useProvidedKeypoints = false); + + void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints); + void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints, oclMat& descriptors, + bool useProvidedKeypoints = false); + + void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints, std::vector& descriptors, + bool useProvidedKeypoints = false); + + void releaseMemory(); + + // SURF parameters + float hessianThreshold; + int nOctaves; + int nOctaveLayers; + bool extended; + bool upright; + + //! max keypoints = min(keypointsRatio * img.size().area(), 65535) + float keypointsRatio; + + oclMat sum, mask1, maskSum, intBuffer; + + oclMat det, trace; + + oclMat maxPosBuffer; + + }; } } #include "opencv2/ocl/matrix_operations.hpp" diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp new file mode 100644 index 0000000..1f8a925 --- /dev/null +++ b/modules/ocl/src/hog.cpp @@ -0,0 +1,1787 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Wenju He, wenju@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) + +cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_nogpu(); } +size_t cv::ocl::HOGDescriptor::getDescriptorSize() const { throw_nogpu(); return 0; } +size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); return 0; } +double cv::ocl::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; } +bool cv::ocl::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; } +void cv::ocl::HOGDescriptor::setSVMDetector(const vector&) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::detect(const oclMat&, vector&, double, Size, Size) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat&, vector&, double, Size, Size, double, int) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat&) { throw_nogpu(); } +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat&, Size, oclMat&, int) { throw_nogpu(); } +std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector(); } +std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector(); } +std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector(); } + +#else + +#define CELL_WIDTH 8 +#define CELL_HEIGHT 8 +#define CELLS_PER_BLOCK_X 2 +#define CELLS_PER_BLOCK_Y 2 +#define NTHREADS 256 + +namespace cv { namespace ocl +{ + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *objdetect_hog; +}} + +namespace cv { namespace ocl { namespace device +{ + namespace hog + { + int cnbins; + int cblock_stride_x; + int cblock_stride_y; + int cnblocks_win_x; + int cnblocks_win_y; + int cblock_hist_size; + int cblock_hist_size_2up; + int cdescr_size; + int cdescr_width; + + void set_up_constants(int nbins, int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y); + + void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, + int height, int width, const cv::ocl::oclMat& grad, + const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists); + + void normalize_hists(int nbins, int block_stride_x, int block_stride_y, + int height, int width, cv::ocl::oclMat& block_hists, float threshold); + + void classify_hists(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, int height, + int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef, + float threshold, cv::ocl::oclMat& labels); + + void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists, + cv::ocl::oclMat& descriptors); + void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists, + cv::ocl::oclMat& descriptors); + + void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); + void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); + } +}}} + +using namespace ::cv::ocl::device; + +cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, + int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) + : win_size(win_size_), + block_size(block_size_), + block_stride(block_stride_), + cell_size(cell_size_), + nbins(nbins_), + win_sigma(win_sigma_), + threshold_L2hys(threshold_L2hys_), + gamma_correction(gamma_correction_), + nlevels(nlevels_) +{ + CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && + (win_size.height - block_size.height) % block_stride.height == 0); + + CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); + + CV_Assert(block_stride == cell_size); + + CV_Assert(cell_size == Size(8, 8)); + + Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + CV_Assert(cells_per_block == Size(2, 2)); + + cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); + hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); +} + +size_t cv::ocl::HOGDescriptor::getDescriptorSize() const +{ + return numPartsWithin(win_size, block_size, block_stride).area() * getBlockHistogramSize(); +} + +size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const +{ + Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + return (size_t)(nbins * cells_per_block.area()); +} + +double cv::ocl::HOGDescriptor::getWinSigma() const +{ + return win_sigma >= 0 ? win_sigma : (block_size.width + block_size.height) / 8.0; +} + +bool cv::ocl::HOGDescriptor::checkDetectorSize() const +{ + size_t detector_size = detector.rows * detector.cols; + size_t descriptor_size = getDescriptorSize(); + return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; +} + +void cv::ocl::HOGDescriptor::setSVMDetector(const vector& _detector) +{ + std::vector detector_reordered(_detector.size()); + + size_t block_hist_size = getBlockHistogramSize(); + cv::Size blocks_per_img = numPartsWithin(win_size, block_size, block_stride); + + for (int i = 0; i < blocks_per_img.height; ++i) + for (int j = 0; j < blocks_per_img.width; ++j) + { + const float* src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size; + float* dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size; + for (size_t k = 0; k < block_hist_size; ++k) + dst[k] = src[k]; + } + + this->detector.upload(Mat(detector_reordered).reshape(1, 1)); + + size_t descriptor_size = getDescriptorSize(); + free_coef = _detector.size() > descriptor_size ? _detector[descriptor_size] : 0; + + CV_Assert(checkDetectorSize()); +} + +void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle) +{ + CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); + + grad.create(img.size(), CV_32FC2); + + qangle.create(img.size(), CV_8UC2); + + float angleScale = (float)(nbins / CV_PI); + switch (img.type()) + { + case CV_8UC1: + hog::compute_gradients_8UC1(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); + break; + case CV_8UC4: + hog::compute_gradients_8UC4(img.rows, img.cols, img, angleScale, grad, qangle, gamma_correction); + break; + } +} + + +void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat& img) +{ + computeGradient(img, grad, qangle); + + size_t block_hist_size = getBlockHistogramSize(); + Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); + + block_hists.create(1, static_cast(block_hist_size * blocks_per_img.area()), CV_32F); + + hog::compute_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, grad, qangle, (float)getWinSigma(), block_hists); + + hog::normalize_hists(nbins, block_stride.width, block_stride.height, img.rows, img.cols, block_hists, (float)threshold_L2hys); +} + + +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, oclMat& descriptors, int descr_format) +{ + CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + + computeBlockHistograms(img); + + const size_t block_hist_size = getBlockHistogramSize(); + Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); + Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); + + descriptors.create(wins_per_img.area(), static_cast(blocks_per_win.area() * block_hist_size), CV_32F); + + switch (descr_format) + { + case DESCR_FORMAT_ROW_BY_ROW: + hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors); + break; + case DESCR_FORMAT_COL_BY_COL: + hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists, descriptors); + break; + default: + CV_Error(CV_StsBadArg, "Unknown descriptor format"); + } +} + + +void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector& hits, double hit_threshold, Size win_stride, Size padding) +{ + CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); + CV_Assert(padding == Size(0, 0)); + + hits.clear(); + if (detector.empty()) + return; + + computeBlockHistograms(img); + + if (win_stride == Size()) + win_stride = block_stride; + else + CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + + Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); + labels.create(1, wins_per_img.area(), CV_8U); + + hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists, + detector, (float)free_coef, (float)hit_threshold, labels); + + labels.download(labels_host); + unsigned char* vec = labels_host.ptr(); + for (int i = 0; i < wins_per_img.area(); i++) + { + int y = i / wins_per_img.width; + int x = i - wins_per_img.width * y; + if (vec[i]) + hits.push_back(Point(x * win_stride.width, y * win_stride.height)); + } +} + + + +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector& found_locations, double hit_threshold, + Size win_stride, Size padding, double scale0, int group_threshold) +{ + CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); + + vector level_scale; + double scale = 1.; + int levels = 0; + + for (levels = 0; levels < nlevels; levels++) + { + level_scale.push_back(scale); + if (cvRound(img.cols/scale) < win_size.width || + cvRound(img.rows/scale) < win_size.height || scale0 <= 1) + break; + scale *= scale0; + } + levels = std::max(levels, 1); + level_scale.resize(levels); + image_scales.resize(levels); + + std::vector all_candidates; + vector locations; + + for (size_t i = 0; i < level_scale.size(); i++) + { + scale = level_scale[i]; + Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale)); + oclMat smaller_img; + + if (sz == img.size()) + smaller_img = img; + else + { + image_scales[i].create(sz, img.type()); + resize(img, image_scales[i], image_scales[i].size(), 0, 0, INTER_LINEAR); + smaller_img = image_scales[i]; + } + + detect(smaller_img, locations, hit_threshold, win_stride, padding); + Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); + for (size_t j = 0; j < locations.size(); j++) + all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size)); + } + + found_locations.assign(all_candidates.begin(), all_candidates.end()); + groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); +} + +int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) +{ + return (size - part_size + stride) / stride; +} + +cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) +{ + return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); +} + +std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() +{ + return getPeopleDetector64x128(); +} + +std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() +{ + static const float detector[] = { + 0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f, + 0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f, + 0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f, + 0.254676f, -0.069235f, 0.082566f, 0.147260f, 0.326969f, 0.148888f, + 0.055270f, -0.087985f, 0.261720f, 0.143442f, 0.026812f, 0.238212f, + 0.194020f, 0.056341f, -0.025854f, -0.034444f, -0.156631f, 0.205174f, + 0.089008f, -0.139811f, -0.100147f, -0.037830f, -0.029230f, -0.055641f, + 0.033248f, -0.016512f, 0.155244f, 0.247315f, -0.124694f, -0.048414f, + -0.062219f, 0.193683f, 0.004574f, 0.055089f, 0.093565f, 0.167712f, + 0.167581f, 0.018895f, 0.215258f, 0.122609f, 0.090520f, -0.067219f, + -0.049029f, -0.099615f, 0.241804f, -0.094893f, -0.176248f, 0.001727f, + -0.134473f, 0.104442f, 0.050942f, 0.081165f, 0.072156f, 0.121646f, + 0.002656f, -0.297974f, -0.133587f, -0.060121f, -0.092515f, -0.048974f, + -0.084754f, -0.180111f, -0.038590f, 0.086283f, -0.134636f, -0.107249f, + 0.132890f, 0.141556f, 0.249425f, 0.130273f, -0.030031f, 0.073212f, + -0.008155f, 0.019931f, 0.071688f, 0.000300f, -0.019525f, -0.021725f, + -0.040993f, -0.086841f, 0.070124f, 0.240033f, 0.265350f, 0.043208f, + 0.166754f, 0.091453f, 0.060916f, -0.036972f, -0.091043f, 0.079873f, + 0.219781f, 0.158102f, -0.140618f, -0.043016f, 0.124802f, 0.093668f, + 0.103208f, 0.094872f, 0.080541f, 0.137711f, 0.160566f, -0.169231f, + 0.013983f, 0.309508f, -0.004217f, -0.057200f, -0.064489f, 0.014066f, + 0.361009f, 0.251328f, -0.080983f, -0.044183f, 0.061436f, -0.037381f, + -0.078786f, 0.030993f, 0.066314f, 0.037683f, 0.152325f, -0.091683f, + 0.070203f, 0.217856f, 0.036435f, -0.076462f, 0.006254f, -0.094431f, + 0.154829f, -0.023038f, -0.196961f, -0.024594f, 0.178465f, -0.050139f, + -0.045932f, -0.000965f, 0.109112f, 0.046165f, -0.159373f, -0.008713f, + 0.041307f, 0.097129f, -0.057211f, -0.064599f, 0.077165f, 0.176167f, + 0.138322f, 0.065753f, -0.104950f, 0.017933f, 0.136255f, -0.011598f, + 0.047007f, 0.080550f, 0.068619f, 0.084661f, -0.035493f, -0.091314f, + -0.041411f, 0.060971f, -0.101912f, -0.079870f, -0.085977f, -0.022686f, + 0.079788f, -0.098064f, -0.054603f, 0.040383f, 0.300794f, 0.128603f, + 0.094844f, 0.047407f, 0.101825f, 0.061832f, -0.162160f, -0.204553f, + -0.035165f, 0.101450f, -0.016641f, -0.027140f, -0.134392f, -0.008743f, + 0.102331f, 0.114853f, 0.009644f, 0.062823f, 0.237339f, 0.167843f, + 0.053066f, -0.012592f, 0.043158f, 0.002305f, 0.065001f, -0.038929f, + -0.020356f, 0.152343f, 0.043469f, -0.029967f, -0.042948f, 0.032481f, + 0.068488f, -0.110840f, -0.111083f, 0.111980f, -0.002072f, -0.005562f, + 0.082926f, 0.006635f, -0.108153f, 0.024242f, -0.086464f, -0.189884f, + -0.017492f, 0.191456f, -0.007683f, -0.128769f, -0.038017f, -0.132380f, + 0.091926f, 0.079696f, -0.106728f, -0.007656f, 0.172744f, 0.011576f, + 0.009883f, 0.083258f, -0.026516f, 0.145534f, 0.153924f, -0.130290f, + -0.108945f, 0.124490f, -0.003186f, -0.100485f, 0.015024f, -0.060512f, + 0.026288f, -0.086713f, -0.169012f, 0.076517f, 0.215778f, 0.043701f, + -0.131642f, -0.012585f, -0.045181f, -0.118183f, -0.241544f, -0.167293f, + -0.020107f, -0.019917f, -0.101827f, -0.107096f, -0.010503f, 0.044938f, + 0.189680f, 0.217119f, -0.046086f, 0.044508f, 0.199716f, -0.036004f, + -0.148927f, 0.013355f, -0.078279f, 0.030451f, 0.056301f, -0.024609f, + 0.083224f, 0.099533f, -0.039432f, -0.138880f, 0.005482f, -0.024120f, + -0.140468f, -0.066381f, -0.017057f, 0.009260f, -0.058004f, -0.028486f, + -0.061610f, 0.007483f, -0.158309f, -0.150687f, -0.044595f, -0.105121f, + -0.045763f, -0.006618f, -0.024419f, -0.117713f, -0.119366f, -0.175941f, + -0.071542f, 0.119027f, 0.111362f, 0.043080f, 0.034889f, 0.093003f, + 0.007842f, 0.057368f, -0.108834f, -0.079968f, 0.230959f, 0.020205f, + 0.011470f, 0.098877f, 0.101310f, -0.030215f, -0.018018f, -0.059552f, + -0.106157f, 0.021866f, -0.036471f, 0.080051f, 0.041165f, -0.082101f, + 0.117726f, 0.030961f, -0.054763f, -0.084102f, -0.185778f, -0.061305f, + -0.038089f, -0.110728f, -0.264010f, 0.076675f, -0.077111f, -0.137644f, + 0.036232f, 0.277995f, 0.019116f, 0.107738f, 0.144003f, 0.080304f, + 0.215036f, 0.228897f, 0.072713f, 0.077773f, 0.120168f, 0.075324f, + 0.062730f, 0.122478f, -0.049008f, 0.164912f, 0.162450f, 0.041246f, + 0.009891f, -0.097827f, -0.038700f, -0.023027f, -0.120020f, 0.203364f, + 0.248474f, 0.149810f, -0.036276f, -0.082814f, -0.090343f, -0.027143f, + -0.075689f, -0.320310f, -0.000500f, -0.143334f, -0.065077f, -0.186936f, + 0.129372f, 0.116431f, 0.181699f, 0.170436f, 0.418854f, 0.460045f, + 0.333719f, 0.230515f, 0.047822f, -0.044954f, -0.068086f, 0.140179f, + -0.044821f, 0.085550f, 0.092483f, -0.107296f, -0.130670f, -0.206629f, + 0.114601f, -0.317869f, -0.076663f, 0.038680f, 0.212753f, -0.016059f, + -0.126526f, -0.163602f, 0.210154f, 0.099887f, -0.126366f, 0.118453f, + 0.019309f, -0.021611f, -0.096499f, -0.111809f, -0.200489f, 0.142854f, + 0.228840f, -0.353346f, -0.179151f, 0.116834f, 0.252389f, -0.031728f, + -0.188135f, -0.158998f, 0.386523f, 0.122315f, 0.209944f, 0.394023f, + 0.359030f, 0.260717f, 0.170335f, 0.013683f, -0.142596f, -0.026138f, + -0.011878f, -0.150519f, 0.047159f, -0.107062f, -0.147347f, -0.187689f, + -0.186027f, -0.208048f, 0.058468f, -0.073026f, -0.236556f, -0.079788f, + -0.146216f, -0.058563f, -0.101361f, -0.071294f, -0.071093f, 0.116919f, + 0.234304f, 0.306781f, 0.321866f, 0.240000f, 0.073261f, -0.012173f, + 0.026479f, 0.050173f, 0.166127f, 0.228955f, 0.061905f, 0.156460f, + 0.205990f, 0.120672f, 0.037350f, 0.167884f, 0.290099f, 0.420900f, + -0.012601f, 0.189839f, 0.306378f, 0.118383f, -0.095598f, -0.072360f, + -0.132496f, -0.224259f, -0.126021f, 0.022714f, 0.284039f, 0.051369f, + -0.000927f, -0.058735f, -0.083354f, -0.141254f, -0.187578f, -0.202669f, + 0.048902f, 0.246597f, 0.441863f, 0.342519f, 0.066979f, 0.215286f, + 0.188191f, -0.072240f, -0.208142f, -0.030196f, 0.178141f, 0.136985f, + -0.043374f, -0.181098f, 0.091815f, 0.116177f, -0.126690f, -0.386625f, + 0.368165f, 0.269149f, -0.088042f, -0.028823f, 0.092961f, 0.024099f, + 0.046112f, 0.176756f, 0.135849f, 0.124955f, 0.195467f, -0.037218f, + 0.167217f, 0.188938f, 0.053528f, -0.066561f, 0.133721f, -0.070565f, + 0.115898f, 0.152435f, -0.116993f, -0.110592f, -0.179005f, 0.026668f, + 0.080530f, 0.075084f, -0.070401f, 0.012497f, 0.021849f, -0.139764f, + -0.022020f, -0.096301f, -0.064954f, -0.127446f, -0.013806f, -0.108315f, + 0.156285f, 0.149867f, -0.011382f, 0.064532f, 0.029168f, 0.027393f, + 0.069716f, 0.153735f, 0.038459f, 0.230714f, 0.253840f, 0.059522f, + -0.045053f, 0.014083f, 0.071103f, 0.068747f, 0.095887f, 0.005832f, + 0.144887f, 0.026357f, -0.067359f, -0.044151f, -0.123283f, -0.019911f, + 0.005318f, 0.109208f, -0.003201f, -0.021734f, 0.142025f, -0.066907f, + -0.120070f, -0.188639f, 0.012472f, -0.048704f, -0.012366f, -0.184828f, + 0.168591f, 0.267166f, 0.058208f, -0.044101f, 0.033500f, 0.178558f, + 0.104550f, 0.122418f, 0.080177f, 0.173246f, 0.298537f, 0.064173f, + 0.053397f, 0.174341f, 0.230984f, 0.117025f, 0.166242f, 0.227781f, + 0.120623f, 0.176952f, -0.011393f, -0.086483f, -0.008270f, 0.051700f, + -0.153369f, -0.058837f, -0.057639f, -0.060115f, 0.026349f, -0.160745f, + -0.037894f, -0.048575f, 0.041052f, -0.022112f, 0.060365f, 0.051906f, + 0.162657f, 0.138519f, -0.050185f, -0.005938f, 0.071301f, 0.127686f, + 0.062342f, 0.144400f, 0.072600f, 0.198436f, 0.246219f, -0.078185f, + -0.036169f, 0.075934f, 0.047328f, -0.013601f, 0.087205f, 0.019900f, + 0.022606f, -0.015365f, -0.092506f, 0.075275f, -0.116375f, 0.050500f, + 0.045118f, 0.166567f, 0.072073f, 0.060371f, 0.131747f, -0.169863f, + -0.039352f, -0.047486f, -0.039797f, -0.204312f, 0.021710f, 0.129443f, + -0.021173f, 0.173416f, -0.070794f, -0.063986f, 0.069689f, -0.064099f, + -0.123201f, -0.017372f, -0.206870f, 0.065863f, 0.113226f, 0.024707f, + -0.071341f, -0.066964f, -0.098278f, -0.062927f, 0.075840f, 0.014716f, + 0.019378f, 0.132699f, -0.074191f, -0.089557f, -0.078446f, -0.197488f, + -0.173665f, 0.052583f, 0.044361f, 0.113549f, 0.098492f, 0.077379f, + -0.011146f, -0.192593f, -0.164435f, 0.045568f, 0.205699f, 0.049187f, + -0.082281f, 0.134874f, 0.185499f, 0.034968f, -0.119561f, -0.112372f, + -0.115091f, -0.054042f, -0.183816f, -0.078100f, 0.190695f, 0.091617f, + 0.004257f, -0.041135f, -0.061453f, -0.141592f, -0.194809f, -0.120638f, + 0.020168f, 0.109672f, 0.067398f, -0.015238f, -0.239145f, -0.264671f, + -0.185176f, 0.050472f, 0.020793f, 0.035678f, 0.022839f, -0.052055f, + -0.127968f, -0.113049f, -0.228416f, -0.258281f, -0.053437f, 0.076424f, + 0.061450f, 0.237478f, 0.003618f, -0.055865f, -0.108087f, -0.028937f, + 0.045585f, 0.052829f, -0.001471f, 0.022826f, 0.059565f, -0.104430f, + -0.077266f, -0.211882f, -0.212078f, 0.028074f, 0.075846f, 0.016265f, + 0.161879f, 0.134477f, 0.008935f, -0.048041f, 0.074692f, 0.004928f, + -0.025156f, 0.192874f, 0.074410f, 0.308732f, 0.267400f, 0.094208f, + -0.005251f, 0.042041f, -0.032148f, 0.015588f, 0.252869f, 0.175302f, + 0.022892f, 0.081673f, 0.063208f, 0.162626f, 0.194426f, 0.233890f, + 0.262292f, 0.186930f, 0.084079f, -0.286388f, -0.213034f, -0.048867f, + -0.207669f, -0.170050f, 0.011673f, -0.092958f, -0.192786f, -0.273536f, + 0.230904f, 0.266732f, 0.320519f, 0.297155f, 0.548169f, 0.304922f, + 0.132687f, 0.247333f, 0.212488f, -0.271472f, -0.142105f, -0.002627f, + -0.119215f, 0.128383f, 0.100079f, -0.057490f, -0.121902f, -0.228892f, + 0.202292f, -0.399795f, -0.371326f, -0.095836f, -0.063626f, -0.161375f, + -0.311180f, -0.294797f, 0.242122f, 0.011788f, 0.095573f, 0.322523f, + 0.511840f, 0.322880f, 0.313259f, 0.173331f, 0.002542f, -0.029802f, + 0.324766f, -0.326170f, -0.340547f, -0.138288f, -0.002963f, -0.114060f, + -0.377312f, -0.442570f, 0.212446f, -0.007759f, -0.011576f, 0.169711f, + 0.308689f, 0.317348f, 0.539390f, 0.332845f, 0.057331f, -0.068180f, + 0.101994f, 0.266995f, 0.209570f, 0.355730f, 0.091635f, 0.170238f, + 0.125215f, 0.274154f, 0.070223f, 0.025515f, 0.049946f, -0.000550f, + 0.043715f, -0.141843f, 0.020844f, 0.129871f, 0.256588f, 0.105015f, + 0.148339f, 0.170682f, 0.028792f, 0.074037f, 0.160042f, 0.405137f, + 0.246187f, 0.352160f, 0.168951f, 0.222263f, 0.264439f, 0.065945f, + 0.021963f, -0.075084f, 0.093105f, 0.027318f, 0.098864f, 0.057566f, + -0.080282f, 0.185032f, 0.314419f, 0.333727f, 0.125798f, 0.294919f, + 0.386002f, 0.217619f, -0.183517f, -0.278622f, -0.002342f, -0.027821f, + -0.134266f, -0.331843f, -0.008296f, 0.124564f, 0.053712f, -0.369016f, + -0.095036f, 0.209381f, 0.423760f, 0.371760f, 0.106397f, 0.369408f, + 0.485608f, 0.231201f, -0.138685f, -0.349208f, -0.070083f, 0.028991f, + -0.081630f, -0.395992f, -0.146791f, -0.027354f, 0.063396f, -0.272484f, + 0.058299f, 0.338207f, 0.110767f, -0.052642f, -0.233848f, -0.027448f, + 0.030328f, 0.155572f, -0.093826f, 0.019331f, 0.120638f, 0.006292f, + -0.106083f, -0.236290f, -0.140933f, -0.088067f, -0.025138f, -0.208395f, + -0.025502f, 0.144192f, -0.048353f, -0.106144f, -0.305121f, -0.114147f, + 0.090963f, 0.327727f, 0.035606f, -0.093779f, 0.002651f, -0.171081f, + -0.188131f, -0.216571f, -0.209101f, -0.054402f, 0.157147f, -0.057127f, + 0.066584f, 0.008988f, 0.041191f, 0.034456f, -0.078255f, 0.052099f, + -0.022239f, 0.066981f, -0.117520f, -0.072637f, 0.062512f, 0.037570f, + -0.057544f, -0.312359f, 0.034357f, -0.031549f, 0.002566f, -0.207375f, + -0.070654f, -0.018786f, -0.044815f, -0.012814f, -0.076320f, 0.078183f, + 0.023877f, 0.117078f, 0.022292f, -0.205424f, -0.060430f, -0.017296f, + -0.004827f, -0.321036f, -0.092155f, 0.038837f, 0.073190f, -0.067513f, + 0.026521f, 0.171945f, 0.087318f, 0.034495f, -0.034089f, 0.154410f, + -0.061431f, 0.007435f, -0.111094f, -0.095976f, 0.014741f, -0.132324f, + -0.029517f, -0.192160f, 0.098667f, 0.020762f, 0.177050f, -0.064510f, + -0.054437f, -0.058678f, -0.001858f, 0.167602f, 0.015735f, 0.054338f, + 0.016477f, 0.186381f, -0.010667f, 0.054692f, 0.126742f, 0.013140f, + 0.090353f, -0.133608f, -0.018017f, -0.152619f, 0.027600f, -0.138700f, + -0.050274f, 0.045141f, -0.118731f, 0.094797f, -0.167605f, 0.097461f, + -0.009131f, 0.199920f, -0.052976f, 0.158194f, 0.178568f, -0.107600f, + 0.009671f, -0.084072f, -0.040258f, -0.205673f, 0.102891f, 0.223511f, + 0.042699f, 0.118548f, -0.021274f, 0.110997f, -0.155121f, 0.027696f, + -0.149968f, 0.051552f, -0.129219f, 0.173524f, 0.073972f, -0.189045f, + -0.034523f, -0.106655f, -0.011843f, -0.197381f, 0.219413f, 0.183197f, + -0.054920f, 0.144955f, 0.036517f, -0.085412f, -0.229070f, -0.143710f, + -0.049486f, 0.156634f, -0.008673f, -0.064778f, 0.082344f, 0.145673f, + 0.002912f, -0.210121f, -0.116564f, 0.078425f, 0.220908f, -0.067594f, + 0.048610f, 0.084912f, -0.066202f, -0.112515f, -0.217767f, -0.082640f, + -0.017414f, 0.230265f, -0.070735f, 0.066073f, 0.215256f, 0.071157f, + -0.087220f, -0.202235f, -0.011918f, 0.099562f, 0.174716f, -0.063845f, + -0.121055f, 0.014367f, 0.132709f, -0.005060f, -0.244606f, -0.179693f, + -0.134690f, 0.023239f, -0.193116f, -0.076975f, -0.021164f, -0.001938f, + -0.163799f, -0.111437f, -0.210362f, -0.166376f, 0.034754f, 0.010036f, + -0.021917f, 0.068014f, -0.086893f, -0.251746f, -0.267171f, 0.037383f, + 0.003966f, 0.033571f, -0.151506f, 0.025437f, -0.020626f, -0.308454f, + -0.343143f, -0.092263f, -0.026261f, -0.028345f, 0.036036f, 0.035169f, + 0.129470f, 0.122205f, 0.015661f, -0.070612f, -0.094333f, -0.066055f, + -0.041083f, 0.159146f, 0.073184f, 0.110044f, 0.174471f, 0.078069f, + -0.014881f, 0.008116f, 0.013209f, 0.075857f, 0.195605f, 0.062714f, + 0.067955f, 0.056544f, -0.153908f, -0.141749f, -0.072550f, 0.033523f, + -0.024665f, 0.134487f, 0.079076f, 0.133562f, 0.227130f, 0.018054f, + 0.004928f, 0.169162f, 0.065152f, 0.072160f, 0.131631f, 0.096303f, + 0.054288f, 0.106256f, 0.114632f, 0.119038f, 0.515200f, 0.247429f, + 0.199134f, 0.211957f, 0.127558f, -0.294684f, -0.194890f, -0.049988f, + -0.112247f, -0.008122f, -0.006176f, 0.037035f, -0.110881f, -0.249989f, + 0.152434f, 0.234621f, 0.153340f, 0.349283f, 0.683049f, 0.157174f, + 0.124844f, 0.099136f, 0.064407f, -0.248400f, -0.155323f, -0.026498f, + -0.023450f, 0.049051f, -0.114187f, 0.007195f, -0.176825f, -0.376926f, + 0.366159f, -0.179938f, -0.148508f, 0.006043f, 0.170048f, 0.097866f, + -0.102658f, -0.260430f, 0.248868f, 0.037019f, -0.118111f, 0.078176f, + 0.194171f, 0.211328f, 0.368612f, 0.361213f, 0.130013f, 0.094650f, + 0.227396f, -0.178058f, -0.114782f, -0.008093f, 0.231080f, -0.011843f, + -0.097917f, -0.325788f, 0.141879f, 0.119738f, -0.230427f, -0.117419f, + -0.114153f, 0.037903f, 0.116383f, 0.218773f, -0.101884f, 0.059466f, + 0.119255f, 0.010874f, -0.031449f, 0.045996f, 0.119931f, 0.273760f, + 0.311700f, 0.261794f, 0.194809f, 0.339829f, 0.239449f, 0.064140f, + 0.077597f, 0.098996f, 0.143534f, 0.184602f, 0.037507f, 0.225494f, + 0.096142f, -0.147370f, -0.207833f, -0.174742f, -0.086391f, -0.038942f, + 0.159577f, -0.088492f, -0.000989f, 0.108154f, -0.025890f, -0.072713f, + 0.025997f, -0.006803f, -0.086879f, -0.011290f, -0.269200f, -0.103450f, + -0.124910f, -0.116340f, 0.141459f, 0.208800f, 0.042268f, 0.265034f, + 0.516474f, 0.217591f, -0.018843f, -0.313328f, -0.168363f, 0.047129f, + 0.090480f, -0.109852f, -0.018761f, 0.210669f, 0.281269f, -0.043591f, + -0.034147f, -0.237772f, -0.134843f, -0.072481f, -0.103831f, 0.038355f, + 0.308619f, 0.148023f, -0.045867f, -0.123950f, -0.210860f, -0.064973f, + -0.036308f, -0.046731f, -0.022099f, 0.095776f, 0.409423f, 0.060635f, + -0.065196f, 0.051828f, 0.027981f, -0.009609f, -0.137681f, -0.095011f, + -0.019045f, 0.177278f, 0.009759f, -0.092119f, -0.016958f, -0.133860f, + -0.118421f, -0.032039f, -0.006214f, -0.084541f, 0.063971f, -0.073642f, + 0.165676f, 0.110443f, 0.044131f, 0.046568f, 0.053292f, -0.055466f, + 0.015512f, 0.371947f, 0.232102f, -0.016923f, 0.103979f, -0.091758f, + 0.005907f, 0.209100f, 0.157433f, 0.030518f, 0.250366f, 0.062322f, + 0.036720f, 0.094676f, 0.017306f, -0.010328f, -0.079012f, 0.016781f, + -0.112435f, 0.061795f, 0.042543f, -0.126799f, -0.009975f, -0.056760f, + 0.046424f, -0.194712f, -0.139399f, -0.037731f, 0.157989f, -0.016261f, + 0.123345f, 0.230563f, 0.083300f, -0.016392f, 0.059567f, -0.016035f, + -0.064767f, 0.231945f, 0.156629f, 0.034602f, 0.145628f, 0.041315f, + 0.034535f, 0.019967f, -0.089188f, -0.012091f, 0.307857f, 0.211405f, + -0.025091f, -0.148249f, -0.129384f, 0.063536f, -0.068603f, -0.067941f, + -0.035104f, 0.210832f, 0.063810f, 0.062764f, -0.089889f, -0.030554f, + 0.014791f, -0.053362f, -0.037818f, -0.196640f, 0.008388f, -0.082654f, + 0.143056f, 0.064221f, 0.069795f, 0.191040f, 0.097321f, -0.028679f, + 0.075794f, 0.313154f, 0.086240f, 0.207643f, 0.017809f, 0.122867f, + 0.224586f, 0.167403f, -0.023884f, 0.047434f, 0.344091f, 0.187745f, + 0.136177f, 0.141738f, 0.063799f, 0.045233f, -0.077342f, -0.003525f, + -0.165041f, -0.025616f, -0.073745f, 0.164439f, 0.011200f, -0.145896f, + -0.027954f, -0.061987f, -0.039874f, -0.142775f, 0.151042f, -0.038238f, + 0.053152f, 0.078615f, 0.086061f, 0.100593f, 0.128046f, -0.071006f, + -0.116558f, 0.208445f, 0.051086f, 0.076843f, 0.023191f, -0.084781f, + -0.011790f, 0.147807f, -0.048554f, -0.113932f, 0.283322f, 0.190934f, + 0.092789f, 0.033018f, -0.142428f, -0.142480f, -0.099023f, -0.041020f, + -0.042760f, 0.203295f, -0.053475f, 0.042424f, 0.222839f, -0.019167f, + -0.133176f, -0.276216f, -0.031998f, 0.117290f, 0.177827f, -0.059973f, + -0.064744f, -0.117040f, -0.155482f, -0.099531f, 0.164121f, -0.026682f, + -0.093810f, 0.238993f, -0.006506f, 0.007830f, 0.065819f, -0.203643f, + -0.100925f, -0.053652f, -0.130770f, 0.026277f, 0.131796f, 0.032742f, + 0.127186f, 0.116694f, -0.161122f, -0.279773f, -0.252515f, -0.002638f, + 0.042812f, 0.096776f, -0.123280f, 0.064858f, -0.010455f, -0.219760f, + -0.239331f, -0.104363f, -0.058022f, -0.053584f, 0.025611f, 0.005129f, + -0.100418f, -0.045712f, -0.194418f, -0.126366f, -0.030530f, 0.051168f, + 0.215959f, 0.172402f, -0.054700f, -0.185995f, -0.278360f, -0.193693f, + -0.040309f, 0.003735f, -0.007770f, 0.123556f, 0.190179f, -0.077315f, + 0.117403f, 0.212942f, 0.012160f, 0.000113f, 0.027331f, 0.040202f, + 0.033293f, 0.219438f, 0.184174f, 0.259349f, 0.311206f, 0.082547f, + -0.047875f, -0.078417f, 0.010746f, 0.082620f, 0.311931f, 0.307605f, + 0.003863f, 0.021405f, -0.026388f, -0.019572f, 0.020582f, -0.059353f, + 0.025199f, 0.261319f, 0.086316f, 0.143614f, 0.107780f, 0.003900f, + -0.188397f, -0.038563f, -0.106045f, -0.125154f, -0.010509f, 0.054021f, + 0.242130f, 0.279152f, 0.215546f, 0.346995f, 0.440856f, 0.237452f, + 0.234154f, 0.301646f, 0.168929f, -0.208358f, -0.126848f, 0.010260f, + 0.121018f, -0.062975f, -0.052848f, 0.050341f, -0.061103f, -0.266482f, + 0.107186f, 0.140221f, 0.280065f, 0.287889f, 0.373198f, 0.151596f, + 0.013593f, 0.115616f, 0.014616f, -0.281710f, -0.237597f, -0.117305f, + -0.000034f, -0.136739f, -0.196275f, -0.095225f, -0.125310f, -0.250514f, + 0.236804f, -0.071805f, -0.037421f, 0.048230f, 0.321596f, 0.063632f, + 0.024039f, -0.029133f, 0.230983f, 0.160593f, -0.154355f, -0.013086f, + -0.079929f, 0.094692f, 0.160391f, 0.180239f, 0.053895f, 0.100759f, + 0.288631f, 0.038191f, 0.181692f, 0.229682f, 0.440166f, 0.063401f, + 0.006273f, 0.020865f, 0.338695f, 0.256244f, -0.043927f, 0.115617f, + 0.003296f, 0.173965f, 0.021318f, -0.040936f, -0.118932f, 0.182380f, + 0.235922f, -0.053233f, -0.015053f, -0.101057f, 0.095341f, 0.051111f, + 0.161831f, 0.032614f, 0.159496f, 0.072375f, 0.025089f, 0.023748f, + 0.029151f, 0.161284f, -0.117717f, -0.036191f, -0.176822f, -0.162006f, + 0.226542f, -0.078329f, 0.043079f, -0.119172f, 0.054614f, -0.101365f, + -0.064541f, -0.115304f, 0.135170f, 0.298872f, 0.098060f, 0.089428f, + -0.007497f, 0.110391f, -0.028824f, 0.020835f, -0.036804f, 0.125411f, + 0.192105f, -0.048931f, 0.003086f, -0.010681f, 0.074698f, -0.016263f, + 0.096063f, 0.060267f, -0.007277f, 0.139139f, -0.080635f, 0.036628f, + 0.086058f, 0.131979f, 0.085707f, 0.025301f, 0.226094f, 0.194759f, + 0.042193f, -0.157846f, -0.068402f, -0.141450f, -0.112659f, -0.076305f, + -0.069085f, -0.114332f, -0.102005f, 0.132193f, -0.067042f, 0.106643f, + 0.198964f, 0.171616f, 0.167237f, -0.033730f, -0.026755f, 0.083621f, + 0.149459f, -0.002799f, -0.000318f, 0.011753f, 0.065889f, -0.089375f, + -0.049610f, 0.224579f, 0.216548f, -0.034908f, -0.017851f, -0.088144f, + 0.007530f, 0.240268f, 0.073270f, 0.013263f, 0.175323f, 0.012082f, + 0.093993f, 0.015282f, 0.105854f, 0.107990f, 0.077798f, -0.096166f, + -0.079607f, 0.177820f, 0.142392f, 0.033337f, -0.078100f, -0.081616f, + -0.046993f, 0.139459f, 0.020272f, -0.123161f, 0.175269f, 0.105217f, + 0.057328f, 0.080909f, -0.012612f, -0.097081f, 0.082060f, -0.096716f, + -0.063921f, 0.201884f, 0.128166f, -0.035051f, -0.032227f, -0.068139f, + -0.115915f, 0.095080f, -0.086007f, -0.067543f, 0.030776f, 0.032712f, + 0.088937f, 0.054336f, -0.039329f, -0.114022f, 0.171672f, -0.112321f, + -0.217646f, 0.065186f, 0.060223f, 0.192174f, 0.055580f, -0.131107f, + -0.144338f, 0.056730f, -0.034707f, -0.081616f, -0.135298f, -0.000614f, + 0.087189f, 0.014614f, 0.067709f, 0.107689f, 0.225780f, 0.084361f, + -0.008544f, 0.051649f, -0.048369f, -0.037739f, -0.060710f, 0.002654f, + 0.016935f, 0.085563f, -0.015961f, -0.019265f, 0.111788f, 0.062376f, + 0.202019f, 0.047713f, 0.042261f, 0.069716f, 0.242913f, 0.021052f, + -0.072812f, -0.155920f, -0.026436f, 0.035621f, -0.079300f, -0.028787f, + -0.048329f, 0.084718f, -0.060565f, -0.083750f, -0.164075f, -0.040742f, + -0.086219f, 0.015271f, -0.005204f, -0.016038f, 0.045816f, -0.050433f, + -0.077652f, 0.117109f, 0.009611f, -0.009045f, -0.008634f, -0.055373f, + -0.085968f, 0.028527f, -0.054736f, -0.168089f, 0.175839f, 0.071205f, + -0.023603f, 0.037907f, -0.004561f, -0.022634f, 0.123831f, 0.094469f, + -0.072920f, -0.133642f, -0.014032f, -0.142754f, -0.026999f, -0.199409f, + 0.013268f, 0.226989f, 0.048650f, -0.170988f, -0.050141f, 0.007880f, + 0.061880f, 0.019078f, -0.043578f, -0.038139f, 0.134814f, 0.054097f, + -0.081670f, 0.176838f, 0.047920f, -0.038176f, 0.050406f, -0.107181f, + -0.036279f, 0.027060f, 0.081594f, -0.002820f, 0.090507f, -0.033338f, + -0.059571f, 0.013404f, -0.099860f, 0.073371f, 0.342805f, 0.098305f, + -0.150910f, -0.020822f, -0.056960f, 0.046262f, -0.043413f, -0.149405f, + -0.129105f, -0.010899f, -0.014229f, -0.179949f, -0.113044f, -0.049468f, + -0.065513f, 0.090269f, -0.011919f, 0.087846f, 0.095796f, 0.146127f, + 0.101599f, 0.078066f, -0.084348f, -0.100002f, -0.020134f, -0.050169f, + 0.062122f, 0.014640f, 0.019143f, 0.036543f, 0.180924f, -0.013976f, + -0.066768f, -0.001090f, -0.070419f, -0.004839f, -0.001504f, 0.034483f, + -0.044954f, -0.050336f, -0.088638f, -0.174782f, -0.116082f, -0.205507f, + 0.015587f, -0.042839f, -0.096879f, -0.144097f, -0.050268f, -0.196796f, + 0.109639f, 0.271411f, 0.173732f, 0.108070f, 0.156437f, 0.124255f, + 0.097242f, 0.238693f, 0.083941f, 0.109105f, 0.223940f, 0.267188f, + 0.027385f, 0.025819f, 0.125070f, 0.093738f, 0.040353f, 0.038645f, + -0.012730f, 0.144063f, 0.052931f, -0.009138f, 0.084193f, 0.160272f, + -0.041366f, 0.011951f, -0.121446f, -0.106713f, -0.047566f, 0.047984f, + -0.255224f, -0.076116f, 0.098685f, -0.150845f, -0.171513f, -0.156590f, + 0.058331f, 0.187493f, 0.413018f, 0.554265f, 0.372242f, 0.237943f, + 0.124571f, 0.110829f, 0.010322f, -0.174477f, -0.067627f, -0.001979f, + 0.142913f, 0.040597f, 0.019907f, 0.025963f, -0.043585f, -0.120732f, + 0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f, + -0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f, + -0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f, + -9.063785f }; + return vector(detector, detector + sizeof(detector)/sizeof(detector[0])); +} + + + + +std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() +{ + static const float detector[] = { + 0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f, + 0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f, + 0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f, + 0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f, + -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f, + -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f, + -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f, + 0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f, + 0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f, + 0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f, + 0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f, + 0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f, + 5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f, + 0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f, + 0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f, + 0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f, + 0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f, + 0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f, + 0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f, + -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f, + -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f, + -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f, + 0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f, + 0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f, + -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f, + 0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f, + -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f, + 0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f, + 0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f, + -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f, + -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f, + -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f, + 0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f, + -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f, + 0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f, + 0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f, + -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f, + 0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f, + 3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f, + -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f, + -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f, + -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f, + -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f, + -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f, + -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f, + -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f, + 0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f, + 0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f, + 0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f, + 0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f, + 0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f, + 8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f, + -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f, + -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f, + -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f, + 0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f, + -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f, + -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f, + 0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f, + -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f, + -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f, + 0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f, + 0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f, + 2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f, + 0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f, + -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f, + 6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f, + 0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f, + -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f, + -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f, + -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f, + 1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f, + -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f, + -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f, + -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f, + 0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f, + 0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f, + 0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f, + -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f, + -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f, + -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f, + 0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f, + -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f, + 8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f, + -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f, + -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f, + -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f, + 9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f, + 0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f, + -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f, + 2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f, + 2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f, + -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f, + 0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f, + -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f, + 8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f, + -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f, + -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f, + 8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f, + 0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f, + 0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f, + 0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f, + -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f, + 0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f, + 0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f, + 0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f, + -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f, + -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f, + 0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f, + 0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f, + -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f, + 1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f, + 0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f, + -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f, + -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f, + -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f, + 0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f, + -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f, + 0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f, + -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f, + 0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f, + 0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f, + -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f, + -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f, + 0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f, + 9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f, + -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f, + -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f, + -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f, + -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f, + 0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f, + -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f, + 7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f, + -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f, + 7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f, + 0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f, + 0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f, + 0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f, + -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f, + -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f, + 0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f, + -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f, + 0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f, + 0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f, + -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f, + 0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f, + -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f, + 0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f, + -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f, + -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f, + -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f, + -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f, + -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f, + 0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f, + 0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f, + -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f, + 0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f, + 0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f, + 0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f, + -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f, + -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f, + 0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f, + -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f, + 8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f, + -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f, + -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f, + -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f, + -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f, + 0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f, + 0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f, + 0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f, + 0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f, + 0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f, + 0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f, + -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f, + 0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f, + 0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f, + 0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f, + 0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f, + -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f, + 0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f, + 4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f, + -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f, + -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f, + 0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f, + 0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f, + 0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f, + 0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f, + -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f, + 0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f, + -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f, + -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f, + -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f, + 0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f, + -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f, + -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f, + -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f, + -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f, + -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f, + 8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f, + -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f, + -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f, + -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f, + 3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f, + -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f, + -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f, + -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f, + 0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f, + 1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f, + 0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f, + -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f, + -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f, + -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f, + -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f, + 0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f, + -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f, + 0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f, + -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f, + -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f, + -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f, + 0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f, + -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f, + 0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f, + 0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f, + -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f, + -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f, + 0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f, + 0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f, + 0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f, + -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f, + -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f, + 0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f, + -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f, + 2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f, + 0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f, + -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f, + 0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f, + 0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f, + 0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f, + 0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f, + 0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f, + -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f, + -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f, + 0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f, + -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f, + 0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f, + -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f, + 0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f, + -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f, + -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f, + 0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f, + 5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f, + 0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f, + -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f, + 0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f, + 0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f, + 0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f, + 0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f, + 0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f, + -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f, + -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f, + -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f, + -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f, + 0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f, + -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f, + 0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f, + 7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f, + 8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f, + -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f, + -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f, + -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f, + -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f, + -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f, + 0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f, + 0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f, + 0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f, + -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f, + 5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f, + -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f, + -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f, + -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f, + 0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f, + 0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f, + 0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f, + 0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f, + -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f, + 0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f, + 0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f, + 0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f, + 0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f, + -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f, + -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f, + 0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f, + -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f, + -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f, + -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f, + 0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f, + -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f, + -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f, + -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f, + -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f, + 0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f, + 0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f, + -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f, + -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f, + -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f, + -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f, + 0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f, + -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f, + -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f, + -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f, + 0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f, + -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f, + 0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f, + -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f, + 0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f, + 6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f, + -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f, + -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f, + 0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f, + -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f, + -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f, + -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f, + 0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f, + -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f, + 8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f, + -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f, + -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f, + -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f, + 0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f, + 0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f, + 0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f, + -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f, + -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f, + -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f, + -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f, + 0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f, + 0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f, + -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f, + -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f, + 0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f, + 0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f, + -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f, + -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f, + -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f, + 7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f, + -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f, + -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f, + -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f, + -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f, + 0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f, + 0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f, + 6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f, + 0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f, + 0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f, + -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f, + 0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f, + 0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f, + 0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f, + 0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f, + -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f, + 0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f, + -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f, + 2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f, + 0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f, + 0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f, + 0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f, + 0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f, + 0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f, + 0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f, + 0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f, + -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f, + -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f, + -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f, + 0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f, + -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f, + -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f, + -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f, + 8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f, + -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f, + -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f, + 0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f, + -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f, + -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f, + -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f, + -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f, + 0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f, + 0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f, + 0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f, + -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f, + 0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f, + 6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f, + -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f, + 0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f, + -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f, + 0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f, + 0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f, + -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f, + 0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f, + -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f, + 0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f, + -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f, + 0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f, + 3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f, + -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f, + 0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f, + -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f, + 6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f, + -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f, + 1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f, + 0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f, + -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f, + 1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f, + 0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f, + 0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f, + -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f, + -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f, + 0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f, + -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f, + 0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f, + 8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f, + 0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f, + -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f, + 0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f, + -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f, + 0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f, + -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f, + -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f, + 0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f, + -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f, + 0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f, + -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f, + 0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f, + -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f, + -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f, + -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f, + -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f, + -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f, + 8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f, + 0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f, + 0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f, + 0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f, + 0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f, + 0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f, + 0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f, + 0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f, + 0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f, + -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f, + -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f, + 0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f, + 0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f, + 0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f, + 0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f, + 0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f, + -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f, + 0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f, + 2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f, + 0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f, + -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f, + -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f, + 7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f, + -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f, + -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f, + -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f, + -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f, + -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f, + -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f, + 0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f, + -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f, + 0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f, + 0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f, + 0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f, + -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f, + -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f, + -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f, + 0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f, + -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f, + 0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f, + 0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f, + 0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f, + -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f, + 8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f, + 0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f, + 0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f, + 0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f, + -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f, + 0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f, + -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f, + 0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f, + -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f, + 0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f, + -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f, + 0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f, + -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f, + -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f, + -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f, + 0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f, + -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f, + 0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f, + -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f, + 0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f, + -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f, + -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f, + -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f, + 0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f, + 0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f, + 0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f, + -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f, + -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f, + 0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f, + 0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f, + 0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f, + -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f, + 0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f, + -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f, + 4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f, + -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f, + 9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f, + -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f, + 0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f, + -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f, + -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f, + 0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f, + 0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f, + -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f, + -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f, + 0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f, + 5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f, + 0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f, + -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f, + -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f, + -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f, + -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f, + 0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f, + -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f, + 0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f, + -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f, + 0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f, + -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f, + 0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f, + -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f, + -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f, + -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f, + 0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f, + -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f, + -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f, + 0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f, + -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f, + -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f, + -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f, + 0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f, + 0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f, + 0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f, + 0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f, + -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f, + 3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f, + 0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f, + -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f, + 0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f, + 0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f, + -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f, + -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f, + 0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f, + 0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f, + -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f, + -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f, + -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f, + -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f, + -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f, + -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f, + -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f, + 0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f, + 0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f, + -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f, + 0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f, + -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f, + 0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f, + -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f, + -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f, + -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f, + 0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f, + 0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f, + 0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f, + -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f, + -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f, + 0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f, + 0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f, + 0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f, + -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f, + 0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f, + 0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f, + 0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f, + -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f, + 0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f, + 0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f, + 0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f, + -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f, + -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f, + 0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f, + -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f, + 0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f, + -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f, + -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f, + -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f, + -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f, + 0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f, + 0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f, + -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f, + 0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f, + 7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f, + 0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f, + -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f, + 0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f, + 0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f, + -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f, + 0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f, + 0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f, + -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f, + -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f, + 0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f, + 1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f, + 0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f, + 5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f, + 4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f, + 0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f, + 0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f, + -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f, + -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f, + 0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f, + 0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f, + 0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f, + 0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f, + 0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f, + 0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f, + -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f, + 0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f, + 0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f, + 0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f, + -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f, + 4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f, + -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f, + -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f, + 6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f, + -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f, + 0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f, + 0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f, + 0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f, + -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f, + -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f, + -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f, + -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f, + 0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f, + -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f, + 0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f, + 2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f, + -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f, + -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f, + -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f, + 2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f, + 0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f, + 0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f, + 0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f, + -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f, + -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f, + -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f, + -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f, + 0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f, + 0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f, + 0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f, + -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f, + -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f, + -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f, + -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f, + -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f, + 0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f, + -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f, + -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f, + 0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f, + -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f, + 0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f, + -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f, + 0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f, + 1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f, + -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f, + -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f, + -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f, + -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f, + 0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f, + 0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f, + 0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f, + 0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f, + 0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f, + -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f, + -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f, + 0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f, + -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f, + 0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f, + -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f, + 0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f, + 0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f, + 3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f, + 0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f, + 0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f, + -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f, + -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f, + 0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f, + 0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f, + 0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f, + -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f, + 0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f, + 0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f, + 6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f, + -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f, + 0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f, + -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f, + 0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f, + 0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f, + 9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f, + -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f, + 0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f, + -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f, + 0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f, + -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f, + 5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f, + -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f, + -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f, + -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f, + 0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f, + -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f, + 0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f, + -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f, + 0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f, + -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f, + -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f, + 0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f, + 0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f, + 8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f, + -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f, + -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f, + 0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f, + -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f, + 7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f, + 0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f, + 0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f, + -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f, + 0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f, + 0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f, + -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f, + -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f, + 0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f, + 8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f, + -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f, + 0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f, + -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f, + 0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f, + -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f, + 0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f, + 0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f, + -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f, + 0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f, + -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f, + 0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f, + 0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f, + 0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f, + -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f, + -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f, + 0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f, + -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f, + -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f, + -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f, + -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f, + -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f, + -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f, + -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f, + 0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f, + -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f, + -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f, + -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f, + 0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f, + -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f, + -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f, + -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f, + 0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f, + 0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f, + 0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f, + -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f, + -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f, + -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f, + -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f, + -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f, + 2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f, + -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f, + 0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f, + 0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f, + 0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f, + -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f, + 4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f, + 0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f, + 0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f, + -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f, + -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f, + -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f, + -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f, + -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f, + -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f }; + return vector(detector, detector + sizeof(detector)/sizeof(detector[0])); +} + +/* Returns the nearest upper power of two, works only for +the typical GPU thread count (pert block) values */ +static int power_2up(unsigned int n) +{ + if (n < 1) return 1; + else if (n < 2) return 2; + else if (n < 4) return 4; + else if (n < 8) return 8; + else if (n < 16) return 16; + else if (n < 32) return 32; + else if (n < 64) return 64; + else if (n < 128) return 128; + else if (n < 256) return 256; + else if (n < 512) return 512; + else if (n < 1024) return 1024; + return -1; // Input is too big +} + +void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y) +{ + cnbins = nbins; + cblock_stride_x = block_stride_x; + cblock_stride_y = block_stride_y; + cnblocks_win_x = nblocks_win_x; + cnblocks_win_y = nblocks_win_y; + + int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; + cblock_hist_size = block_hist_size; + + int block_hist_size_2up = power_2up(block_hist_size); + cblock_hist_size_2up = block_hist_size_2up; + + int descr_width = nblocks_win_x * block_hist_size; + cdescr_width = descr_width; + + int descr_size = descr_width * nblocks_win_y; + cdescr_size = descr_size; +} + +void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, + int height, int width, const cv::ocl::oclMat& grad, + const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists) +{ + Context *clCxt = Context::getContext(); + string kernelName = "compute_hists_kernel"; + vector< pair > args; + + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; + + size_t globalThreads[3] = { img_block_width * 32, img_block_height * 2, 1 }; + size_t localThreads[3] = { 32, 2, 1 }; + + int grad_quadstep = grad.step >> 2; + int qangle_step = qangle.step; + + // Precompute gaussian spatial window parameter + float scale = 1.f / (2.f * sigma * sigma); + + int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); + int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); + int smem = hists_size + final_hists_size; + + args.push_back( make_pair( sizeof(cl_int), (void *)&width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_stride_y)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( smem, (void *)NULL)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, + int height, int width, cv::ocl::oclMat& block_hists, float threshold) +{ + Context *clCxt = Context::getContext(); + string kernelName = "normalize_hists_kernel"; + vector< pair > args; + + int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; + int nthreads = power_2up(block_hist_size); + + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; + size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 }; + size_t localThreads[3] = { nthreads, 1, 1 }; + + if ((nthreads < 32) || (nthreads > 512) ) + cv::ocl::error("normalize_hists: histogram's size is too small or too big", __FILE__, __LINE__, "normalize_hists"); + + args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); + args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, int height, + int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef, + float threshold, cv::ocl::oclMat& labels) +{ + Context *clCxt = Context::getContext(); + string kernelName = "classify_hists_kernel"; + vector< pair > args; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + + size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; + size_t localThreads[3] = { NTHREADS, 1, 1 }; + + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_win_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&coefs.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&free_coef)); + args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, + const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors) +{ + Context *clCxt = Context::getContext(); + string kernelName = "extract_descrs_by_rows_kernel"; + vector< pair > args; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int descriptors_quadstep = descriptors.step >> 2; + + size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; + size_t localThreads[3] = { NTHREADS, 1, 1 }; + + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, + const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors) +{ + Context *clCxt = Context::getContext(); + string kernelName = "extract_descrs_by_cols_kernel"; + vector< pair > args; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int descriptors_quadstep = descriptors.step >> 2; + + size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; + size_t localThreads[3] = { NTHREADS, 1, 1 }; + + args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnblocks_win_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnblocks_win_y)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_y)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +static inline int divUp(int total, int grain) +{ + return (total + grain - 1) / grain; +} + +void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma) +{ + Context *clCxt = Context::getContext(); + string kernelName = "compute_gradients_8UC1_kernel"; + vector< pair > args; + + size_t localThreads[3] = { NTHREADS, 1, 1 }; + size_t globalThreads[3] = { width, height, 1 }; + char correctGamma = (correct_gamma) ? 1 : 0; + int img_step = img.step; + int grad_quadstep = grad.step >> 3; + int qangle_step = qangle.step >> 1; + + args.push_back( make_pair( sizeof(cl_int), (void *)&height)); + args.push_back( make_pair( sizeof(cl_int), (void *)&width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale)); + args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img, + float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma) +{ + Context *clCxt = Context::getContext(); + string kernelName = "compute_gradients_8UC4_kernel"; + vector< pair > args; + + size_t localThreads[3] = { NTHREADS, 1, 1 }; + size_t globalThreads[3] = { width, height, 1 }; + + char correctGamma = (correct_gamma) ? 1 : 0; + int img_step = img.step >> 2; + int grad_quadstep = grad.step >> 3; + int qangle_step = qangle.step >> 1; + + args.push_back( make_pair( sizeof(cl_int), (void *)&height)); + args.push_back( make_pair( sizeof(cl_int), (void *)&width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&grad_quadstep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); + args.push_back( make_pair( sizeof(cl_float), (void *)&angle_scale)); + args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); + + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); +} + +#endif diff --git a/modules/ocl/src/kernels/nonfree_surf.cl b/modules/ocl/src/kernels/nonfree_surf.cl new file mode 100644 index 0000000..16cd4be --- /dev/null +++ b/modules/ocl/src/kernels/nonfree_surf.cl @@ -0,0 +1,1259 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable + +// dynamically change the precision used for floating type + +#if defined (__ATI__) || defined (__NVIDIA__) +#define F double +#else +#define F float +#endif + +// Image read mode +__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + +#define CV_PI_F 3.14159265f + +// print greyscale image to show image layout +__kernel void printImage(image2d_t img) +{ + printf("(%d, %d) - %3d \n", + get_global_id(0), + get_global_id(1), + read_imageui(img, (int2)(get_global_id(0), get_global_id(1))).x + ); +} + +// Use integral image to calculate haar wavelets. +// N = 2 +// for simple haar paatern +float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int oldSize, int newSize, int y, int x) +{ + + float ratio = (float)newSize / oldSize; + + F d = 0; + +#pragma unroll + for (int k = 0; k < 2; ++k) + { + int dx1 = convert_int_rte(ratio * src[k][0]); + int dy1 = convert_int_rte(ratio * src[k][1]); + int dx2 = convert_int_rte(ratio * src[k][2]); + int dy2 = convert_int_rte(ratio * src[k][3]); + + F t = 0; + t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x; + t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x; + + d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1)); + } + + return (float)d; +} + +// N = 3 +float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int oldSize, int newSize, int y, int x) +{ + + float ratio = (float)newSize / oldSize; + + F d = 0; + +#pragma unroll + for (int k = 0; k < 3; ++k) + { + int dx1 = convert_int_rte(ratio * src[k][0]); + int dy1 = convert_int_rte(ratio * src[k][1]); + int dx2 = convert_int_rte(ratio * src[k][2]); + int dy2 = convert_int_rte(ratio * src[k][3]); + + F t = 0; + t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x; + t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x; + + d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1)); + } + + return (float)d; +} + +// N = 4 +float icvCalcHaarPatternSum_4(image2d_t sumTex, __constant float src[4][5], int oldSize, int newSize, int y, int x) +{ + + float ratio = (float)newSize / oldSize; + + F d = 0; + +#pragma unroll + for (int k = 0; k < 4; ++k) + { + int dx1 = convert_int_rte(ratio * src[k][0]); + int dy1 = convert_int_rte(ratio * src[k][1]); + int dx2 = convert_int_rte(ratio * src[k][2]); + int dy2 = convert_int_rte(ratio * src[k][3]); + + F t = 0; + t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x; + t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x; + t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x; + + d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1)); + } + + return (float)d; +} + +//////////////////////////////////////////////////////////////////////// +// Hessian + +__constant float c_DX [3][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} }; +__constant float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} }; +__constant float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} }; + +__inline int calcSize(int octave, int layer) +{ + /* Wavelet size at first layer of first octave. */ + const int HAAR_SIZE0 = 9; + + /* Wavelet size increment between layers. This should be an even number, + such that the wavelet sizes in an octave are either all even or all odd. + This ensures that when looking for the neighbours of a sample, the layers + above and below are aligned correctly. */ + const int HAAR_SIZE_INC = 6; + + return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave; +} + + +//calculate targeted layer per-pixel determinant and trace with an integral image +__kernel void icvCalcLayerDetAndTrace( + image2d_t sumTex, // input integral image + __global float * det, // output Determinant + __global float * trace, // output trace + int det_step, // the step of det in bytes + int trace_step, // the step of trace in bytes + int c_img_rows, + int c_img_cols, + int c_nOctaveLayers, + int c_octave, + int c_layer_rows + ) +{ + det_step /= sizeof(*det); + trace_step /= sizeof(*trace); + // Determine the indices + const int gridDim_y = get_num_groups(1) / (c_nOctaveLayers + 2); + const int blockIdx_y = get_group_id(1) % gridDim_y; + const int blockIdx_z = get_group_id(1) / gridDim_y; + + const int j = get_local_id(0) + get_group_id(0) * get_local_size(0); + const int i = get_local_id(1) + blockIdx_y * get_local_size(1); + const int layer = blockIdx_z; + + const int size = calcSize(c_octave, layer); + + const int samples_i = 1 + ((c_img_rows - size) >> c_octave); + const int samples_j = 1 + ((c_img_cols - size) >> c_octave); + + // Ignore pixels where some of the kernel is outside the image + const int margin = (size >> 1) >> c_octave; + + if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j) + { + const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave); + const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave); + const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave); + + det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy; + trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy; + } +} + + +//////////////////////////////////////////////////////////////////////// +// NONMAX + +__constant float c_DM[5] = {0, 0, 9, 9, 1}; + +bool within_check(image2d_t maskSumTex, int sum_i, int sum_j, int size) +{ + float ratio = (float)size / 9.0f; + + float d = 0; + + int dx1 = convert_int_rte(ratio * c_DM[0]); + int dy1 = convert_int_rte(ratio * c_DM[1]); + int dx2 = convert_int_rte(ratio * c_DM[2]); + int dy2 = convert_int_rte(ratio * c_DM[3]); + + float t = 0; + + t += read_imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy1)).x; + t -= read_imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy2)).x; + t -= read_imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy1)).x; + t += read_imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy2)).x; + + d += t * c_DM[4] / ((dx2 - dx1) * (dy2 - dy1)); + + return (d >= 0.5f); +} + +// Non-maximal suppression to further filtering the candidates from previous step +__kernel + void icvFindMaximaInLayer_withmask( + __global const float * det, + __global const float * trace, + __global int4 * maxPosBuffer, + volatile __global unsigned int* maxCounter, + int counter_offset, + int det_step, // the step of det in bytes + int trace_step, // the step of trace in bytes + int c_img_rows, + int c_img_cols, + int c_nOctaveLayers, + int c_octave, + int c_layer_rows, + int c_layer_cols, + int c_max_candidates, + float c_hessianThreshold, + image2d_t maskSumTex + ) +{ + volatile __local float N9[768]; // threads.x * threads.y * 3 + + det_step /= sizeof(*det); + trace_step /= sizeof(*trace); + maxCounter += counter_offset; + + // Determine the indices + const int gridDim_y = get_num_groups(1) / c_nOctaveLayers; + const int blockIdx_y = get_group_id(1) % gridDim_y; + const int blockIdx_z = get_group_id(1) / gridDim_y; + + const int layer = blockIdx_z + 1; + + const int size = calcSize(c_octave, layer); + + // Ignore pixels without a 3x3x3 neighbourhood in the layer above + const int margin = ((calcSize(c_octave, layer + 1) >> 1) >> c_octave) + 1; + + const int j = get_local_id(0) + get_group_id(0) * (get_local_size(0) - 2) + margin - 1; + const int i = get_local_id(1) + blockIdx_y * (get_local_size(1) - 2) + margin - 1; + + // Is this thread within the hessian buffer? + const int zoff = get_local_size(0) * get_local_size(1); + const int localLin = get_local_id(0) + get_local_id(1) * get_local_size(0) + zoff; + N9[localLin - zoff] = + det[det_step * + (c_layer_rows * (layer - 1) + min(max(i, 0), c_img_rows - 1)) // y + + min(max(j, 0), c_img_cols - 1)]; // x + N9[localLin ] = + det[det_step * + (c_layer_rows * (layer ) + min(max(i, 0), c_img_rows - 1)) // y + + min(max(j, 0), c_img_cols - 1)]; // x + N9[localLin + zoff] = + det[det_step * + (c_layer_rows * (layer + 1) + min(max(i, 0), c_img_rows - 1)) // y + + min(max(j, 0), c_img_cols - 1)]; // x + + barrier(CLK_LOCAL_MEM_FENCE); + + if (i < c_layer_rows - margin + && j < c_layer_cols - margin + && get_local_id(0) > 0 + && get_local_id(0) < get_local_size(0) - 1 + && get_local_id(1) > 0 + && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA + ) + { + float val0 = N9[localLin]; + + if (val0 > c_hessianThreshold) + { + // Coordinates for the start of the wavelet in the sum image. There + // is some integer division involved, so don't try to simplify this + // (cancel out sampleStep) without checking the result is the same + const int sum_i = (i - ((size >> 1) >> c_octave)) << c_octave; + const int sum_j = (j - ((size >> 1) >> c_octave)) << c_octave; + + if (within_check(maskSumTex, sum_i, sum_j, size)) + { + // Check to see if we have a max (in its 26 neighbours) + const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - get_local_size(0) - zoff] + && val0 > N9[localLin + 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - 1 - zoff] + && val0 > N9[localLin - zoff] + && val0 > N9[localLin + 1 - zoff] + && val0 > N9[localLin - 1 + get_local_size(0) - zoff] + && val0 > N9[localLin + get_local_size(0) - zoff] + && val0 > N9[localLin + 1 + get_local_size(0) - zoff] + + && val0 > N9[localLin - 1 - get_local_size(0)] + && val0 > N9[localLin - get_local_size(0)] + && val0 > N9[localLin + 1 - get_local_size(0)] + && val0 > N9[localLin - 1 ] + && val0 > N9[localLin + 1 ] + && val0 > N9[localLin - 1 + get_local_size(0)] + && val0 > N9[localLin + get_local_size(0)] + && val0 > N9[localLin + 1 + get_local_size(0)] + + && val0 > N9[localLin - 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - get_local_size(0) + zoff] + && val0 > N9[localLin + 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - 1 + zoff] + && val0 > N9[localLin + zoff] + && val0 > N9[localLin + 1 + zoff] + && val0 > N9[localLin - 1 + get_local_size(0) + zoff] + && val0 > N9[localLin + get_local_size(0) + zoff] + && val0 > N9[localLin + 1 + get_local_size(0) + zoff] + ; + + if(condmax) + { + unsigned int ind = atomic_inc(maxCounter); + + if (ind < c_max_candidates) + { + const int laplacian = (int) copysign(1.0f, trace[trace_step* (layer * c_layer_rows + i) + j]); + + maxPosBuffer[ind] = (int4)(j, i, layer, laplacian); + } + } + } + } + } +} + +__kernel + void icvFindMaximaInLayer( + __global float * det, + __global float * trace, + __global int4 * maxPosBuffer, + volatile __global unsigned int* maxCounter, + int counter_offset, + int det_step, // the step of det in bytes + int trace_step, // the step of trace in bytes + int c_img_rows, + int c_img_cols, + int c_nOctaveLayers, + int c_octave, + int c_layer_rows, + int c_layer_cols, + int c_max_candidates, + float c_hessianThreshold + ) +{ + volatile __local float N9[768]; // threads.x * threads.y * 3 + + det_step /= sizeof(float); + trace_step /= sizeof(float); + maxCounter += counter_offset; + + // Determine the indices + const int gridDim_y = get_num_groups(1) / c_nOctaveLayers; + const int blockIdx_y = get_group_id(1) % gridDim_y; + const int blockIdx_z = get_group_id(1) / gridDim_y; + + const int layer = blockIdx_z + 1; + + const int size = calcSize(c_octave, layer); + + // Ignore pixels without a 3x3x3 neighbourhood in the layer above + const int margin = ((calcSize(c_octave, layer + 1) >> 1) >> c_octave) + 1; + + const int j = get_local_id(0) + get_group_id(0) * (get_local_size(0) - 2) + margin - 1; + const int i = get_local_id(1) + blockIdx_y * (get_local_size(1) - 2) + margin - 1; + + // Is this thread within the hessian buffer? + const int zoff = get_local_size(0) * get_local_size(1); + const int localLin = get_local_id(0) + get_local_id(1) * get_local_size(0) + zoff; + + int l_x = min(max(j, 0), c_img_cols - 1); + int l_y = c_layer_rows * layer + min(max(i, 0), c_img_rows - 1); + + N9[localLin - zoff] = + det[det_step * (l_y - c_layer_rows) + l_x]; + N9[localLin ] = + det[det_step * (l_y ) + l_x]; + N9[localLin + zoff] = + det[det_step * (l_y + c_layer_rows) + l_x]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (i < c_layer_rows - margin + && j < c_layer_cols - margin + && get_local_id(0) > 0 + && get_local_id(0) < get_local_size(0) - 1 + && get_local_id(1) > 0 + && get_local_id(1) < get_local_size(1) - 1 // these are unnecessary conditions ported from CUDA + ) + { + float val0 = N9[localLin]; + if (val0 > c_hessianThreshold) + { + //printf(\"(%3d, %3d) N9[%3d]=%7.1f val0=%7.1f\\n\", l_x, l_y, localLin - zoff, N9[localLin], val0); + // Coordinates for the start of the wavelet in the sum image. There + // is some integer division involved, so don't try to simplify this + // (cancel out sampleStep) without checking the result is the same + + // Check to see if we have a max (in its 26 neighbours) + const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - get_local_size(0) - zoff] + && val0 > N9[localLin + 1 - get_local_size(0) - zoff] + && val0 > N9[localLin - 1 - zoff] + && val0 > N9[localLin - zoff] + && val0 > N9[localLin + 1 - zoff] + && val0 > N9[localLin - 1 + get_local_size(0) - zoff] + && val0 > N9[localLin + get_local_size(0) - zoff] + && val0 > N9[localLin + 1 + get_local_size(0) - zoff] + + && val0 > N9[localLin - 1 - get_local_size(0)] + && val0 > N9[localLin - get_local_size(0)] + && val0 > N9[localLin + 1 - get_local_size(0)] + && val0 > N9[localLin - 1 ] + && val0 > N9[localLin + 1 ] + && val0 > N9[localLin - 1 + get_local_size(0)] + && val0 > N9[localLin + get_local_size(0)] + && val0 > N9[localLin + 1 + get_local_size(0)] + + && val0 > N9[localLin - 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - get_local_size(0) + zoff] + && val0 > N9[localLin + 1 - get_local_size(0) + zoff] + && val0 > N9[localLin - 1 + zoff] + && val0 > N9[localLin + zoff] + && val0 > N9[localLin + 1 + zoff] + && val0 > N9[localLin - 1 + get_local_size(0) + zoff] + && val0 > N9[localLin + get_local_size(0) + zoff] + && val0 > N9[localLin + 1 + get_local_size(0) + zoff] + ; + + if(condmax) + { + unsigned int ind = atomic_inc(maxCounter); + + if (ind < c_max_candidates) + { + const int laplacian = (int) copysign(1.0f, trace[trace_step* (layer * c_layer_rows + i) + j]); + + maxPosBuffer[ind] = (int4)(j, i, layer, laplacian); + } + } + } + } +} + +// solve 3x3 linear system Ax=b for floating point input +inline bool solve3x3_float(volatile __local const float A[3][3], volatile __local const float b[3], volatile __local float x[3]) +{ + float det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) + - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]); + + if (det != 0) + { + F invdet = 1.0 / det; + + x[0] = invdet * + (b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) - + A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) + + A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )); + + x[1] = invdet * + (A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) - + b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) + + A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])); + + x[2] = invdet * + (A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) - + A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) + + b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])); + + return true; + } + return false; +} + +#define X_ROW 0 +#define Y_ROW 1 +#define LAPLACIAN_ROW 2 +#define OCTAVE_ROW 3 +#define SIZE_ROW 4 +#define ANGLE_ROW 5 +#define HESSIAN_ROW 6 +#define ROWS_COUNT 7 + +//////////////////////////////////////////////////////////////////////// +// INTERPOLATION +__kernel + void icvInterpolateKeypoint( + __global const float * det, + __global const int4 * maxPosBuffer, + __global float * keypoints, + volatile __global unsigned int * featureCounter, + int det_step, + int keypoints_step, + int c_img_rows, + int c_img_cols, + int c_octave, + int c_layer_rows, + int c_max_features + ) +{ + det_step /= sizeof(*det); + keypoints_step /= sizeof(*keypoints); + __global float * featureX = keypoints + X_ROW * keypoints_step; + __global float * featureY = keypoints + Y_ROW * keypoints_step; + __global int * featureLaplacian = (__global int *)keypoints + LAPLACIAN_ROW * keypoints_step; + __global int * featureOctave = (__global int *)keypoints + OCTAVE_ROW * keypoints_step; + __global float * featureSize = keypoints + SIZE_ROW * keypoints_step; + __global float * featureHessian = keypoints + HESSIAN_ROW * keypoints_step; + + const int4 maxPos = maxPosBuffer[get_group_id(0)]; + + const int j = maxPos.x - 1 + get_local_id(0); + const int i = maxPos.y - 1 + get_local_id(1); + const int layer = maxPos.z - 1 + get_local_id(2); + + volatile __local float N9[3][3][3]; + + N9[get_local_id(2)][get_local_id(1)][get_local_id(0)] = + det[det_step * (c_layer_rows * layer + i) + j]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0) + { + volatile __local float dD[3]; + + //dx + dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]); + //dy + dD[1] = -0.5f * (N9[1][2][1] - N9[1][0][1]); + //ds + dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]); + + volatile __local float H[3][3]; + + //dxx + H[0][0] = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2]; + //dxy + H[0][1]= 0.25f * (N9[1][2][2] - N9[1][2][0] - N9[1][0][2] + N9[1][0][0]); + //dxs + H[0][2]= 0.25f * (N9[2][1][2] - N9[2][1][0] - N9[0][1][2] + N9[0][1][0]); + //dyx = dxy + H[1][0] = H[0][1]; + //dyy + H[1][1] = N9[1][0][1] - 2.0f * N9[1][1][1] + N9[1][2][1]; + //dys + H[1][2]= 0.25f * (N9[2][2][1] - N9[2][0][1] - N9[0][2][1] + N9[0][0][1]); + //dsx = dxs + H[2][0] = H[0][2]; + //dsy = dys + H[2][1] = H[1][2]; + //dss + H[2][2] = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1]; + + volatile __local float x[3]; + + if (solve3x3_float(H, dD, x)) + { + if (fabs(x[0]) <= 1.f && fabs(x[1]) <= 1.f && fabs(x[2]) <= 1.f) + { + // if the step is within the interpolation region, perform it + + const int size = calcSize(c_octave, maxPos.z); + + const int sum_i = (maxPos.y - ((size >> 1) >> c_octave)) << c_octave; + const int sum_j = (maxPos.x - ((size >> 1) >> c_octave)) << c_octave; + + const float center_i = sum_i + (float)(size - 1) / 2; + const float center_j = sum_j + (float)(size - 1) / 2; + + const float px = center_j + x[0] * (1 << c_octave); + const float py = center_i + x[1] * (1 << c_octave); + + const int ds = size - calcSize(c_octave, maxPos.z - 1); + const float psize = round(size + x[2] * ds); + + /* The sampling intervals and wavelet sized for selecting an orientation + and building the keypoint descriptor are defined relative to 's' */ + const float s = psize * 1.2f / 9.0f; + + /* To find the dominant orientation, the gradients in x and y are + sampled in a circle of radius 6s using wavelets of size 4s. + We ensure the gradient wavelet size is even to ensure the + wavelet pattern is balanced and symmetric around its center */ + const int grad_wav_size = 2 * convert_int_rte(2.0f * s); + + // check when grad_wav_size is too big + if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size) + { + // Get a new feature index. + unsigned int ind = atomic_inc(featureCounter); + + if (ind < c_max_features) + { + featureX[ind] = px; + featureY[ind] = py; + featureLaplacian[ind] = maxPos.w; + featureOctave[ind] = c_octave; + featureSize[ind] = psize; + featureHessian[ind] = N9[1][1][1]; + } + } // grad_wav_size check + } // If the subpixel interpolation worked + } + } // If this is thread 0. +} + +//////////////////////////////////////////////////////////////////////// +// Orientation + +#define ORI_SEARCH_INC 5 +#define ORI_WIN 60 +#define ORI_SAMPLES 113 + +__constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6}; +__constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0}; +__constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448408f, 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, + 0.003238451667129993f, 0.002547456417232752f, 0.001707611023448408f, 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, + 0.00665318313986063f, 0.00720730796456337f, 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, + 0.002003900473937392f, 0.001707611023448408f, 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, + 0.01164754293859005f, 0.01261763460934162f, 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, + 0.0035081731621176f, 0.001707611023448408f, 0.002547456417232752f, 0.005233579315245152f, 0.009162282571196556f, + 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, 0.01366852037608624f, + 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.003238451667129993f, 0.00665318313986063f, + 0.01164754293859005f, 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, + 0.01737609319388866f, 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.001455130288377404f, + 0.0035081731621176f, 0.00720730796456337f, 0.01261763460934162f, 0.0188232995569706f, 0.02392910048365593f, + 0.02592208795249462f, 0.02392910048365593f, 0.0188232995569706f, 0.01261763460934162f, 0.00720730796456337f, + 0.0035081731621176f, 0.001455130288377404f, 0.003238451667129993f, 0.00665318313986063f, 0.01164754293859005f, + 0.01737609319388866f, 0.02208934165537357f, 0.02392910048365593f, 0.02208934165537357f, 0.01737609319388866f, + 0.01164754293859005f, 0.00665318313986063f, 0.003238451667129993f, 0.002547456417232752f, 0.005233579315245152f, + 0.009162282571196556f, 0.01366852037608624f, 0.01737609319388866f, 0.0188232995569706f, 0.01737609319388866f, + 0.01366852037608624f, 0.009162282571196556f, 0.005233579315245152f, 0.002547456417232752f, 0.001707611023448408f, + 0.0035081731621176f, 0.006141661666333675f, 0.009162282571196556f, 0.01164754293859005f, 0.01261763460934162f, + 0.01164754293859005f, 0.009162282571196556f, 0.006141661666333675f, 0.0035081731621176f, 0.001707611023448408f, + 0.002003900473937392f, 0.0035081731621176f, 0.005233579315245152f, 0.00665318313986063f, 0.00720730796456337f, + 0.00665318313986063f, 0.005233579315245152f, 0.0035081731621176f, 0.002003900473937392f, 0.001707611023448408f, + 0.002547456417232752f, 0.003238451667129993f, 0.0035081731621176f, 0.003238451667129993f, 0.002547456417232752f, + 0.001707611023448408f, 0.001455130288377404f}; + +__constant float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}}; +__constant float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}}; + +void reduce_32_sum(volatile __local float * data, float partial_reduction, int tid) +{ +#define op(A, B) (A)+(B) + data[tid] = partial_reduction; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 16) + { + data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]); + data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]); + } +#undef op +} + +__kernel + void icvCalcOrientation( + image2d_t sumTex, + __global float * keypoints, + int keypoints_step, + int c_img_rows, + int c_img_cols + ) +{ + keypoints_step /= sizeof(*keypoints); + __global float* featureX = keypoints + X_ROW * keypoints_step; + __global float* featureY = keypoints + Y_ROW * keypoints_step; + __global float* featureSize = keypoints + SIZE_ROW * keypoints_step; + __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step; + + volatile __local float s_X[128]; + volatile __local float s_Y[128]; + volatile __local float s_angle[128]; + + volatile __local float s_sumx[32 * 4]; + volatile __local float s_sumy[32 * 4]; + + /* The sampling intervals and wavelet sized for selecting an orientation + and building the keypoint descriptor are defined relative to 's' */ + const float s = featureSize[get_group_id(0)] * 1.2f / 9.0f; + + /* To find the dominant orientation, the gradients in x and y are + sampled in a circle of radius 6s using wavelets of size 4s. + We ensure the gradient wavelet size is even to ensure the + wavelet pattern is balanced and symmetric around its center */ + const int grad_wav_size = 2 * convert_int_rte(2.0f * s); + + // check when grad_wav_size is too big + if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size) + return; + + // Calc X, Y, angle and store it to shared memory + const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); + + float X = 0.0f, Y = 0.0f, angle = 0.0f; + + if (tid < ORI_SAMPLES) + { + const float margin = (float)(grad_wav_size - 1) / 2.0f; + const int x = convert_int_rte(featureX[get_group_id(0)] + c_aptX[tid] * s - margin); + const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin); + + if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size && + x >= 0 && x < (c_img_cols + 1) - grad_wav_size) + { + X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x); + Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x); + + angle = atan2(Y, X); + if (angle < 0) + angle += 2.0f * CV_PI_F; + angle *= 180.0f / CV_PI_F; + } + } + s_X[tid] = X; + s_Y[tid] = Y; + s_angle[tid] = angle; + barrier(CLK_LOCAL_MEM_FENCE); + + float bestx = 0, besty = 0, best_mod = 0; + +#pragma unroll + for (int i = 0; i < 18; ++i) + { + const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC; + + float sumx = 0.0f, sumy = 0.0f; + int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx = s_X[get_local_id(0)]; + sumy = s_Y[get_local_id(0)]; + } + d = abs(convert_int_rte(s_angle[get_local_id(0) + 32]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[get_local_id(0) + 32]; + sumy += s_Y[get_local_id(0) + 32]; + } + d = abs(convert_int_rte(s_angle[get_local_id(0) + 64]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[get_local_id(0) + 64]; + sumy += s_Y[get_local_id(0) + 64]; + } + d = abs(convert_int_rte(s_angle[get_local_id(0) + 96]) - dir); + if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) + { + sumx += s_X[get_local_id(0) + 96]; + sumy += s_Y[get_local_id(0) + 96]; + } + + reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0)); + reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0)); + + const float temp_mod = sumx * sumx + sumy * sumy; + if (temp_mod > best_mod) + { + best_mod = temp_mod; + bestx = sumx; + besty = sumy; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (get_local_id(0) == 0) + { + s_X[get_local_id(1)] = bestx; + s_Y[get_local_id(1)] = besty; + s_angle[get_local_id(1)] = best_mod; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(1) == 0 && get_local_id(0) == 0) + { + int bestIdx = 0; + + if (s_angle[1] > s_angle[bestIdx]) + bestIdx = 1; + if (s_angle[2] > s_angle[bestIdx]) + bestIdx = 2; + if (s_angle[3] > s_angle[bestIdx]) + bestIdx = 3; + + float kp_dir = atan2(s_Y[bestIdx], s_X[bestIdx]); + if (kp_dir < 0) + kp_dir += 2.0f * CV_PI_F; + kp_dir *= 180.0f / CV_PI_F; + + featureDir[get_group_id(0)] = kp_dir; + } +} + +#undef ORI_SEARCH_INC +#undef ORI_WIN +#undef ORI_SAMPLES + +//////////////////////////////////////////////////////////////////////// +// Descriptors + +#define PATCH_SZ 20 + +__constant float c_DW[PATCH_SZ * PATCH_SZ] = +{ + 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f, + 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, + 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, + 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, + 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, + 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, + 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, + 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, + 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, + 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, + 0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f, + 0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f, + 0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f, + 0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f, + 9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f, + 5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f, + 3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f, + 1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f, + 8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f, + 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f +}; + +// utility for linear filter +inline uchar readerGet( + image2d_t src, + const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir, + int i, int j + ) +{ + float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir; + float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir; + return (uchar)read_imageui(src, sampler, (float2)(pixel_x, pixel_y)).x; +} + +inline float linearFilter( + image2d_t src, + const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir, + float y, float x + ) +{ + x -= 0.5f; + y -= 0.5f; + + float out = 0.0f; + + const int x1 = convert_int_rtn(x); + const int y1 = convert_int_rtn(y); + const int x2 = x1 + 1; + const int y2 = y1 + 1; + + uchar src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1); + out = out + src_reg * ((x2 - x) * (y2 - y)); + + src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2); + out = out + src_reg * ((x - x1) * (y2 - y)); + + src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1); + out = out + src_reg * ((x2 - x) * (y - y1)); + + src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2); + out = out + src_reg * ((x - x1) * (y - y1)); + + return out; +} + +void calc_dx_dy( + image2d_t imgTex, + volatile __local float s_dx_bin[25], + volatile __local float s_dy_bin[25], + volatile __local float s_PATCH[6][6], + __global const float* featureX, + __global const float* featureY, + __global const float* featureSize, + __global const float* featureDir + ) +{ + const float centerX = featureX[get_group_id(0)]; + const float centerY = featureY[get_group_id(0)]; + const float size = featureSize[get_group_id(0)]; + const float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f); + + /* The sampling intervals and wavelet sized for selecting an orientation + and building the keypoint descriptor are defined relative to 's' */ + const float s = size * 1.2f / 9.0f; + + /* Extract a window of pixels around the keypoint of size 20s */ + const int win_size = (int)((PATCH_SZ + 1) * s); + + float sin_dir; + float cos_dir; + sin_dir = sincos(descriptor_dir, &cos_dir); + + /* Nearest neighbour version (faster) */ + const float win_offset = -(float)(win_size - 1) / 2; + + // Compute sampling points + // since grids are 2D, need to compute xBlock and yBlock indices + const int xBlock = (get_group_id(1) & 3); // get_group_id(1) % 4 + const int yBlock = (get_group_id(1) >> 2); // floor(get_group_id(1)/4) + const int xIndex = xBlock * 5 + get_local_id(0); + const int yIndex = yBlock * 5 + get_local_id(1); + + const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size; + const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size; + + s_PATCH[get_local_id(1)][get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo); + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 5 && get_local_id(1) < 5) + { + const int tid = get_local_id(1) * 5 + get_local_id(0); + + const float dw = c_DW[yIndex * PATCH_SZ + xIndex]; + + const float vx = ( + s_PATCH[get_local_id(1) ][get_local_id(0) + 1] - + s_PATCH[get_local_id(1) ][get_local_id(0) ] + + s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] - + s_PATCH[get_local_id(1) + 1][get_local_id(0) ]) + * dw; + const float vy = ( + s_PATCH[get_local_id(1) + 1][get_local_id(0) ] - + s_PATCH[get_local_id(1) ][get_local_id(0) ] + + s_PATCH[get_local_id(1) + 1][get_local_id(0) + 1] - + s_PATCH[get_local_id(1) ][get_local_id(0) + 1]) + * dw; + s_dx_bin[tid] = vx; + s_dy_bin[tid] = vy; + } +} +void reduce_sum25( + volatile __local float* sdata1, + volatile __local float* sdata2, + volatile __local float* sdata3, + volatile __local float* sdata4, + int tid + ) +{ + // first step is to reduce from 25 to 16 + if (tid < 9) // use 9 threads + { + sdata1[tid] += sdata1[tid + 16]; + sdata2[tid] += sdata2[tid + 16]; + sdata3[tid] += sdata3[tid + 16]; + sdata4[tid] += sdata4[tid + 16]; + } + + // sum (reduce) from 16 to 1 (unrolled - aligned to a half-warp) + if (tid < 8) + { + sdata1[tid] += sdata1[tid + 8]; + sdata1[tid] += sdata1[tid + 4]; + sdata1[tid] += sdata1[tid + 2]; + sdata1[tid] += sdata1[tid + 1]; + + sdata2[tid] += sdata2[tid + 8]; + sdata2[tid] += sdata2[tid + 4]; + sdata2[tid] += sdata2[tid + 2]; + sdata2[tid] += sdata2[tid + 1]; + + sdata3[tid] += sdata3[tid + 8]; + sdata3[tid] += sdata3[tid + 4]; + sdata3[tid] += sdata3[tid + 2]; + sdata3[tid] += sdata3[tid + 1]; + + sdata4[tid] += sdata4[tid + 8]; + sdata4[tid] += sdata4[tid + 4]; + sdata4[tid] += sdata4[tid + 2]; + sdata4[tid] += sdata4[tid + 1]; + } +} + +__kernel + void compute_descriptors64( + image2d_t imgTex, + volatile __global float * descriptors, + __global const float * keypoints, + int descriptors_step, + int keypoints_step + ) +{ + descriptors_step /= sizeof(float); + keypoints_step /= sizeof(float); + + __global const float * featureX = keypoints + X_ROW * keypoints_step; + __global const float * featureY = keypoints + Y_ROW * keypoints_step; + __global const float * featureSize = keypoints + SIZE_ROW * keypoints_step; + __global const float * featureDir = keypoints + ANGLE_ROW * keypoints_step; + + // 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region) + volatile __local float sdx[25]; + volatile __local float sdy[25]; + volatile __local float sdxabs[25]; + volatile __local float sdyabs[25]; + volatile __local float s_PATCH[6][6]; + + calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir); + barrier(CLK_LOCAL_MEM_FENCE); + + const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); + + if (tid < 25) + { + sdxabs[tid] = fabs(sdx[tid]); // |dx| array + sdyabs[tid] = fabs(sdy[tid]); // |dy| array + barrier(CLK_LOCAL_MEM_FENCE); + + reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2); + + // write dx, dy, |dx|, |dy| + if (tid == 0) + { + descriptors_block[0] = sdx[0]; + descriptors_block[1] = sdy[0]; + descriptors_block[2] = sdxabs[0]; + descriptors_block[3] = sdyabs[0]; + } + } +} +__kernel + void compute_descriptors128( + image2d_t imgTex, + __global volatile float * descriptors, + __global float * keypoints, + int descriptors_step, + int keypoints_step + ) +{ + descriptors_step /= sizeof(*descriptors); + keypoints_step /= sizeof(*keypoints); + + __global float * featureX = keypoints + X_ROW * keypoints_step; + __global float * featureY = keypoints + Y_ROW * keypoints_step; + __global float* featureSize = keypoints + SIZE_ROW * keypoints_step; + __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step; + + // 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region) + volatile __local float sdx[25]; + volatile __local float sdy[25]; + + // sum (reduce) 5x5 area response + volatile __local float sd1[25]; + volatile __local float sd2[25]; + volatile __local float sdabs1[25]; + volatile __local float sdabs2[25]; + volatile __local float s_PATCH[6][6]; + + calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir); + barrier(CLK_LOCAL_MEM_FENCE); + + const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); + + if (tid < 25) + { + if (sdy[tid] >= 0) + { + sd1[tid] = sdx[tid]; + sdabs1[tid] = fabs(sdx[tid]); + sd2[tid] = 0; + sdabs2[tid] = 0; + } + else + { + sd1[tid] = 0; + sdabs1[tid] = 0; + sd2[tid] = sdx[tid]; + sdabs2[tid] = fabs(sdx[tid]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3); + + // write dx (dy >= 0), |dx| (dy >= 0), dx (dy < 0), |dx| (dy < 0) + if (tid == 0) + { + descriptors_block[0] = sd1[0]; + descriptors_block[1] = sdabs1[0]; + descriptors_block[2] = sd2[0]; + descriptors_block[3] = sdabs2[0]; + } + + if (sdx[tid] >= 0) + { + sd1[tid] = sdy[tid]; + sdabs1[tid] = fabs(sdy[tid]); + sd2[tid] = 0; + sdabs2[tid] = 0; + } + else + { + sd1[tid] = 0; + sdabs1[tid] = 0; + sd2[tid] = sdy[tid]; + sdabs2[tid] = fabs(sdy[tid]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid); + barrier(CLK_LOCAL_MEM_FENCE); + + // write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy| (dx < 0) + if (tid == 0) + { + descriptors_block[4] = sd1[0]; + descriptors_block[5] = sdabs1[0]; + descriptors_block[6] = sd2[0]; + descriptors_block[7] = sdabs2[0]; + } + } +} + +__kernel + void normalize_descriptors128(__global float * descriptors, int descriptors_step) +{ + descriptors_step /= sizeof(*descriptors); + // no need for thread ID + __global float* descriptor_base = descriptors + descriptors_step * get_group_id(0); + + // read in the unnormalized descriptor values (squared) + volatile __local float sqDesc[128]; + const float lookup = descriptor_base[get_local_id(0)]; + sqDesc[get_local_id(0)] = lookup * lookup; + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < 64) + sqDesc[get_local_id(0)] += sqDesc[get_local_id(0) + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + // reduction to get total + if (get_local_id(0) < 32) + { + volatile __local float* smem = sqDesc; + + smem[get_local_id(0)] += smem[get_local_id(0) + 32]; + smem[get_local_id(0)] += smem[get_local_id(0) + 16]; + smem[get_local_id(0)] += smem[get_local_id(0) + 8]; + smem[get_local_id(0)] += smem[get_local_id(0) + 4]; + smem[get_local_id(0)] += smem[get_local_id(0) + 2]; + smem[get_local_id(0)] += smem[get_local_id(0) + 1]; + } + + // compute length (square root) + volatile __local float len; + if (get_local_id(0) == 0) + { + len = sqrt(sqDesc[0]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + // normalize and store in output + descriptor_base[get_local_id(0)] = lookup / len; +} +__kernel + void normalize_descriptors64(__global float * descriptors, int descriptors_step) +{ + descriptors_step /= sizeof(*descriptors); + // no need for thread ID + __global float* descriptor_base = descriptors + descriptors_step * get_group_id(0); + + // read in the unnormalized descriptor values (squared) + volatile __local float sqDesc[64]; + const float lookup = descriptor_base[get_local_id(0)]; + sqDesc[get_local_id(0)] = lookup * lookup; + barrier(CLK_LOCAL_MEM_FENCE); + + // reduction to get total + if (get_local_id(0) < 32) + { + volatile __local float* smem = sqDesc; + + smem[get_local_id(0)] += smem[get_local_id(0) + 32]; + smem[get_local_id(0)] += smem[get_local_id(0) + 16]; + smem[get_local_id(0)] += smem[get_local_id(0) + 8]; + smem[get_local_id(0)] += smem[get_local_id(0) + 4]; + smem[get_local_id(0)] += smem[get_local_id(0) + 2]; + smem[get_local_id(0)] += smem[get_local_id(0) + 1]; + } + + // compute length (square root) + volatile __local float len; + if (get_local_id(0) == 0) + { + len = sqrt(sqDesc[0]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + // normalize and store in output + descriptor_base[get_local_id(0)] = lookup / len; +} diff --git a/modules/ocl/src/kernels/objdetect_hog.cl b/modules/ocl/src/kernels/objdetect_hog.cl new file mode 100644 index 0000000..4a950fa --- /dev/null +++ b/modules/ocl/src/kernels/objdetect_hog.cl @@ -0,0 +1,450 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Wenju He, wenju@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#define CELL_WIDTH 8 +#define CELL_HEIGHT 8 +#define CELLS_PER_BLOCK_X 2 +#define CELLS_PER_BLOCK_Y 2 +#define NTHREADS 256 +#define CV_PI_F 3.1415926535897932384626433832795f + +//---------------------------------------------------------------------------- +// Histogram computation + +__kernel void compute_hists_kernel(const int width, const int cblock_stride_x, const int cblock_stride_y, + const int cnbins, const int cblock_hist_size, const int img_block_width, + const int grad_quadstep, const int qangle_step, + __global const float* grad, __global const uchar* qangle, + const float scale, __global float* block_hists, __local float* smem) +{ + const int lidX = get_local_id(0); + const int lidY = get_local_id(1); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + const int cell_x = lidX / 16; + const int cell_y = lidY; + const int cell_thread_x = lidX & 0xF; + + __local float* hists = smem; + __local float* final_hist = smem + cnbins * 48; + + const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x; + const int offset_y = gidY * cblock_stride_y + (cell_y << 2); + + __global const float* grad_ptr = grad + offset_y * grad_quadstep + (offset_x << 1); + __global const uchar* qangle_ptr = qangle + offset_y * qangle_step + (offset_x << 1); + + // 12 means that 12 pixels affect on block's cell (in one row) + if (cell_thread_x < 12) + { + __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + cell_thread_x; + for (int bin_id = 0; bin_id < cnbins; ++bin_id) + hist[bin_id * 48] = 0.f; + + const int dist_x = -4 + cell_thread_x - 4 * cell_x; + + const int dist_y_begin = -4 - 4 * lidY; + for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y) + { + float2 vote = (float2) (grad_ptr[0], grad_ptr[1]); + uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]); + + grad_ptr += grad_quadstep; + qangle_ptr += qangle_step; + + int dist_center_y = dist_y - 4 * (1 - 2 * cell_y); + int dist_center_x = dist_x - 4 * (1 - 2 * cell_x); + + float gaussian = exp(-(dist_center_y * dist_center_y + dist_center_x * dist_center_x) * scale); + float interp_weight = (8.f - fabs(dist_y + 0.5f)) * (8.f - fabs(dist_x + 0.5f)) / 64.f; + + hist[bin.x * 48] += gaussian * interp_weight * vote.x; + hist[bin.y * 48] += gaussian * interp_weight * vote.y; + } + + volatile __local float* hist_ = hist; + for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48) + { + if (cell_thread_x < 6) hist_[0] += hist_[6]; + if (cell_thread_x < 3) hist_[0] += hist_[3]; + if (cell_thread_x == 0) + final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2]; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + __global float* block_hist = block_hists + (gidY * img_block_width + gidX) * cblock_hist_size; + + int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x; + if (tid < cblock_hist_size) + block_hist[tid] = final_hist[tid]; +} + +//------------------------------------------------------------- +// Normalization of histograms via L2Hys_norm +// +float reduce_smem(volatile __local float* smem, int size) +{ + unsigned int tid = get_local_id(0); + float sum = smem[tid]; + + if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; barrier(CLK_LOCAL_MEM_FENCE); } + + if (tid < 32) + { + if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; + if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; + if (size >= 16) smem[tid] = sum = sum + smem[tid + 8]; + if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; + if (size >= 4) smem[tid] = sum = sum + smem[tid + 2]; + if (size >= 2) smem[tid] = sum = sum + smem[tid + 1]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0]; + + return sum; +} + +__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width, + __global float* block_hists, const float threshold, __local float *squares) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid; + + float elem = 0.f; + if (tid < block_hist_size) + elem = hist[0]; + + squares[tid] = elem * elem; + + barrier(CLK_LOCAL_MEM_FENCE); + float sum = reduce_smem(squares, nthreads); + + float scale = 1.0f / (sqrt(sum) + 0.1f * block_hist_size); + elem = min(elem * scale, threshold); + + barrier(CLK_LOCAL_MEM_FENCE); + squares[tid] = elem * elem; + + barrier(CLK_LOCAL_MEM_FENCE); + sum = reduce_smem(squares, nthreads); + scale = 1.0f / (sqrt(sum) + 1e-3f); + + if (tid < block_hist_size) + hist[0] = elem * scale; +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// +__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int offset_y = i / cdescr_width; + int offset_x = i - offset_y * cdescr_width; + product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + } + + __local float products[NTHREADS]; + + products[tid] = product; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) products[tid] = product = product + products[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) products[tid] = product = product + products[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 32) + { + volatile __local float* smem = products; + smem[tid] = product = product + smem[tid + 32]; + smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; + smem[tid] = product = product + smem[tid + 1]; + } + + if (tid == 0) + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); +} + +//---------------------------------------------------------------------------- +// Extract descriptors + +__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width, + const int img_block_width, const int win_block_stride_x, const int win_block_stride_y, + __global const float* block_hists, __global float* descriptors) +{ + int tid = get_local_id(0); + int gidX = get_group_id(0); + int gidY = get_group_id(1); + + // Get left top corner of the window in src + __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + // Get left top corner of the window in dst + __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + + // Copy elements from src to dst + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int offset_y = i / cdescr_width; + int offset_x = i - offset_y * cdescr_width; + descriptor[i] = hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + } +} + +__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, + const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x, + const int win_block_stride_y, __global const float* block_hists, __global float* descriptors) +{ + int tid = get_local_id(0); + int gidX = get_group_id(0); + int gidY = get_group_id(1); + + // Get left top corner of the window in src + __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + // Get left top corner of the window in dst + __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + + // Copy elements from src to dst + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int block_idx = i / cblock_hist_size; + int idx_in_block = i - block_idx * cblock_hist_size; + + int y = block_idx / cnblocks_win_x; + int x = block_idx - y * cnblocks_win_x; + + descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; + } +} + +//---------------------------------------------------------------------------- +// Gradients computation + +__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, + const __global uchar4 * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) +{ + const int x = get_global_id(0); + const int tid = get_local_id(0); + const int gSizeX = get_local_size(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const uchar4* row = img + gidY * img_step; + + __local float sh_row[(NTHREADS + 2) * 3]; + + uchar4 val; + if (x < width) + val = row[x]; + else + val = row[width - 2]; + + sh_row[tid + 1] = val.x; + sh_row[tid + 1 + (NTHREADS + 2)] = val.y; + sh_row[tid + 1 + 2 * (NTHREADS + 2)] = val.z; + + if (tid == 0) + { + val = row[max(x - 1, 1)]; + sh_row[0] = val.x; + sh_row[(NTHREADS + 2)] = val.y; + sh_row[2 * (NTHREADS + 2)] = val.z; + } + + if (tid == gSizeX - 1) + { + val = row[min(x + 1, width - 2)]; + sh_row[gSizeX + 1] = val.x; + sh_row[gSizeX + 1 + (NTHREADS + 2)] = val.y; + sh_row[gSizeX + 1 + 2 * (NTHREADS + 2)] = val.z; + } + + barrier(CLK_LOCAL_MEM_FENCE); + if (x < width) + { + float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]); + float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]); + + float3 dx; + if (correct_gamma == 1) + dx = sqrt(b) - sqrt(a); + else + dx = b - a; + + float3 dy = (float3) 0.f; + + if (gidY > 0 && gidY < height - 1) + { + a = convert_float3(img[(gidY - 1) * img_step + x].xyz); + b = convert_float3(img[(gidY + 1) * img_step + x].xyz); + + if (correct_gamma == 1) + dy = sqrt(b) - sqrt(a); + else + dy = b - a; + } + + float best_dx = dx.x; + float best_dy = dy.x; + + float mag0 = dx.x * dx.x + dy.x * dy.x; + float mag1 = dx.y * dx.y + dy.y * dy.y; + if (mag0 < mag1) + { + best_dx = dx.y; + best_dy = dy.y; + mag0 = mag1; + } + + mag1 = dx.z * dx.z + dy.z * dy.z; + if (mag0 < mag1) + { + best_dx = dx.z; + best_dy = dy.z; + mag0 = mag1; + } + + mag0 = sqrt(mag0); + + float ang = (atan2(best_dy, best_dx) + CV_PI_F) * angle_scale - 0.5f; + int hidx = (int)floor(ang); + ang -= hidx; + hidx = (hidx + cnbins) % cnbins; + + qangle[(gidY * qangle_step + x) << 1] = hidx; + qangle[((gidY * qangle_step + x) << 1) + 1] = (hidx + 1) % cnbins; + grad[(gidY * grad_quadstep + x) << 1] = mag0 * (1.f - ang); + grad[((gidY * grad_quadstep + x) << 1) + 1] = mag0 * ang; + } +} + +__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, + __global const uchar * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) +{ + const int x = get_global_id(0); + const int tid = get_local_id(0); + const int gSizeX = get_local_size(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const uchar* row = img + gidY * img_step; + + __local float sh_row[NTHREADS + 2]; + + if (x < width) + sh_row[tid + 1] = row[x]; + else + sh_row[tid + 1] = row[width - 2]; + + if (tid == 0) + sh_row[0] = row[max(x - 1, 1)]; + + if (tid == gSizeX - 1) + sh_row[gSizeX + 1] = row[min(x + 1, width - 2)]; + + barrier(CLK_LOCAL_MEM_FENCE); + if (x < width) + { + float dx; + + if (correct_gamma == 1) + dx = sqrt(sh_row[tid + 2]) - sqrt(sh_row[tid]); + else + dx = sh_row[tid + 2] - sh_row[tid]; + + float dy = 0.f; + if (gidY > 0 && gidY < height - 1) + { + float a = (float) img[ (gidY + 1) * img_step + x ]; + float b = (float) img[ (gidY - 1) * img_step + x ]; + if (correct_gamma == 1) + dy = sqrt(a) - sqrt(b); + else + dy = a - b; + } + float mag = sqrt(dx * dx + dy * dy); + + float ang = (atan2(dy, dx) + CV_PI_F) * angle_scale - 0.5f; + int hidx = (int)floor(ang); + ang -= hidx; + hidx = (hidx + cnbins) % cnbins; + + qangle[ (gidY * qangle_step + x) << 1 ] = hidx; + qangle[ ((gidY * qangle_step + x) << 1) + 1 ] = (hidx + 1) % cnbins; + grad[ (gidY * grad_quadstep + x) << 1 ] = mag * (1.f - ang); + grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang; + } +} diff --git a/modules/ocl/src/surf.cpp b/modules/ocl/src/surf.cpp new file mode 100644 index 0000000..7d9798d --- /dev/null +++ b/modules/ocl/src/surf.cpp @@ -0,0 +1,760 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include +#include "precomp.hpp" + + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) + +cv::ocl::SURF_OCL::SURF_OCL() { throw_nogpu(); } +cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) { throw_nogpu(); } +int cv::ocl::SURF_OCL::descriptorSize() const { throw_nogpu(); return 0;} +void cv::ocl::SURF_OCL::uploadKeypoints(const vector&, oclMat&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat&, vector&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat&, vector&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&, oclMat&, bool) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&, oclMat&, bool) { throw_nogpu(); } +void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&, vector&, bool) { throw_nogpu(); } +void cv::ocl::SURF_OCL::releaseMemory() { throw_nogpu(); } + +#else /* !defined (HAVE_OPENCL) */ +namespace cv { namespace ocl +{ + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char * nonfree_surf; +}} + +namespace +{ + static inline int divUp(int total, int grain) + { + return (total + grain - 1) / grain; + } + static inline int calcSize(int octave, int layer) + { + /* Wavelet size at first layer of first octave. */ + const int HAAR_SIZE0 = 9; + + /* Wavelet size increment between layers. This should be an even number, + such that the wavelet sizes in an octave are either all even or all odd. + This ensures that when looking for the neighbours of a sample, the layers + + above and below are aligned correctly. */ + const int HAAR_SIZE_INC = 6; + + return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave; + } + + class SURF_OCL_Invoker + { + public: + // facilities + void bindImgTex(const oclMat& img); + void bindSumTex(const oclMat& sum); + void bindMaskSumTex(const oclMat& maskSum); + + //void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold); + //void loadOctaveConstants(int octave, int layer_rows, int layer_cols); + + // kernel callers declearations + void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows); + + void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, + int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols); + + void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, + oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures); + + void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures); + + void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures); + // end of kernel callers declearations + + + SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) : + surf_(surf), + img_cols(img.cols), img_rows(img.rows), + use_mask(!mask.empty()) + { + CV_Assert(!img.empty() && img.type() == CV_8UC1); + CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1)); + CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0); + + const int min_size = calcSize(surf_.nOctaves - 1, 0); + CV_Assert(img_rows - min_size >= 0); + CV_Assert(img_cols - min_size >= 0); + + const int layer_rows = img_rows >> (surf_.nOctaves - 1); + const int layer_cols = img_cols >> (surf_.nOctaves - 1); + const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1; + CV_Assert(layer_rows - 2 * min_margin > 0); + CV_Assert(layer_cols - 2 * min_margin > 0); + + maxFeatures = std::min(static_cast(img.size().area() * surf.keypointsRatio), 65535); + maxCandidates = std::min(static_cast(1.5 * maxFeatures), 65535); + + CV_Assert(maxFeatures > 0); + + counters.create(1, surf_.nOctaves + 1, CV_32SC1); + counters.setTo(Scalar::all(0)); + + //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast(surf_.hessianThreshold)); + + bindImgTex(img); + oclMat integral_sqsum; + integral(img, surf_.sum, integral_sqsum); // the two argumented integral version is incorrect + + bindSumTex(surf_.sum); + maskSumTex = 0; + + if (use_mask) + { + throw std::exception(); + //!FIXME + // temp fix for missing min overload + oclMat temp(mask.size(), mask.type()); + temp.setTo(Scalar::all(1.0)); + //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this + integral(surf_.mask1, surf_.maskSum); + bindMaskSumTex(surf_.maskSum); + } + } + + void detectKeypoints(oclMat& keypoints) + { + // create image pyramid buffers + // different layers have same sized buffers, but they are sampled from gaussin kernel. + surf_.det.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1); + surf_.trace.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1); + + surf_.maxPosBuffer.create(1, maxCandidates, CV_32SC4); + keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1); + keypoints.setTo(Scalar::all(0)); + + for (int octave = 0; octave < surf_.nOctaves; ++octave) + { + const int layer_rows = img_rows >> octave; + const int layer_cols = img_cols >> octave; + + //loadOctaveConstants(octave, layer_rows, layer_cols); + + icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows); + + icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave, + octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols); + + unsigned int maxCounter = Mat(counters).at(1 + octave); + maxCounter = std::min(maxCounter, static_cast(maxCandidates)); + + if (maxCounter > 0) + { + icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter, + keypoints, counters, octave, layer_rows, maxFeatures); + } + } + unsigned int featureCounter = Mat(counters).at(0); + featureCounter = std::min(featureCounter, static_cast(maxFeatures)); + + keypoints.cols = featureCounter; + + if (surf_.upright) + keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0)); + else + findOrientation(keypoints); + } + + void findOrientation(oclMat& keypoints) + { + const int nFeatures = keypoints.cols; + if (nFeatures > 0) + { + icvCalcOrientation_gpu(keypoints, nFeatures); + } + } + + void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize) + { + const int nFeatures = keypoints.cols; + if (nFeatures > 0) + { + descriptors.create(nFeatures, descriptorSize, CV_32F); + compute_descriptors_gpu(descriptors, keypoints, nFeatures); + } + } + + ~SURF_OCL_Invoker() + { + if(imgTex) + openCLFree(imgTex); + if(sumTex) + openCLFree(sumTex); + if(maskSumTex) + openCLFree(maskSumTex); + additioalParamBuffer.release(); + } + + private: + SURF_OCL& surf_; + + int img_cols, img_rows; + + bool use_mask; + + int maxCandidates; + int maxFeatures; + + oclMat counters; + + // texture buffers + cl_mem imgTex; + cl_mem sumTex; + cl_mem maskSumTex; + + oclMat additioalParamBuffer; + }; +} + +cv::ocl::SURF_OCL::SURF_OCL() +{ + hessianThreshold = 100.0f; + extended = true; + nOctaves = 4; + nOctaveLayers = 2; + keypointsRatio = 0.01f; + upright = false; +} + +cv::ocl::SURF_OCL::SURF_OCL(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright) +{ + hessianThreshold = _threshold; + extended = _extended; + nOctaves = _nOctaves; + nOctaveLayers = _nOctaveLayers; + keypointsRatio = _keypointsRatio; + upright = _upright; +} + +int cv::ocl::SURF_OCL::descriptorSize() const +{ + return extended ? 128 : 64; +} + +void cv::ocl::SURF_OCL::uploadKeypoints(const vector& keypoints, oclMat& keypointsGPU) +{ + if (keypoints.empty()) + keypointsGPU.release(); + else + { + Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast(keypoints.size()), CV_32FC1); + + float* kp_x = keypointsCPU.ptr(SURF_OCL::X_ROW); + float* kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); + int* kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); + int* kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); + float* kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); + float* kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); + float* kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); + + for (size_t i = 0, size = keypoints.size(); i < size; ++i) + { + const KeyPoint& kp = keypoints[i]; + kp_x[i] = kp.pt.x; + kp_y[i] = kp.pt.y; + kp_octave[i] = kp.octave; + kp_size[i] = kp.size; + kp_dir[i] = kp.angle; + kp_hessian[i] = kp.response; + kp_laplacian[i] = 1; + } + + keypointsGPU.upload(keypointsCPU); + } +} + +void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector& keypoints) +{ + const int nFeatures = keypointsGPU.cols; + + if (nFeatures == 0) + keypoints.clear(); + else + { + CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT); + + Mat keypointsCPU(keypointsGPU); + + keypoints.resize(nFeatures); + + float* kp_x = keypointsCPU.ptr(SURF_OCL::X_ROW); + float* kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); + int* kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); + int* kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); + float* kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); + float* kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); + float* kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); + + for (int i = 0; i < nFeatures; ++i) + { + KeyPoint& kp = keypoints[i]; + kp.pt.x = kp_x[i]; + kp.pt.y = kp_y[i]; + kp.class_id = kp_laplacian[i]; + kp.octave = kp_octave[i]; + kp.size = kp_size[i]; + kp.angle = kp_dir[i]; + kp.response = kp_hessian[i]; + } + } +} + +void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat& descriptorsGPU, vector& descriptors) +{ + if (descriptorsGPU.empty()) + descriptors.clear(); + else + { + CV_Assert(descriptorsGPU.type() == CV_32F); + + descriptors.resize(descriptorsGPU.rows * descriptorsGPU.cols); + Mat descriptorsCPU(descriptorsGPU.size(), CV_32F, &descriptors[0]); + descriptorsGPU.download(descriptorsCPU); + } +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints) +{ + if (!img.empty()) + { + SURF_OCL_Invoker surf(*this, img, mask); + + surf.detectKeypoints(keypoints); + } +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors, + bool useProvidedKeypoints) +{ + if (!img.empty()) + { + SURF_OCL_Invoker surf(*this, img, mask); + + if (!useProvidedKeypoints) + surf.detectKeypoints(keypoints); + else if (!upright) + { + surf.findOrientation(keypoints); + } + + surf.computeDescriptors(keypoints, descriptors, descriptorSize()); + } +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints) +{ + oclMat keypointsGPU; + + (*this)(img, mask, keypointsGPU); + + downloadKeypoints(keypointsGPU, keypoints); +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints, + oclMat& descriptors, bool useProvidedKeypoints) +{ + oclMat keypointsGPU; + + if (useProvidedKeypoints) + uploadKeypoints(keypoints, keypointsGPU); + + (*this)(img, mask, keypointsGPU, descriptors, useProvidedKeypoints); + + downloadKeypoints(keypointsGPU, keypoints); +} + +void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints, + vector& descriptors, bool useProvidedKeypoints) +{ + oclMat descriptorsGPU; + + (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints); + + downloadDescriptors(descriptorsGPU, descriptors); +} + +void cv::ocl::SURF_OCL::releaseMemory() +{ + sum.release(); + mask1.release(); + maskSum.release(); + intBuffer.release(); + det.release(); + trace.release(); + maxPosBuffer.release(); +} + +// Facilities + +//// load SURF constants into device memory +//void SURF_OCL_Invoker::loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold) +//{ +// Mat tmp(1, 9, CV_32FC1); +// float * tmp_data = tmp.ptr(); +// *tmp_data = maxCandidates; +// *(++tmp_data) = maxFeatures; +// *(++tmp_data) = img_rows; +// *(++tmp_data) = img_cols; +// *(++tmp_data) = nOctaveLayers; +// *(++tmp_data) = hessianThreshold; +// additioalParamBuffer = tmp; +//} +//void SURF_OCL_Invoker::loadOctaveConstants(int octave, int layer_rows, int layer_cols) +//{ +// Mat tmp = additioalParamBuffer; +// float * tmp_data = tmp.ptr(); +// tmp_data += 6; +// *tmp_data = octave; +// *(++tmp_data) = layer_rows; +// *(++tmp_data) = layer_cols; +// additioalParamBuffer = tmp; +//} + +// create and bind source buffer to image oject. +void SURF_OCL_Invoker::bindImgTex(const oclMat& img) +{ + Mat cpu_img(img); // time consuming + cl_image_format format; + int err; + + format.image_channel_data_type = CL_UNSIGNED_INT8; + format.image_channel_order = CL_R; + +#if CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = cpu_img.cols; + desc.image_height = cpu_img.rows; + desc.image_depth = NULL; + desc.image_array_size = 1; + desc.image_row_pitch = cpu_img.step; + desc.image_slice_pitch= 0; + desc.buffer = NULL; + desc.num_mip_levels = 0; + desc.num_samples = 0; + imgTex = clCreateImage(img.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); +#else + imgTex = clCreateImage2D( + img.clCxt->impl->clContext, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &format, + cpu_img.cols, + cpu_img.rows, + cpu_img.step, + cpu_img.data, + &err); +#endif + openCLSafeCall(err); +} + +void SURF_OCL_Invoker::bindSumTex(const oclMat& sum) +{ + Mat cpu_img(sum); // time consuming + cl_image_format format; + int err; + format.image_channel_data_type = CL_UNSIGNED_INT32; + format.image_channel_order = CL_R; +#if CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = cpu_img.cols; + desc.image_height = cpu_img.rows; + desc.image_depth = NULL; + desc.image_array_size = 1; + desc.image_row_pitch = cpu_img.step; + desc.image_slice_pitch= 0; + desc.buffer = NULL; + desc.num_mip_levels = 0; + desc.num_samples = 0; + sumTex = clCreateImage(sum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); +#else + sumTex = clCreateImage2D( + sum.clCxt->impl->clContext, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &format, + cpu_img.cols, + cpu_img.rows, + cpu_img.step, + cpu_img.data, + &err); +#endif + openCLSafeCall(err); +} +void SURF_OCL_Invoker::bindMaskSumTex(const oclMat& maskSum) +{ + Mat cpu_img(maskSum); // time consuming + cl_image_format format; + int err; + format.image_channel_data_type = CL_UNSIGNED_INT32; + format.image_channel_order = CL_R; +#if CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = cpu_img.cols; + desc.image_height = cpu_img.rows; + desc.image_depth = NULL; + desc.image_array_size = 1; + desc.image_row_pitch = cpu_img.step; + desc.image_slice_pitch= 0; + desc.buffer = NULL; + desc.num_mip_levels = 0; + desc.num_samples = 0; + maskSumTex = clCreateImage(maskSum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err); +#else + maskSumTex = clCreateImage2D( + maskSum.clCxt->impl->clContext, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &format, + cpu_img.cols, + cpu_img.rows, + cpu_img.step, + cpu_img.data, + &err); +#endif + openCLSafeCall(err); +} + +//////////////////////////// +// kernel caller definitions +void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int c_layer_rows) +{ + const int min_size = calcSize(octave, 0); + const int max_samples_i = 1 + ((img_rows - min_size) >> octave); + const int max_samples_j = 1 + ((img_cols - min_size) >> octave); + + Context *clCxt = det.clCxt; + string kernelName = "icvCalcLayerDetAndTrace"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&nOctaveLayers)); + args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows)); + + size_t localThreads[3] = {16, 16, 1}; + size_t globalThreads[3] = { + divUp(max_samples_j, localThreads[0]) * localThreads[0], + divUp(max_samples_i, localThreads[1]) * localThreads[1] * (nOctaveLayers + 2), + 1}; + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, + int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols) +{ + const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1; + + Context *clCxt = det.clCxt; + string kernelName = use_mask ? "icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&maxCounter.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&counterOffset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&nLayers)); + args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&layer_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&maxCandidates)); + args.push_back( make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold)); + + if(use_mask) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&maskSumTex)); + } + + size_t localThreads[3] = {16, 16, 1}; + size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) * localThreads[0], + divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) * nLayers * localThreads[1], + 1}; + + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, + oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures) +{ + Context *clCxt = det.clCxt; + string kernelName = "icvInterpolateKeypoint"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&counters.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures)); + + size_t localThreads[3] = {3, 3, 3}; + size_t globalThreads[3] = {maxCounter * localThreads[0], 1, 1}; + + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures) +{ + Context * clCxt = counters.clCxt; + string kernelName = "icvCalcOrientation"; + + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); + + size_t localThreads[3] = {32, 4, 1}; + size_t globalThreads[3] = {nFeatures * localThreads[0], localThreads[1], 1}; + + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures) +{ + // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D + Context *clCxt = descriptors.clCxt; + string kernelName = ""; + vector< pair > args; + size_t localThreads[3] = {1, 1, 1}; + size_t globalThreads[3] = {1, 1, 1}; + + if(descriptors.cols == 64) + { + kernelName = "compute_descriptors64"; + + localThreads[0] = 6; + localThreads[1] = 6; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = 16 * localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + + kernelName = "normalize_descriptors64"; + + localThreads[0] = 64; + localThreads[1] = 1; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + } + else + { + kernelName = "compute_descriptors128"; + + localThreads[0] = 6; + localThreads[1] = 6; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = 16 * localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + + kernelName = "normalize_descriptors128"; + + localThreads[0] = 128; + localThreads[1] = 1; + + globalThreads[0] = nFeatures * localThreads[0]; + globalThreads[1] = localThreads[1]; + + args.clear(); + args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + } +} + +#endif // /* !defined (HAVE_OPENCL) */ + diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_hog.cpp new file mode 100644 index 0000000..f497515 --- /dev/null +++ b/modules/ocl/test/test_hog.cpp @@ -0,0 +1,192 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Wenju He, wenju@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include "opencv2/core/core.hpp" +using namespace std; +#ifdef HAVE_OPENCL + + +PARAM_TEST_CASE(HOG,cv::Size,int) +{ + cv::Size winSize; + int type; + vector info; + virtual void SetUp() + { + winSize = GET_PARAM(0); + type = GET_PARAM(1); + cv::ocl::getDevice(info); + } +}; + +TEST_P(HOG, GetDescriptors) +{ + // Load image + cv::Mat img_rgb = readImage("../../../samples/gpu/road.png"); + ASSERT_FALSE(img_rgb.empty()); + + // Convert image + cv::Mat img; + switch (type) + { + case CV_8UC1: + cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + break; + case CV_8UC4: + default: + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + break; + } + cv::ocl::oclMat d_img(img); + + // HOGs + cv::ocl::HOGDescriptor ocl_hog; + ocl_hog.gamma_correction = true; + cv::HOGDescriptor hog; + hog.gammaCorrection = true; + + // Compute descriptor + cv::ocl::oclMat d_descriptors; + ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL); + cv::Mat down_descriptors; + d_descriptors.download(down_descriptors); + down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows); + + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + std::vector descriptors; + switch (type) + { + case CV_8UC1: + hog.compute(img, descriptors, ocl_hog.win_size); + break; + case CV_8UC4: + default: + hog.compute(img_rgb, descriptors, ocl_hog.win_size); + break; + } + cv::Mat cpu_descriptors(descriptors); + + EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2); +} + + +TEST_P(HOG, Detect) +{ + // Load image + cv::Mat img_rgb = readImage("../../../samples/gpu/road.png"); + ASSERT_FALSE(img_rgb.empty()); + + // Convert image + cv::Mat img; + switch (type) + { + case CV_8UC1: + cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + break; + case CV_8UC4: + default: + cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + break; + } + cv::ocl::oclMat d_img(img); + + // HOGs + if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128))) + winSize = cv::Size(64, 128); + cv::ocl::HOGDescriptor ocl_hog(winSize); + ocl_hog.gamma_correction = true; + + cv::HOGDescriptor hog; + hog.winSize = winSize; + hog.gammaCorrection = true; + + if (winSize.width == 48 && winSize.height == 96) + { + // daimler's base + ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96()); + hog.setSVMDetector(hog.getDaimlerPeopleDetector()); + } + else if (winSize.width == 64 && winSize.height == 128) + { + ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128()); + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + } + else + { + ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + } + + // OpenCL detection + std::vector d_v_locations; + ocl_hog.detect(d_img, d_v_locations, 0); + cv::Mat d_locations(d_v_locations); + + // CPU detection + std::vector v_locations; + switch (type) + { + case CV_8UC1: + hog.detect(img, v_locations, 0); + break; + case CV_8UC4: + default: + hog.detect(img_rgb, v_locations, 0); + break; + } + cv::Mat locations(v_locations); + + char s[100]={0}; + EXPECT_MAT_NEAR(d_locations, locations, 0, s); +} + + +INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( + testing::Values(cv::Size(64, 128), cv::Size(48, 96)), + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); + + +#endif //HAVE_OPENCL