modules/softcascade/src/detector_cuda.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2008-2012, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44
  45 #if !defined (HAVE_CUDA)
  46
  47 cv::softcascade::SCascade::SCascade(const double, const double, const int, const int) { throw_no_cuda(); }
  48
  49 cv::softcascade::SCascade::~SCascade() { throw_no_cuda(); }
  50
  51 bool cv::softcascade::SCascade::load(const FileNode&) { throw_no_cuda(); return false;}
  52
  53 void cv::softcascade::SCascade::detect(InputArray, InputArray, OutputArray, cv::cuda::Stream&) const { throw_no_cuda(); }
  54
  55 void cv::softcascade::SCascade::read(const FileNode& fn) { Algorithm::read(fn); }
  56
  57 cv::softcascade::ChannelsProcessor::ChannelsProcessor() { throw_no_cuda(); }
  58  cv::softcascade::ChannelsProcessor::~ChannelsProcessor() { throw_no_cuda(); }
  59
  60 cv::Ptr<cv::softcascade::ChannelsProcessor> cv::softcascade::ChannelsProcessor::create(const int, const int, const int)
  61 { throw_no_cuda(); return cv::Ptr<cv::softcascade::ChannelsProcessor>(); }
  62
  63 #else
  64
  65 # include "cuda_invoker.hpp"
  66
  67 cv::softcascade::cudev::Level::Level(int idx, const Octave& oct, const float scale, const int w, const int h)
  68 :  octave(idx), step(oct.stages), relScale(scale / oct.scale)
  69 {
  70     workRect.x = (unsigned char)cvRound(w / (float)oct.shrinkage);
  71     workRect.y = (unsigned char)cvRound(h / (float)oct.shrinkage);
  72
  73     objSize.x  = cv::saturate_cast<uchar>(oct.size.x * relScale);
  74     objSize.y  = cv::saturate_cast<uchar>(oct.size.y * relScale);
  75
  76     // according to R. Benenson, M. Mathias, R. Timofte and L. Van Gool's and Dallal's papers
  77     if (fabs(relScale - 1.f) < FLT_EPSILON)
  78         scaling[0] = scaling[1] = 1.f;
  79     else
  80     {
  81         scaling[0] = (relScale < 1.f) ? 0.89f * ::pow(relScale, 1.099f / ::log(2.0f)) : 1.f;
  82         scaling[1] = relScale * relScale;
  83     }
  84 }
  85
  86 namespace cv { namespace softcascade { namespace cudev {
  87
  88     void fillBins(cv::cuda::PtrStepSzb hogluv, const cv::cuda::PtrStepSzf& nangle,
  89         const int fw, const int fh, const int bins, cudaStream_t stream);
  90
  91     void suppress(const cv::cuda::PtrStepSzb& objects, cv::cuda::PtrStepSzb overlaps, cv::cuda::PtrStepSzi ndetections,
  92         cv::cuda::PtrStepSzb suppressed, cudaStream_t stream);
  93
  94     void bgr2Luv(const cv::cuda::PtrStepSzb& bgr, cv::cuda::PtrStepSzb luv);
  95     void transform(const cv::cuda::PtrStepSz<uchar3>& bgr, cv::cuda::PtrStepSzb gray);
  96     void gray2hog(const cv::cuda::PtrStepSzb& gray, cv::cuda::PtrStepSzb mag, const int bins);
  97     void shrink(const cv::cuda::PtrStepSzb& channels, cv::cuda::PtrStepSzb shrunk);
  98
  99     void shfl_integral(const cv::cuda::PtrStepSzb& img, cv::cuda::PtrStepSz<unsigned int> integral, cudaStream_t stream);
 100 }}}
 101
 102 struct cv::softcascade::SCascade::Fields
 103 {
 104     static Fields* parseCascade(const FileNode &root, const float mins, const float maxs, const int totals, const int method)
 105     {
 106         static const char *const SC_STAGE_TYPE          = "stageType";
 107         static const char *const SC_BOOST               = "BOOST";
 108         static const char *const SC_FEATURE_TYPE        = "featureType";
 109         static const char *const SC_ICF                 = "ICF";
 110         static const char *const SC_ORIG_W              = "width";
 111         static const char *const SC_ORIG_H              = "height";
 112         static const char *const SC_FEATURE_FORMAT      = "featureFormat";
 113         static const char *const SC_SHRINKAGE           = "shrinkage";
 114         static const char *const SC_OCTAVES             = "octaves";
 115         static const char *const SC_OCT_SCALE           = "scale";
 116         static const char *const SC_OCT_WEAKS           = "weaks";
 117         static const char *const SC_TREES               = "trees";
 118         static const char *const SC_WEAK_THRESHOLD      = "treeThreshold";
 119         static const char *const SC_FEATURES            = "features";
 120         static const char *const SC_INTERNAL            = "internalNodes";
 121         static const char *const SC_LEAF                = "leafValues";
 122         static const char *const SC_F_CHANNEL           = "channel";
 123         static const char *const SC_F_RECT              = "rect";
 124
 125         // only Ada Boost supported
 126         String stageTypeStr = (String)root[SC_STAGE_TYPE];
 127         CV_Assert(stageTypeStr == SC_BOOST);
 128
 129         // only HOG-like integral channel features supported
 130         String featureTypeStr = (String)root[SC_FEATURE_TYPE];
 131         CV_Assert(featureTypeStr == SC_ICF);
 132
 133         int origWidth  = (int)root[SC_ORIG_W];
 134         int origHeight = (int)root[SC_ORIG_H];
 135
 136         String fformat = (String)root[SC_FEATURE_FORMAT];
 137         bool useBoxes = (fformat == "BOX");
 138         ushort shrinkage = cv::saturate_cast<ushort>((int)root[SC_SHRINKAGE]);
 139
 140         FileNode fn = root[SC_OCTAVES];
 141         if (fn.empty()) return 0;
 142
 143         std::vector<cudev::Octave>  voctaves;
 144         std::vector<float>   vstages;
 145         std::vector<cudev::Node>    vnodes;
 146         std::vector<float>   vleaves;
 147
 148         FileNodeIterator it = fn.begin(), it_end = fn.end();
 149         for (ushort octIndex = 0; it != it_end; ++it, ++octIndex)
 150         {
 151             FileNode fns = *it;
 152             float scale = powf(2.f,saturate_cast<float>((int)fns[SC_OCT_SCALE]));
 153             bool isUPOctave = scale >= 1;
 154
 155             ushort nweaks = saturate_cast<ushort>((int)fns[SC_OCT_WEAKS]);
 156
 157             ushort2 size;
 158             size.x = (unsigned short)cvRound(origWidth * scale);
 159             size.y = (unsigned short)cvRound(origHeight * scale);
 160
 161             cudev::Octave octave(octIndex, nweaks, shrinkage, size, scale);
 162             CV_Assert(octave.stages > 0);
 163             voctaves.push_back(octave);
 164
 165             FileNode ffs = fns[SC_FEATURES];
 166             if (ffs.empty()) return 0;
 167
 168             std::vector<cv::Rect> feature_rects;
 169             std::vector<int> feature_channels;
 170
 171             FileNodeIterator ftrs = ffs.begin(), ftrs_end = ffs.end();
 172             int feature_offset = 0;
 173             for (; ftrs != ftrs_end; ++ftrs, ++feature_offset )
 174             {
 175                 cv::FileNode ftn = (*ftrs)[SC_F_RECT];
 176                 cv::FileNodeIterator r_it = ftn.begin();
 177                 int x = (int)*(r_it++);
 178                 int y = (int)*(r_it++);
 179                 int w = (int)*(r_it++);
 180                 int h = (int)*(r_it++);
 181
 182                 if (useBoxes)
 183                 {
 184                     if (isUPOctave)
 185                     {
 186                         w -= x;
 187                         h -= y;
 188                     }
 189                 }
 190                 else
 191                 {
 192                     if (!isUPOctave)
 193                     {
 194                         w += x;
 195                         h += y;
 196                     }
 197                 }
 198                 feature_rects.push_back(cv::Rect(x, y, w, h));
 199                 feature_channels.push_back((int)(*ftrs)[SC_F_CHANNEL]);
 200             }
 201
 202             fns = fns[SC_TREES];
 203             if (fn.empty()) return 0;
 204
 205             // for each stage (~ decision tree with H = 2)
 206             FileNodeIterator st = fns.begin(), st_end = fns.end();
 207             for (; st != st_end; ++st )
 208             {
 209                 FileNode octfn = *st;
 210                 float threshold = (float)octfn[SC_WEAK_THRESHOLD];
 211                 vstages.push_back(threshold);
 212
 213                 FileNode intfns = octfn[SC_INTERNAL];
 214                 FileNodeIterator inIt = intfns.begin(), inIt_end = intfns.end();
 215                 for (; inIt != inIt_end;)
 216                 {
 217                     inIt +=2;
 218                     int featureIdx = (int)(*(inIt++));
 219
 220                     float orig_threshold = (float)(*(inIt++));
 221                     unsigned int th = saturate_cast<unsigned int>((int)orig_threshold);
 222                     cv::Rect& r = feature_rects[featureIdx];
 223                     uchar4 rect;
 224                     rect.x = saturate_cast<uchar>(r.x);
 225                     rect.y = saturate_cast<uchar>(r.y);
 226                     rect.z = saturate_cast<uchar>(r.width);
 227                     rect.w = saturate_cast<uchar>(r.height);
 228
 229                     unsigned int channel = saturate_cast<unsigned int>(feature_channels[featureIdx]);
 230                     vnodes.push_back(cudev::Node(rect, channel, th));
 231                 }
 232
 233                 intfns = octfn[SC_LEAF];
 234                 inIt = intfns.begin(), inIt_end = intfns.end();
 235                 for (; inIt != inIt_end; ++inIt)
 236                 {
 237                     vleaves.push_back((float)(*inIt));
 238                 }
 239             }
 240         }
 241
 242         cv::Mat hoctaves(1, (int) (voctaves.size() * sizeof(cudev::Octave)), CV_8UC1, (uchar*)&(voctaves[0]));
 243         CV_Assert(!hoctaves.empty());
 244
 245         cv::Mat hstages(cv::Mat(vstages).reshape(1,1));
 246         CV_Assert(!hstages.empty());
 247
 248         cv::Mat hnodes(1, (int) (vnodes.size() * sizeof(cudev::Node)), CV_8UC1, (uchar*)&(vnodes[0]) );
 249         CV_Assert(!hnodes.empty());
 250
 251         cv::Mat hleaves(cv::Mat(vleaves).reshape(1,1));
 252         CV_Assert(!hleaves.empty());
 253
 254         Fields* fields = new Fields(mins, maxs, totals, origWidth, origHeight, shrinkage, 0,
 255             hoctaves, hstages, hnodes, hleaves, method);
 256         fields->voctaves = voctaves;
 257         fields->createLevels(DEFAULT_FRAME_HEIGHT, DEFAULT_FRAME_WIDTH);
 258
 259         return fields;
 260     }
 261
 262     bool check(float mins,float  maxs, int scales)
 263     {
 264         bool updated = ((minScale == mins) || (maxScale == maxs) || (totals == scales));
 265
 266         minScale = mins;
 267         maxScale = maxScale;
 268         totals   = scales;
 269
 270         return updated;
 271     }
 272
 273     int createLevels(const int fh, const int fw)
 274     {
 275         std::vector<cudev::Level> vlevels;
 276         float logFactor = (::log(maxScale) - ::log(minScale)) / (totals -1);
 277
 278         float scale = minScale;
 279         int dcs = 0;
 280         for (int sc = 0; sc < totals; ++sc)
 281         {
 282             int width  = (int)::std::max(0.0f, fw - (origObjWidth  * scale));
 283             int height = (int)::std::max(0.0f, fh - (origObjHeight * scale));
 284
 285             float logScale = ::log(scale);
 286             int fit = fitOctave(voctaves, logScale);
 287
 288             cudev::Level level(fit, voctaves[fit], scale, width, height);
 289
 290             if (!width || !height)
 291                 break;
 292             else
 293             {
 294                 vlevels.push_back(level);
 295                 if (voctaves[fit].scale < 1) ++dcs;
 296             }
 297
 298             if (::fabs(scale - maxScale) < FLT_EPSILON) break;
 299             scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));
 300         }
 301
 302         cv::Mat hlevels = cv::Mat(1, (int) (vlevels.size() * sizeof(cudev::Level)), CV_8UC1, (uchar*)&(vlevels[0]) );
 303         CV_Assert(!hlevels.empty());
 304         levels.upload(hlevels);
 305         downscales = dcs;
 306         return dcs;
 307     }
 308
 309     bool update(int fh, int fw, int shr)
 310     {
 311         shrunk.create(fh / shr * HOG_LUV_BINS, fw / shr, CV_8UC1);
 312         integralBuffer.create(shrunk.rows, shrunk.cols, CV_32SC1);
 313
 314         hogluv.create((fh / shr) * HOG_LUV_BINS + 1, fw / shr + 1, CV_32SC1);
 315         hogluv.setTo(cv::Scalar::all(0));
 316
 317         overlaps.create(1, 5000, CV_8UC1);
 318         suppressed.create(1, sizeof(Detection) * 51, CV_8UC1);
 319
 320         return true;
 321     }
 322
 323     Fields( const float mins, const float maxs, const int tts, const int ow, const int oh, const int shr, const int ds,
 324         cv::Mat hoctaves, cv::Mat hstages, cv::Mat hnodes, cv::Mat hleaves, int method)
 325     : minScale(mins), maxScale(maxs), totals(tts), origObjWidth(ow), origObjHeight(oh), shrinkage(shr), downscales(ds)
 326     {
 327         update(DEFAULT_FRAME_HEIGHT, DEFAULT_FRAME_WIDTH, shr);
 328         octaves.upload(hoctaves);
 329         stages.upload(hstages);
 330         nodes.upload(hnodes);
 331         leaves.upload(hleaves);
 332
 333         preprocessor = ChannelsProcessor::create(shrinkage, 6, method);
 334     }
 335
 336     void detect(cv::cuda::GpuMat& objects, cv::cuda::Stream& s) const
 337     {
 338         objects.setTo(Scalar::all(0), s);
 339
 340         cudaSafeCall( cudaGetLastError());
 341
 342         cudev::CascadeInvoker<cudev::GK107PolicyX4> invoker
 343         = cudev::CascadeInvoker<cudev::GK107PolicyX4>(levels, stages, nodes, leaves);
 344
 345         cudaStream_t stream = cv::cuda::StreamAccessor::getStream(s);
 346         invoker(mask, hogluv, objects, downscales, stream);
 347     }
 348
 349     void suppress(cv::cuda::GpuMat& objects, cv::cuda::Stream& s)
 350     {
 351         cv::cuda::GpuMat ndetections = cv::cuda::GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
 352         ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
 353
 354         overlaps.setTo(0, s);
 355         suppressed.setTo(0, s);
 356
 357         cudaStream_t stream = cv::cuda::StreamAccessor::getStream(s);
 358         cudev::suppress(objects, overlaps, ndetections, suppressed, stream);
 359     }
 360
 361 private:
 362
 363     typedef std::vector<cudev::Octave>::const_iterator  octIt_t;
 364     static int fitOctave(const std::vector<cudev::Octave>& octs, const float& logFactor)
 365     {
 366         float minAbsLog = FLT_MAX;
 367         int res =  0;
 368         for (int oct = 0; oct < (int)octs.size(); ++oct)
 369         {
 370             const cudev::Octave& octave =octs[oct];
 371             float logOctave = ::log(octave.scale);
 372             float logAbsScale = ::fabs(logFactor - logOctave);
 373
 374             if(logAbsScale < minAbsLog)
 375             {
 376                 res = oct;
 377                 minAbsLog = logAbsScale;
 378             }
 379         }
 380         return res;
 381     }
 382
public:

    // Channel preprocessor; created in the constructor through
    // ChannelsProcessor::create(shrinkage, 6, method).
    cv::Ptr<ChannelsProcessor> preprocessor;

    // Scale range used by createLevels(); overwritten by check().
    float minScale;
    float maxScale;

    // Number of scale levels requested; upper bound of the loop in createLevels().
    int totals;

    // Original object size, multiplied by the scale in createLevels().
    int origObjWidth;
    int origObjHeight;

    // Shrinkage factor given at construction; it cannot change afterwards.
    const int shrinkage;
    // Number of levels whose octave scale is below 1 (set by createLevels()).
    int downscales;


    // Shrunk channels, CV_8UC1, (fh / shr) * HOG_LUV_BINS rows by fw / shr
    // columns (see update()). Original note: 160x120x10 - presumably the
    // default 640x480 frame with shrinkage 4; TODO confirm.
    cv::cuda::GpuMat shrunk;

    // Temporary matrix used while the integral is computed.
    cv::cuda::GpuMat integralBuffer;

    // CV_32SC1, one row and one column larger than `shrunk` (see update()).
    // Original note: 161x121x10.
    cv::cuda::GpuMat hogluv;


    // Output of suppress(); sized for 51 Detection records in update().
    cv::cuda::GpuMat suppressed;
    // Scratch buffer passed to cudev::suppress() as its `overlaps` argument.
    cv::cuda::GpuMat overlaps;


    // Cascade tables uploaded in the constructor (parsed from the cascade file).
    cv::cuda::GpuMat octaves;
    cv::cuda::GpuMat stages;
    cv::cuda::GpuMat nodes;
    cv::cuda::GpuMat leaves;
    // Scale levels uploaded by createLevels().
    cv::cuda::GpuMat levels;


    // For ROI: `mask` is filled in SCascade::detect(); `genRoiTmp` is only
    // mentioned in a commented-out call in this file.
    cv::cuda::GpuMat mask;
    cv::cuda::GpuMat genRoiTmp;

//     cv::cuda::GpuMat collected;


    // Host copy of the octaves, needed by createLevels().
    std::vector<cudev::Octave> voctaves;

//     DeviceInfo info;

    enum { BOOST = 0 };
    enum
    {
        // Frame size the buffers and levels are prepared for at construction.
        DEFAULT_FRAME_WIDTH        = 640,
        DEFAULT_FRAME_HEIGHT       = 480,
        // Row multiplier used when the channel buffers are sized in update().
        HOG_LUV_BINS               = 10
    };

private:
    // Declared private to forbid assignment; no definition appears in this file.
    cv::softcascade::SCascade::Fields& operator=( const cv::softcascade::SCascade::Fields & );
 445 };
 446
 447 cv::softcascade::SCascade::SCascade(const double mins, const double maxs, const int sc, const int fl)
 448 : fields(0),  minScale(mins), maxScale(maxs), scales(sc), flags(fl) {}
 449
 450 cv::softcascade::SCascade::~SCascade() { delete fields; }
 451
 452 bool cv::softcascade::SCascade::load(const FileNode& fn)
 453 {
 454     if (fields) delete fields;
 455     fields = Fields::parseCascade(fn, (float)minScale, (float)maxScale, scales, flags);
 456     return fields != 0;
 457 }
 458
 459 namespace {
 460
 461 void integral(const cv::cuda::GpuMat& src, cv::cuda::GpuMat& sum, cv::cuda::GpuMat& buffer, cv::cuda::Stream& s)
 462 {
 463     CV_Assert(src.type() == CV_8UC1);
 464
 465     cudaStream_t stream = cv::cuda::StreamAccessor::getStream(s);
 466
 467     cv::Size whole;
 468     cv::Point offset;
 469
 470     src.locateROI(whole, offset);
 471
 472     if (cv::cuda::deviceSupports(cv::cuda::WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
 473         && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
 474     {
 475         ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
 476
 477         cv::softcascade::cudev::shfl_integral(src, buffer, stream);
 478
 479         sum.create(src.rows + 1, src.cols + 1, CV_32SC1);
 480         sum.setTo(cv::Scalar::all(0), s);
 481
 482         cv::cuda::GpuMat inner = sum(cv::Rect(1, 1, src.cols, src.rows));
 483         cv::cuda::GpuMat res = buffer(cv::Rect(0, 0, src.cols, src.rows));
 484
 485         res.copyTo(inner, s);
 486     }
 487     else {CV_Error(cv::Error::GpuNotSupported, ": CC 3.x required.");}
 488 }
 489
 490 }
 491
 492 void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, OutputArray _objects, cv::cuda::Stream& s) const
 493 {
 494     CV_Assert(fields);
 495
 496     // only color images and precomputed integrals are supported
 497     int type = _image.type();
 498     CV_Assert(type == CV_8UC3 || type == CV_32SC1 || (!_rois.empty()));
 499
 500     const cv::cuda::GpuMat image = _image.getGpuMat();
 501
 502     if (_objects.empty()) _objects.create(1, 4096 * sizeof(Detection), CV_8UC1);
 503
 504     cv::cuda::GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();
 505
 506     /// roi
 507     Fields& flds = *fields;
 508     int shr = flds.shrinkage;
 509
 510     flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type());
 511
 512     cudev::shrink(rois, flds.mask);
 513     //cv::cuda::transpose(flds.genRoiTmp, flds.mask, s);
 514
 515     if (type == CV_8UC3)
 516     {
 517         flds.update(image.rows, image.cols, flds.shrinkage);
 518
 519         if (flds.check((float)minScale, (float)maxScale, scales))
 520             flds.createLevels(image.rows, image.cols);
 521
 522         flds.preprocessor->apply(image, flds.shrunk);
 523         ::integral(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
 524     }
 525     else
 526     {
 527         image.copyTo(flds.hogluv, s);
 528     }
 529
 530     flds.detect(objects, s);
 531
 532     if ( (flags && NMS_MASK) != NO_REJECT)
 533     {
 534         cv::cuda::GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
 535         flds.suppress(objects, s);
 536         flds.suppressed.copyTo(spr);
 537     }
 538 }
 539
 540 void cv::softcascade::SCascade::read(const FileNode& fn)
 541 {
 542     Algorithm::read(fn);
 543 }
 544
 545 namespace {
 546
 547 using cv::InputArray;
 548 using cv::OutputArray;
 549 using cv::cuda::Stream;
 550 using cv::cuda::GpuMat;
 551
 552 inline void setZero(cv::cuda::GpuMat& m, cv::cuda::Stream& s)
 553 {
 554     m.setTo(0, s);
 555 }
 556
 557 struct SeparablePreprocessor : public cv::softcascade::ChannelsProcessor
 558 {
 559     SeparablePreprocessor(const int s, const int b) : cv::softcascade::ChannelsProcessor(), shrinkage(s), bins(b) {}
 560     virtual ~SeparablePreprocessor() {}
 561
 562     virtual void apply(InputArray _frame, OutputArray _shrunk, cv::cuda::Stream& s = cv::cuda::Stream::Null())
 563     {
 564         bgr = _frame.getGpuMat();
 565         //cv::cuda::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
 566
 567         _shrunk.create(bgr.rows * (4 + bins) / shrinkage, bgr.cols / shrinkage, CV_8UC1);
 568         cv::cuda::GpuMat shrunk = _shrunk.getGpuMat();
 569
 570         channels.create(bgr.rows * (4 + bins), bgr.cols, CV_8UC1);
 571         setZero(channels, s);
 572
 573         gray.create(bgr.size(), CV_8UC1);
 574         cv::softcascade::cudev::transform(bgr, gray); //cv::cuda::cvtColor(bgr, gray, CV_BGR2GRAY);
 575         cv::softcascade::cudev::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins);
 576
 577         cv::cuda::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));
 578         cv::softcascade::cudev::bgr2Luv(bgr, luv);
 579         cv::softcascade::cudev::shrink(channels, shrunk);
 580     }
 581
 582 private:
 583     const int shrinkage;
 584     const int bins;
 585
 586     cv::cuda::GpuMat bgr;
 587     cv::cuda::GpuMat gray;
 588     cv::cuda::GpuMat channels;
 589     SeparablePreprocessor& operator=( const SeparablePreprocessor& );
 590 };
 591
 592 }
 593
 594 cv::Ptr<cv::softcascade::ChannelsProcessor> cv::softcascade::ChannelsProcessor::create(const int s, const int b, const int m)
 595 {
 596     CV_Assert((m && SEPARABLE));
 597     return makePtr<SeparablePreprocessor>(s, b);
 598 }
 599
 600 cv::softcascade::ChannelsProcessor::ChannelsProcessor() { }
 601 cv::softcascade::ChannelsProcessor::~ChannelsProcessor() { }
 602
 603 #endif