modules/softcascade/src/detector.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and / or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44
  45 cv::softcascade::Detection::Detection(const cv::Rect& b, const float c, int k)
  46 : x(static_cast<ushort>(b.x)), y(static_cast<ushort>(b.y)),
  47   w(static_cast<ushort>(b.width)), h(static_cast<ushort>(b.height)), confidence(c), kind(k) {}
  48
  49 cv::Rect cv::softcascade::Detection::bb() const
  50 {
  51     return cv::Rect(x, y, w, h);
  52 }
  53
  54 namespace {
  55
  56 struct SOctave
  57 {
  58     SOctave(const int i, const cv::Size& origObjSize, const cv::FileNode& fn)
  59     : index(i), weaks((int)fn[SC_OCT_WEAKS]), scale((float)std::pow(2,(float)fn[SC_OCT_SCALE])),
  60       size(cvRound(origObjSize.width * scale), cvRound(origObjSize.height * scale)) {}
  61
  62     int   index;
  63     int   weaks;
  64
  65     float scale;
  66
  67     cv::Size size;
  68
  69     static const char *const SC_OCT_SCALE;
  70     static const char *const SC_OCT_WEAKS;
  71     static const char *const SC_OCT_SHRINKAGE;
  72 };
  73
  74
  75 struct Weak
  76 {
  77     Weak(){}
  78     Weak(const cv::FileNode& fn) : threshold((float)fn[SC_WEAK_THRESHOLD]) {}
  79
  80     float threshold;
  81
  82     static const char *const SC_WEAK_THRESHOLD;
  83 };
  84
  85
  86 struct Node
  87 {
  88     Node(){}
  89     Node(const int offset, cv::FileNodeIterator& fIt)
  90     : feature((int)(*(fIt +=2)++) + offset), threshold((float)(*(fIt++))) {}
  91
  92     int   feature;
  93     float threshold;
  94 };
  95
  96 struct Feature
  97 {
  98     Feature() {}
  99     Feature(const cv::FileNode& fn, bool useBoxes = false) : channel((int)fn[SC_F_CHANNEL])
 100     {
 101         cv::FileNode rn = fn[SC_F_RECT];
 102         cv::FileNodeIterator r_it = rn.begin();
 103
 104         int x = *r_it++;
 105         int y = *r_it++;
 106         int w = *r_it++;
 107         int h = *r_it++;
 108
 109         // ToDo: fix me
 110         if (useBoxes)
 111             rect = cv::Rect(x, y, w, h);
 112         else
 113             rect = cv::Rect(x, y, w + x, h + y);
 114
 115         // 1 / area
 116         rarea = 1.f / ((rect.width - rect.x) * (rect.height - rect.y));
 117     }
 118
 119     int channel;
 120     cv::Rect rect;
 121     float rarea;
 122
 123     static const char *const SC_F_CHANNEL;
 124     static const char *const SC_F_RECT;
 125 };
 126
 127 const char *const SOctave::SC_OCT_SCALE      = "scale";
 128 const char *const SOctave::SC_OCT_WEAKS      = "weaks";
 129 const char *const SOctave::SC_OCT_SHRINKAGE  = "shrinkingFactor";
 130 const char *const Weak::SC_WEAK_THRESHOLD   = "treeThreshold";
 131 const char *const Feature::SC_F_CHANNEL     = "channel";
 132 const char *const Feature::SC_F_RECT        = "rect";
 133
 134 struct Level
 135 {
 136     const SOctave* octave;
 137
 138     float origScale;
 139     float relScale;
 140     int scaleshift;
 141
 142     cv::Size workRect;
 143     cv::Size objSize;
 144
 145     float scaling[2]; // 0-th for channels <= 6, 1-st otherwise
 146
 147     Level(const SOctave& oct, const float scale, const int shrinkage, const int w, const int h)
 148     :  octave(&oct), origScale(scale), relScale(scale / oct.scale),
 149        workRect(cv::Size(cvRound(w / (float)shrinkage),cvRound(h / (float)shrinkage))),
 150        objSize(cv::Size(cvRound(oct.size.width * relScale), cvRound(oct.size.height * relScale)))
 151     {
 152         scaling[0] = ((relScale >= 1.f)? 1.f : (0.89f * std::pow(relScale, 1.099f / std::log(2.f)))) / (relScale * relScale);
 153         scaling[1] = 1.f;
 154         scaleshift = static_cast<int>(relScale * (1 << 16));
 155     }
 156
 157     void addDetection(const int x, const int y, float confidence, std::vector<cv::softcascade::Detection>& detections) const
 158     {
 159         // fix me
 160         int shrinkage = 4;//(*octave).shrinkage;
 161         cv::Rect rect(cvRound(x * shrinkage), cvRound(y * shrinkage), objSize.width, objSize.height);
 162
 163         detections.push_back(cv::softcascade::Detection(rect, confidence));
 164     }
 165
 166     float rescale(cv::Rect& scaledRect, const float threshold, int idx) const
 167     {
 168 #define SSHIFT(a) ((a) + (1 << 15)) >> 16
 169         // rescale
 170         scaledRect.x      = SSHIFT(scaleshift * scaledRect.x);
 171         scaledRect.y      = SSHIFT(scaleshift * scaledRect.y);
 172         scaledRect.width  = SSHIFT(scaleshift * scaledRect.width);
 173         scaledRect.height = SSHIFT(scaleshift * scaledRect.height);
 174 #undef SSHIFT
 175         float sarea = static_cast<float>((scaledRect.width - scaledRect.x) * (scaledRect.height - scaledRect.y));
 176
 177         // compensation areas rounding
 178         return (sarea == 0.0f)? threshold : (threshold * scaling[idx] * sarea);
 179     }
 180 };
 181 struct ChannelStorage
 182 {
 183     cv::Mat hog;
 184     int shrinkage;
 185     int offset;
 186     size_t step;
 187     int model_height;
 188
 189     cv::Ptr<cv::softcascade::ChannelFeatureBuilder> builder;
 190
 191     enum {HOG_BINS = 6, HOG_LUV_BINS = 10};
 192
 193     ChannelStorage(const cv::Mat& colored, int shr, cv::String featureTypeStr) : shrinkage(shr)
 194     {
 195         model_height = cvRound(colored.rows / (float)shrinkage);
 196         if (featureTypeStr == "ICF") featureTypeStr = "HOG6MagLuv";
 197
 198         builder = cv::softcascade::ChannelFeatureBuilder::create(featureTypeStr);
 199         (*builder)(colored, hog, cv::Size(cvRound(colored.cols / (float)shrinkage), model_height));
 200
 201         step = hog.step1();
 202     }
 203
 204     float get(const int channel, const cv::Rect& area) const
 205     {
 206         const int *ptr = hog.ptr<const int>(0) + model_height * channel * step + offset;
 207
 208         int a = ptr[area.y      * step + area.x];
 209         int b = ptr[area.y      * step + area.width];
 210         int c = ptr[area.height * step + area.width];
 211         int d = ptr[area.height * step + area.x];
 212
 213         return static_cast<float>(a - b + c - d);
 214     }
 215 };
 216
 217 }
 218
 219 struct cv::softcascade::Detector::Fields
 220 {
 221     float minScale;
 222     float maxScale;
 223     int scales;
 224
 225     int origObjWidth;
 226     int origObjHeight;
 227
 228     int shrinkage;
 229
 230     std::vector<SOctave> octaves;
 231     std::vector<Weak>    weaks;
 232     std::vector<Node>    nodes;
 233     std::vector<float>   leaves;
 234     std::vector<Feature> features;
 235
 236     std::vector<Level> levels;
 237
 238     cv::Size frameSize;
 239
 240     typedef std::vector<SOctave>::iterator  octIt_t;
 241     typedef std::vector<Detection> dvector;
 242
 243     String featureTypeStr;
 244
 245     void detectAt(const int dx, const int dy, const Level& level, const ChannelStorage& storage, dvector& detections) const
 246     {
 247         float detectionScore = 0.f;
 248
 249         const SOctave& octave = *(level.octave);
 250
 251         int stBegin = octave.index * octave.weaks, stEnd = stBegin + octave.weaks;
 252
 253         for(int st = stBegin; st < stEnd; ++st)
 254         {
 255             const Weak& weak = weaks[st];
 256
 257             int nId = st * 3;
 258
 259             // work with root node
 260             const Node& node = nodes[nId];
 261             const Feature& feature = features[node.feature];
 262
 263             cv::Rect scaledRect(feature.rect);
 264
 265             float threshold = level.rescale(scaledRect, node.threshold, (int)(feature.channel > 6)) * feature.rarea;
 266             float sum = storage.get(feature.channel, scaledRect);
 267             int next = (sum >= threshold)? 2 : 1;
 268
 269             // leaves
 270             const Node& leaf = nodes[nId + next];
 271             const Feature& fLeaf = features[leaf.feature];
 272
 273             scaledRect = fLeaf.rect;
 274             threshold = level.rescale(scaledRect, leaf.threshold, (int)(fLeaf.channel > 6)) * fLeaf.rarea;
 275             sum = storage.get(fLeaf.channel, scaledRect);
 276
 277             int lShift = (next - 1) * 2 + ((sum >= threshold) ? 1 : 0);
 278             float impact = leaves[(st * 4) + lShift];
 279
 280             detectionScore += impact;
 281
 282             if (detectionScore <= weak.threshold) return;
 283         }
 284
 285         if (detectionScore > 0)
 286             level.addDetection(dx, dy, detectionScore, detections);
 287     }
 288
 289     octIt_t fitOctave(const float& logFactor)
 290     {
 291         float minAbsLog = FLT_MAX;
 292         octIt_t res =  octaves.begin();
 293         for (octIt_t oct = octaves.begin(); oct < octaves.end(); ++oct)
 294         {
 295             const SOctave& octave =*oct;
 296             float logOctave = std::log(octave.scale);
 297             float logAbsScale = fabs(logFactor - logOctave);
 298
 299             if(logAbsScale < minAbsLog)
 300             {
 301                 res = oct;
 302                 minAbsLog = logAbsScale;
 303             }
 304         }
 305         return res;
 306     }
 307
 308     // compute levels of full pyramid
 309     void calcLevels(const cv::Size& curr, float mins, float maxs, int total)
 310     {
 311         if (frameSize == curr && maxs == maxScale && mins == minScale && total == scales) return;
 312
 313         frameSize = curr;
 314         maxScale = maxs; minScale = mins; scales = total;
 315         CV_Assert(scales > 1);
 316
 317         levels.clear();
 318         float logFactor = (std::log(maxScale) - std::log(minScale)) / (scales -1);
 319
 320         float scale = minScale;
 321         for (int sc = 0; sc < scales; ++sc)
 322         {
 323             int width  = static_cast<int>(std::max(0.0f, frameSize.width  - (origObjWidth  * scale)));
 324             int height = static_cast<int>(std::max(0.0f, frameSize.height - (origObjHeight * scale)));
 325
 326             float logScale = std::log(scale);
 327             octIt_t fit = fitOctave(logScale);
 328
 329
 330             Level level(*fit, scale, shrinkage, width, height);
 331
 332             if (!width || !height)
 333                 break;
 334             else
 335                 levels.push_back(level);
 336
 337             if (fabs(scale - maxScale) < FLT_EPSILON) break;
 338             scale = std::min(maxScale, expf(std::log(scale) + logFactor));
 339         }
 340     }
 341
 342     bool fill(const FileNode &root)
 343     {
 344         // cascade properties
 345         static const char *const SC_STAGE_TYPE       = "stageType";
 346         static const char *const SC_BOOST            = "BOOST";
 347
 348         static const char *const SC_FEATURE_TYPE     = "featureType";
 349         static const char *const SC_HOG6_MAG_LUV     = "HOG6MagLuv";
 350         static const char *const SC_ICF              = "ICF";
 351
 352         static const char *const SC_ORIG_W           = "width";
 353         static const char *const SC_ORIG_H           = "height";
 354
 355         static const char *const SC_OCTAVES          = "octaves";
 356         static const char *const SC_TREES            = "trees";
 357         static const char *const SC_FEATURES         = "features";
 358
 359         static const char *const SC_INTERNAL         = "internalNodes";
 360         static const char *const SC_LEAF             = "leafValues";
 361
 362         static const char *const SC_SHRINKAGE        = "shrinkage";
 363
 364         static const char *const FEATURE_FORMAT      = "featureFormat";
 365
 366         // only Ada Boost supported
 367         String stageTypeStr = (String)root[SC_STAGE_TYPE];
 368         CV_Assert(stageTypeStr == SC_BOOST);
 369
 370         String fformat = (String)root[FEATURE_FORMAT];
 371         bool useBoxes = (fformat == "BOX");
 372
 373         // only HOG-like integral channel features supported
 374         featureTypeStr = (String)root[SC_FEATURE_TYPE];
 375         CV_Assert(featureTypeStr == SC_ICF || featureTypeStr == SC_HOG6_MAG_LUV);
 376
 377         origObjWidth  = (int)root[SC_ORIG_W];
 378         origObjHeight = (int)root[SC_ORIG_H];
 379
 380         shrinkage = (int)root[SC_SHRINKAGE];
 381
 382         FileNode fn = root[SC_OCTAVES];
 383         if (fn.empty()) return false;
 384
 385         // for each octave
 386         FileNodeIterator it = fn.begin(), it_end = fn.end();
 387         for (int octIndex = 0; it != it_end; ++it, ++octIndex)
 388         {
 389             FileNode fns = *it;
 390             SOctave octave(octIndex, cv::Size(origObjWidth, origObjHeight), fns);
 391             CV_Assert(octave.weaks > 0);
 392             octaves.push_back(octave);
 393
 394             FileNode ffs = fns[SC_FEATURES];
 395             if (ffs.empty()) return false;
 396
 397             fns = fns[SC_TREES];
 398             if (fn.empty()) return false;
 399
 400             FileNodeIterator st = fns.begin(), st_end = fns.end();
 401             for (; st != st_end; ++st )
 402             {
 403                 weaks.push_back(Weak(*st));
 404
 405                 fns = (*st)[SC_INTERNAL];
 406                 FileNodeIterator inIt = fns.begin(), inIt_end = fns.end();
 407                 for (; inIt != inIt_end;)
 408                     nodes.push_back(Node((int)features.size(), inIt));
 409
 410                 fns = (*st)[SC_LEAF];
 411                 inIt = fns.begin(), inIt_end = fns.end();
 412
 413                 for (; inIt != inIt_end; ++inIt)
 414                     leaves.push_back((float)(*inIt));
 415             }
 416
 417             st = ffs.begin(), st_end = ffs.end();
 418             for (; st != st_end; ++st )
 419                 features.push_back(Feature(*st, useBoxes));
 420         }
 421
 422         return true;
 423     }
 424 };
 425
 426 cv::softcascade::Detector::Detector(const double mins, const double maxs, const int nsc, const int rej)
 427 : fields(0), minScale(mins), maxScale(maxs), scales(nsc), rejCriteria(rej) {}
 428
 429 cv::softcascade::Detector::~Detector() { delete fields;}
 430
 431 void cv::softcascade::Detector::read(const cv::FileNode& fn)
 432 {
 433     Algorithm::read(fn);
 434 }
 435
 436 bool cv::softcascade::Detector::load(const cv::FileNode& fn)
 437 {
 438     if (fields) delete fields;
 439
 440     fields = new Fields;
 441     return fields->fill(fn);
 442 }
 443
 444 namespace {
 445
 446 using cv::softcascade::Detection;
 447 typedef std::vector<Detection>  dvector;
 448
 449
 450 struct ConfidenceGt
 451 {
 452     bool operator()(const Detection& a, const Detection& b) const
 453     {
 454         return a.confidence > b.confidence;
 455     }
 456 };
 457
 458 static float overlap(const cv::Rect &a, const cv::Rect &b)
 459 {
 460     int w = std::min(a.x + a.width,  b.x + b.width)  - std::max(a.x, b.x);
 461     int h = std::min(a.y + a.height, b.y + b.height) - std::max(a.y, b.y);
 462
 463     return (w < 0 || h < 0)? 0.f : (float)(w * h);
 464 }
 465
 466 void DollarNMS(dvector& objects)
 467 {
 468     static const float DollarThreshold = 0.65f;
 469     std::sort(objects.begin(), objects.end(), ConfidenceGt());
 470
 471     for (dvector::iterator dIt = objects.begin(); dIt != objects.end(); ++dIt)
 472     {
 473         const Detection &a = *dIt;
 474         for (dvector::iterator next = dIt + 1; next != objects.end(); )
 475         {
 476             const Detection &b = *next;
 477
 478             const float ovl =  overlap(a.bb(), b.bb()) / std::min(a.bb().area(), b.bb().area());
 479
 480             if (ovl > DollarThreshold)
 481                 next = objects.erase(next);
 482             else
 483                 ++next;
 484         }
 485     }
 486 }
 487
 488 static void suppress(int type, std::vector<Detection>& objects)
 489 {
 490     CV_Assert(type == cv::softcascade::Detector::DOLLAR);
 491     DollarNMS(objects);
 492 }
 493
 494 }
 495
 496 void cv::softcascade::Detector::detectNoRoi(const cv::Mat& image, std::vector<Detection>& objects) const
 497 {
 498     Fields& fld = *fields;
 499     // create integrals
 500     ChannelStorage storage(image, fld.shrinkage, fld.featureTypeStr);
 501
 502     typedef std::vector<Level>::const_iterator lIt;
 503     for (lIt it = fld.levels.begin(); it != fld.levels.end(); ++it)
 504     {
 505         const Level& level = *it;
 506
 507         // we train only 3 scales.
 508         if (level.origScale > 2.5) break;
 509
 510         for (int dy = 0; dy < level.workRect.height; ++dy)
 511         {
 512             for (int dx = 0; dx < level.workRect.width; ++dx)
 513             {
 514                 storage.offset = (int)(dy * storage.step + dx);
 515                 fld.detectAt(dx, dy, level, storage, objects);
 516             }
 517         }
 518     }
 519
 520     if (rejCriteria != NO_REJECT) suppress(rejCriteria, objects);
 521 }
 522
 523 void cv::softcascade::Detector::detect(cv::InputArray _image, cv::InputArray _rois, std::vector<Detection>& objects) const
 524 {
 525     // only color images are suppered
 526     cv::Mat image = _image.getMat();
 527     CV_Assert(image.type() == CV_8UC3);
 528
 529     Fields& fld = *fields;
 530     fld.calcLevels(image.size(),(float) minScale, (float)maxScale, scales);
 531
 532     objects.clear();
 533
 534     if (_rois.empty())
 535         return detectNoRoi(image, objects);
 536
 537     int shr = fld.shrinkage;
 538
 539     cv::Mat roi = _rois.getMat();
 540     cv::Mat mask(image.rows / shr, image.cols / shr, CV_8UC1);
 541
 542     mask.setTo(cv::Scalar::all(0));
 543     cv::Rect* r = roi.ptr<cv::Rect>(0);
 544     for (int i = 0; i < (int)roi.cols; ++i)
 545         cv::Mat(mask, cv::Rect(r[i].x / shr, r[i].y / shr, r[i].width / shr , r[i].height / shr)).setTo(cv::Scalar::all(1));
 546
 547     // create integrals
 548     ChannelStorage storage(image, shr, fld.featureTypeStr);
 549
 550     typedef std::vector<Level>::const_iterator lIt;
 551     for (lIt it = fld.levels.begin(); it != fld.levels.end(); ++it)
 552     {
 553          const Level& level = *it;
 554
 555         // we train only 3 scales.
 556         if (level.origScale > 2.5) break;
 557
 558          for (int dy = 0; dy < level.workRect.height; ++dy)
 559          {
 560              uchar* m  = mask.ptr<uchar>(dy);
 561              for (int dx = 0; dx < level.workRect.width; ++dx)
 562              {
 563                  if (m[dx])
 564                  {
 565                      storage.offset = (int)(dy * storage.step + dx);
 566                      fld.detectAt(dx, dy, level, storage, objects);
 567                  }
 568              }
 569          }
 570     }
 571
 572     if (rejCriteria != NO_REJECT) suppress(rejCriteria, objects);
 573 }
 574
 575 void cv::softcascade::Detector::detect(InputArray _image, InputArray _rois,  OutputArray _rects, OutputArray _confs) const
 576 {
 577     std::vector<Detection> objects;
 578     detect( _image, _rois, objects);
 579
 580     _rects.create(1, (int)objects.size(), CV_32SC4);
 581     cv::Mat_<cv::Rect> rects = (cv::Mat_<cv::Rect>)_rects.getMat();
 582     cv::Rect* rectPtr = rects.ptr<cv::Rect>(0);
 583
 584     _confs.create(1, (int)objects.size(), CV_32F);
 585     cv::Mat confs = _confs.getMat();
 586     float* confPtr = confs.ptr<float>(0);
 587
 588     typedef std::vector<Detection>::const_iterator IDet;
 589
 590     int i = 0;
 591     for (IDet it = objects.begin(); it != objects.end(); ++it, ++i)
 592     {
 593         rectPtr[i] = (*it).bb();
 594         confPtr[i] = (*it).confidence;
 595     }
 596 }