2 #include "cascadeclassifier.h"
// Fragment of a log-odds (logit) helper: clamp val into (eps, 1-eps) to
// avoid log(0)/division by zero, then return log(val / (1 - val)).
// NOTE(review): the enclosing function signature is not visible in this
// view — presumably `static double logRatio(double val)`; confirm against
// the full file before relying on this description.
11 const double eps = 1e-5;
13 val = max( val, eps );
14 val = min( val, 1. - eps );
15 return log( val/(1. - val) );
// Comparators and qsort instantiations used by the precalc/sort routines below.
18 #define CV_CMP_FLT(i,j) (i < j)
19 static CV_IMPLEMENT_QSORT_EX( icvSortFlt, float, CV_CMP_FLT, const float* )
// Indirect comparator: orders indices i, j by the values of the auxiliary
// float array `aux` (the array name is fixed by the CV_IMPLEMENT_QSORT_EX
// macro's userdata parameter). Used to sort sample-index buffers by
// feature value without moving the values themselves.
21 #define CV_CMP_NUM_IDX(i,j) (aux[i] < aux[j])
22 static CV_IMPLEMENT_QSORT_EX( icvSortIntAux, int, CV_CMP_NUM_IDX, const float* )
23 static CV_IMPLEMENT_QSORT_EX( icvSortUShAux, unsigned short, CV_CMP_NUM_IDX, const float* )
// Tolerance used when comparing the boosted sum against the stage threshold
// in CvCascadeBoost::predict().
25 #define CV_THRESHOLD_EPS (0.00001F)
// Sizing constants for the CvMemStorage blocks that hold tree nodes/splits.
27 static const int MinBlockSize = 1 << 16;
28 static const int BlockSizeDelta = 1 << 10;
30 // TODO remove this code duplication with ml/precomp.hpp
// C qsort() comparator: ascending order of ints.
// NOTE(review): the subtraction idiom can overflow for operands of large
// opposite signs; here it is used (via qsort below) on validated array
// indices in [0, data_arr_size), where it is safe.
32 static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
34 return *(const int*)a - *(const int*)b;
// Normalizes a user-supplied sample/variable selection array into a sorted
// CV_32SC1 row vector of selected indices.
//   idx_arr  - 1-D CvMat: either a 0/1 mask (8u/8s, same length as
//              data_arr_size) or an array of integer indices (32s).
//   data_arr_size        - total number of selectable elements.
//   check_for_duplicates - if true, duplicated indices raise CV_StsBadArg.
// Returns a newly allocated CvMat (caller releases); released on error.
// NOTE(review): several lines of the original body (type dispatch, the
// mask->index conversion loop, sorted-check details) are not visible in
// this view; comments below describe only what the visible lines show.
37 static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
41 CV_FUNCNAME( "cvPreprocessIndexArray" );
45 int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
50 if( !CV_IS_MAT(idx_arr) )
51 CV_ERROR( CV_StsBadArg, "Invalid index array" );
53 if( idx_arr->rows != 1 && idx_arr->cols != 1 )
54 CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
// Length of a 1-D matrix: rows + cols - 1 works for both row and column vectors.
56 idx_total = idx_arr->rows + idx_arr->cols - 1;
57 srcb = idx_arr->data.ptr;
58 srci = idx_arr->data.i;
60 type = CV_MAT_TYPE(idx_arr->type);
// Element step in units of elements (1 when the matrix is continuous).
61 step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
67 // idx_arr is array of 1's and 0's -
68 // i.e. it is a mask of the selected components
69 if( idx_total != data_arr_size )
70 CV_ERROR( CV_StsUnmatchedSizes,
71 "Component mask should contain as many elements as the total number of input variables" );
// Count the selected (non-zero) mask entries.
73 for( i = 0; i < idx_total; i++ )
74 idx_selected += srcb[i*step] != 0;
76 if( idx_selected == 0 )
77 CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
81 // idx_arr is array of integer indices of selected components
82 if( idx_total > data_arr_size )
83 CV_ERROR( CV_StsOutOfRange,
84 "index array may not contain more elements than the total number of input variables" );
85 idx_selected = idx_total;
86 // check if sorted already
87 for( i = 0; i < idx_total; i++ )
89 int val = srci[i*step];
99 CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
100 "(it should be 8uC1, 8sC1 or 32sC1)" );
// Output buffer for the normalized index list.
103 CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
106 if( type < CV_32SC1 )
108 for( i = 0; i < idx_total; i++ )
114 for( i = 0; i < idx_total; i++ )
115 dsti[i] = srci[i*step];
// Sort so the range check below only needs to look at the two extremes.
118 qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
120 if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
121 CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
123 if( check_for_duplicates )
125 for( i = 1; i < idx_total; i++ )
126 if( dsti[i] <= dsti[i-1] )
127 CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
// On any error raised above, free the partially built result.
133 if( cvGetErrStatus() < 0 )
134 cvReleaseMat( &idx );
139 //----------------------------- CascadeBoostParams -------------------------------------------------
// Default stage parameters: Gentle AdaBoost, min hit rate 0.995 and max
// false-alarm rate 0.5 per stage; surrogate splits, 1-SE pruning rule and
// pruned-tree truncation all disabled.
141 CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
143 boost_type = CvBoost::GENTLE;
144 use_surrogates = use_1se_rule = truncate_pruned_tree = false;
// Parameterized constructor. Forwards weak count, weight-trim rate and max
// depth to the CvBoostParams base.
// NOTE(review): `_boostType` is passed to the base class but then boost_type
// is unconditionally reset to GENTLE on the next line, so the argument is
// effectively ignored here; callers set the real type later via read()/
// scanAttr(). Confirm this is intentional before changing it.
147 CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
148 float _minHitRate, float _maxFalseAlarm,
149 double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
150 CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
152 boost_type = CvBoost::GENTLE;
153 minHitRate = _minHitRate;
154 maxFalseAlarm = _maxFalseAlarm;
155 use_surrogates = use_1se_rule = truncate_pruned_tree = false;
// Serializes the boosting parameters to a FileStorage (XML/YAML).
// Asserts if boost_type is not one of the four supported values, since an
// empty type string would produce an unreadable file.
158 void CvCascadeBoostParams::write( FileStorage &fs ) const
// Map the numeric boost type to its string tag for the storage file.
160 String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
161 boost_type == CvBoost::REAL ? CC_REAL_BOOST :
162 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
163 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
164 CV_Assert( !boostTypeStr.empty() );
165 fs << CC_BOOST_TYPE << boostTypeStr;
166 fs << CC_MINHITRATE << minHitRate;
167 fs << CC_MAXFALSEALARM << maxFalseAlarm;
168 fs << CC_TRIM_RATE << weight_trim_rate;
169 fs << CC_MAX_DEPTH << max_depth;
170 fs << CC_WEAK_COUNT << weak_count;
// Deserializes boosting parameters from a FileNode; the inverse of write().
// Raises CV_StsBadArg for an unknown boost-type tag or for parameter values
// outside their valid ranges. Return value: visible lines do not show the
// final return — presumably true on success (confirm in the full file).
173 bool CvCascadeBoostParams::read( const FileNode &node )
176 FileNode rnode = node[CC_BOOST_TYPE];
177 rnode >> boostTypeStr;
// Map the string tag back to the CvBoost enum; -1 marks an unknown tag.
178 boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
179 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
180 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
181 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
182 if (boost_type == -1)
183 CV_Error( CV_StsBadArg, "unsupported Boost type" );
184 node[CC_MINHITRATE] >> minHitRate;
185 node[CC_MAXFALSEALARM] >> maxFalseAlarm;
186 node[CC_TRIM_RATE] >> weight_trim_rate ;
187 node[CC_MAX_DEPTH] >> max_depth ;
188 node[CC_WEAK_COUNT] >> weak_count ;
// Validate: rates must lie in (0, 1]; depth and weak count must be positive.
189 if ( minHitRate <= 0 || minHitRate > 1 ||
190 maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
191 weight_trim_rate <= 0 || weight_trim_rate > 1 ||
192 max_depth <= 0 || weak_count <= 0 )
193 CV_Error( CV_StsBadArg, "bad parameters range");
// Prints the command-line options this parameter set understands, with the
// current values shown as defaults (used by traincascade's usage output).
197 void CvCascadeBoostParams::printDefaults() const
199 cout << "--boostParams--" << endl;
200 cout << "  [-bt <{" << CC_DISCRETE_BOOST << ", "
201 << CC_REAL_BOOST << ", "
// NOTE(review): no `<<` before ", " here — this relies on adjacent string
// literal concatenation (works only because CC_LOGIT_BOOST is a string
// literal macro). Inconsistent with the neighboring lines; harmless.
202 << CC_LOGIT_BOOST ", "
203 << CC_GENTLE_BOOST << "(default)}>]" << endl;
// NOTE(review): "<min_hit_rate> = " has the '>' misplaced relative to the
// other lines' "<name = value>" pattern; cosmetic only (runtime string,
// left untouched here).
204 cout << "  [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl;
205 cout << "  [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
206 cout << "  [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
207 cout << "  [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
208 cout << "  [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
// Prints the current parameter values, one per line (training-log output).
// Uses the same type-to-string mapping as write(); asserts on unknown type.
211 void CvCascadeBoostParams::printAttrs() const
213 String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
214 boost_type == CvBoost::REAL ? CC_REAL_BOOST :
215 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
216 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
217 CV_Assert( !boostTypeStr.empty() );
218 cout << "boostType: " << boostTypeStr << endl;
219 cout << "minHitRate: " << minHitRate << endl;
220 cout << "maxFalseAlarmRate: " <<  maxFalseAlarm << endl;
221 cout << "weightTrimRate: " << weight_trim_rate << endl;
222 cout << "maxDepth: " << max_depth << endl;
223 cout << "maxWeakCount: " << weak_count << endl;
// Parses one command-line option/value pair into this parameter set.
//   prmName - option flag (e.g. "-bt", "-minHitRate").
//   val     - the option's string value.
// Return value and the failure path after the -1 boost type are not visible
// in this view — presumably returns false for an unrecognized flag or bad
// boost type; confirm against the full file.
// NOTE(review): numeric values are parsed with atof/atoi, which silently
// yield 0 on malformed input; range validation happens later in read()/
// set_params(), not here.
226 bool CvCascadeBoostParams::scanAttr( const String prmName, const String val)
230 if( !prmName.compare( "-bt" ) )
232 boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
233 !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
234 !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
235 !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
236 if (boost_type == -1)
239 else if( !prmName.compare( "-minHitRate" ) )
241 minHitRate = (float) atof( val.c_str() );
243 else if( !prmName.compare( "-maxFalseAlarmRate" ) )
245 maxFalseAlarm = (float) atof( val.c_str() );
247 else if( !prmName.compare( "-weightTrimRate" ) )
249 weight_trim_rate = (float) atof( val.c_str() );
251 else if( !prmName.compare( "-maxDepth" ) )
253 max_depth = atoi( val.c_str() );
255 else if( !prmName.compare( "-maxWeakCount" ) )
257 weak_count = atoi( val.c_str() );
// Builds a new tree-root node restricted to the samples selected by
// _subsample_idx (mask or index array — normalized by cvPreprocessIndexArray).
// Two paths:
//  * If the selection covers every sample in order, the existing data_root
//    is shallow-copied ("root copy" path).
//  * Otherwise a new root is created and the precalculated sorted-index
//    buffers, cv_labels and sample_indices are re-written for the subset.
// Returns the new root node; temporary matrices are released on exit.
// NOTE(review): many lines (branch headers, else-blocks, the count/offset
// fill loops) are missing from this view; comments describe only what the
// visible lines establish.
265 CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
267 CvDTreeNode* root = 0;
268 CvMat* isubsample_idx = 0;
269 CvMat* subsample_co = 0;
271 bool isMakeRootCopy = true;
274 CV_Error( CV_StsError, "No training data has been set" );
278 CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );
// If the (sorted) selection has exactly sample_count entries, check whether
// it is the identity selection; any mismatch disables the fast copy path.
280 if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
282 const int* sidx = isubsample_idx->data.i;
283 for( int i = 0; i < sample_count; i++ )
287 isMakeRootCopy = false;
293 isMakeRootCopy = false;
298 // make a copy of the root node
301 root = new_node( 0, 1, 0, 0 );
304 root->num_valid = temp.num_valid;
305 if( root->num_valid )
307 for( i = 0; i < var_count; i++ )
308 root->num_valid[i] = data_root->num_valid[i];
310 root->cv_Tn = temp.cv_Tn;
311 root->cv_node_risk = temp.cv_node_risk;
312 root->cv_node_error = temp.cv_node_error;
316 int* sidx = isubsample_idx->data.i;
317 // co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
318 int* co, cur_ofs = 0;
319 int workVarCount = get_work_var_count();
320 int count = isubsample_idx->rows + isubsample_idx->cols - 1;
322 root = new_node( 0, count, 1, 0 );
324 CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
325 cvZero( subsample_co );
326 co = subsample_co->data.i;
327 for( int i = 0; i < count; i++ )
329 for( int i = 0; i < sample_count; i++ )
// Scratch buffer large enough for the index/value/sample-index triple below.
340 cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
341 // subsample ordered variables
342 for( int vi = 0; vi < numPrecalcIdx; vi++ )
344 int ci = get_var_type(vi);
347 int *src_idx_buf = (int*)(uchar*)inn_buf;
348 float *src_val_buf = (float*)(src_idx_buf + sample_count);
349 int* sample_indices_buf = (int*)(src_val_buf + sample_count);
350 const int* src_idx = 0;
351 const float* src_val = 0;
352 get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );
354 int j = 0, idx, count_i;
355 int num_valid = data_root->get_num_valid(vi);
356 CV_Assert( num_valid == sample_count );
// 16-bit buffer layout: rewrite the sorted index column for this variable,
// expanding duplicated selections via the count/offset pairs in `co`.
360 unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
361 vi*sample_count + data_root->offset);
362 for( int i = 0; i < num_valid; i++ )
367 for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
368 udst_idx[j] = (unsigned short)cur_ofs;
// 32-bit buffer layout: same rewrite with int destinations.
373 int* idst_idx = buf->data.i + root->buf_idx*buf->cols +
374 vi*sample_count + root->offset;
375 for( int i = 0; i < num_valid; i++ )
380 for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
381 idst_idx[j] = cur_ofs;
386 // subsample cv_lables
387 const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
390 unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
391 (workVarCount-1)*sample_count + root->offset);
392 for( int i = 0; i < count; i++ )
393 udst[i] = (unsigned short)src_lbls[sidx[i]];
397 int* idst = buf->data.i + root->buf_idx*buf->cols +
398 (workVarCount-1)*sample_count + root->offset;
399 for( int i = 0; i < count; i++ )
400 idst[i] = src_lbls[sidx[i]];
403 // subsample sample_indices
404 const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
407 unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
408 workVarCount*sample_count + root->offset);
409 for( int i = 0; i < count; i++ )
410 sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
414 int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols +
415 workVarCount*sample_count + root->offset;
416 for( int i = 0; i < count; i++ )
417 sample_idx_dst[i] = sample_idx_src[sidx[i]];
// Every variable keeps all `count` subset samples valid in the new root.
420 for( int vi = 0; vi < var_count; vi++ )
421 root->set_num_valid(vi, count);
422 cvReleaseMat( &isubsample_idx );
425 cvReleaseMat( &subsample_co );
430 //---------------------------- CascadeBoostTrainData -----------------------------
// Lightweight constructor: wires up the feature evaluator and allocates the
// var_type table and tree/node/split storage, but does NOT load samples
// (no buf/valCache). Used when reading a trained cascade back.
//   _featureEvaluator - source of feature values; NOT owned by this object.
//   _params           - tree parameters forwarded to set_params().
432 CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
433 const CvDTreeParams& _params )
435 is_classifier = true;
436 var_all = var_count = (int)_featureEvaluator->getNumFeatures();
438 featureEvaluator = _featureEvaluator;
440 set_params( _params );
441 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
// var_type has two extra trailing slots (filled below) used by the CvDTree
// machinery for cv_labels and sample indices.
442 var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
443 if ( featureEvaluator->getMaxCatCount() > 0 )
// Categorical features: var_type holds the category index per variable.
446 cat_var_count = var_count;
448 for( int vi = 0; vi < var_count; vi++ )
450 var_type->data.i[vi] = vi;
// Ordered features: negative codes (-1..-var_count) mark ordered variables.
456 ord_var_count = var_count;
457 for( int vi = 1; vi <= var_count; vi++ )
459 var_type->data.i[vi-1] = -vi;
462 var_type->data.i[var_count] = cat_var_count;
463 var_type->data.i[var_count+1] = cat_var_count+1;
// Size the memory-storage blocks for nodes and splits; split size depends
// on the category count (bit-subset storage beyond 32 categories).
465 int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
466 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
467 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
468 tree_storage = cvCreateMemStorage( treeBlockSize );
469 node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
470 split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
// Full training constructor: delegates everything to setData(), which loads
// the samples and precalculates feature value/index caches.
473 CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
475 int _precalcValBufSize, int _precalcIdxBufSize,
476 const CvDTreeParams& _params )
478 setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
// Initializes the training data: binds the feature evaluator, sizes the
// precalculation caches from the given megabyte budgets, allocates the
// work buffer / var_type / priors / direction matrices, and creates the
// root node covering all samples.
//   _precalcValBufSize / _precalcIdxBufSize - cache budgets in MB; the
//     number of precalculated value/index rows is derived from them.
// NOTE(review): several lines (is_buf_16u selection, buf_count, the idst
// branch of the sample-index init, precalculate() call) are not visible in
// this view; comments describe only the visible lines.
481 void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
483 int _precalcValBufSize, int _precalcIdxBufSize,
484 const CvDTreeParams& _params )
487 unsigned short* udst = 0;
493 is_classifier = true;
497 set_params( _params );
499 CV_Assert( _featureEvaluator );
500 featureEvaluator = _featureEvaluator;
502 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
503 _resp = featureEvaluator->getCls();
505 // TODO: check responses: elements must be 0 or 1
507 if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
508 CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );
510 var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
511 sample_count = _numSamples;
// 16-bit index buffers are only usable while sample indices fit in ushort.
514 if (sample_count < 65536)
// Convert the MB budgets into row counts, capped at var_count.
517 numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
518 numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
519 ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );
521 assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );
523 valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
524 var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
526 if ( featureEvaluator->getMaxCatCount() > 0 )
529 cat_var_count = var_count;
531 for( int vi = 0; vi < var_count; vi++ )
533 var_type->data.i[vi] = vi;
// Ordered features: negative var_type codes, as in the other constructor.
539 ord_var_count = var_count;
540 for( int vi = 1; vi <= var_count; vi++ )
542 var_type->data.i[vi-1] = -vi;
545 var_type->data.i[var_count] = cat_var_count;
546 var_type->data.i[var_count+1] = cat_var_count+1;
// Work buffer rows: precalc index rows (ordered case only) + cv_labels,
// plus one extra row of sample indices.
547 work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
548 buf_size = (work_var_count + 1) * sample_count/*sample_indices*/;
552 buf = cvCreateMat( buf_count, buf_size, CV_16UC1 );
554 buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );
556 cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
558 // precalculate valCache and set indices in buf
561 // now calculate the maximum size of split,
562 // create memory storage that will keep nodes and splits of the decision tree
563 // allocate root node and the buffer for the whole training data
564 int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
565 (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
566 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
567 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
568 tree_storage = cvCreateMemStorage( treeBlockSize );
569 node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );
571 int nvSize = var_count*sizeof(int);
572 nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
573 int tempBlockSize = nvSize;
574 tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
575 temp_storage = cvCreateMemStorage( tempBlockSize );
576 nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );
578 data_root = new_node( 0, sample_count, 0, 0 );
// Fill the sample-index row with the identity permutation 0..sample_count-1.
582 udst = (unsigned short*)(buf->data.s + work_var_count*sample_count);
584 idst = buf->data.i + work_var_count*sample_count;
586 for (int si = 0; si < sample_count; si++)
589 udst[si] = (unsigned short)si;
593 for( int vi = 0; vi < var_count; vi++ )
594 data_root->set_num_valid(vi, sample_count);
595 for( int vi = 0; vi < cat_var_count; vi++ )
596 cat_count->data.i[vi] = max_c_count;
// Binary classification: the response variable always has two classes.
598 cat_count->data.i[cat_var_count] = 2;
600 maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
601 (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
602 split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );
// Uniform class priors.
604 priors = cvCreateMat( 1, get_num_classes(), CV_64F );
605 cvSet(priors, cvScalar(1));
606 priors_mult = cvCloneMat( priors );
607 counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
608 direction = cvCreateMat( 1, sample_count, CV_8UC1 );
609 split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );
// Releases the per-training buffers via the base-class implementation.
// NOTE(review): any additional cleanup lines are not visible in this view.
612 void CvCascadeBoostTrainData::free_train_data()
614 CvDTreeTrainData::free_train_data();
// Fills labelsBuf with the integer class label (0/1 response) of each sample
// in node n, in the node's sample order. labelsBuf doubles as scratch for
// the sample indices first, then is overwritten in place with labels.
618 const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
620 int nodeSampleCount = n->sample_count;
// Response element step (1 if the responses matrix is continuous).
621 int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
623 int* sampleIndicesBuf = labelsBuf; //
624 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
625 for( int si = 0; si < nodeSampleCount; si++ )
627 int sidx = sampleIndices[si];
// Responses are stored as floats; truncate to int (expected to be 0 or 1).
628 labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
// Sample indices are stored as the last "categorical variable" row of the
// work buffer (row index == work var count); delegate to the base accessor.
633 const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
635 return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
// Cross-validation labels occupy the second-to-last row of the work buffer
// (row index == work var count - 1); delegate to the base accessor.
638 const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
640 return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
// Produces, for ordered variable vi at node n, the node's samples sorted by
// feature value: *sortedIndices = per-node sample order, *ordValues = the
// matching feature values. Three data sources, fastest first:
//  * vi < numPrecalcIdx: the sorted index row is already in buf;
//  * vi < numPrecalcVal: values come from the valCache matrix;
//  * otherwise: values are computed on the fly via the feature evaluator
//    and sorted here with icvSortIntAux.
// ordValuesBuf/sortedIndicesBuf/sampleIndicesBuf are caller scratch of at
// least n->sample_count elements each.
643 void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
644 const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
646 int nodeSampleCount = n->sample_count;
647 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
649 if ( vi < numPrecalcIdx )
// 32-bit buffer: point straight into the precalculated sorted-index row.
652 *sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
// 16-bit buffer: widen ushort indices into the caller's int scratch buffer.
655 const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
656 vi*sample_count + n->offset );
657 for( int i = 0; i < nodeSampleCount; i++ )
658 sortedIndicesBuf[i] = shortIndices[i];
660 *sortedIndices = sortedIndicesBuf;
663 if( vi < numPrecalcVal )
665 for( int i = 0; i < nodeSampleCount; i++ )
// Two-level indirection: node order -> sample index -> cached value.
667 int idx = (*sortedIndices)[i];
668 idx = sampleIndices[idx];
669 ordValuesBuf[i] = valCache.at<float>( vi, idx);
674 for( int i = 0; i < nodeSampleCount; i++ )
676 int idx = (*sortedIndices)[i];
677 idx = sampleIndices[idx];
// Value not cached: evaluate the feature directly.
678 ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
682 else // vi >= numPrecalcIdx
684 cv::AutoBuffer<float> abuf(nodeSampleCount);
685 float* sampleValues = &abuf[0];
687 if ( vi < numPrecalcVal )
689 for( int i = 0; i < nodeSampleCount; i++ )
691 sortedIndicesBuf[i] = i;
692 sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
697 for( int i = 0; i < nodeSampleCount; i++ )
699 sortedIndicesBuf[i] = i;
700 sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
// Sort the identity permutation by value, then gather values in that order.
703 icvSortIntAux( sortedIndicesBuf, nodeSampleCount, &sampleValues[0] );
704 for( int i = 0; i < nodeSampleCount; i++ )
705 ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
706 *sortedIndices = sortedIndicesBuf;
709 *ordValues = ordValuesBuf;
// Fills catValuesBuf with the (integer) categorical value of variable vi
// for each sample of node n. Sources, in order: valCache when precomputed,
// the feature evaluator for real features, and get_cv_labels for the
// pseudo-variable beyond var_count. catValuesBuf doubles as index scratch.
712 const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
714 int nodeSampleCount = n->sample_count;
715 int* sampleIndicesBuf = catValuesBuf; //
716 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
718 if ( vi < numPrecalcVal )
720 for( int i = 0; i < nodeSampleCount; i++ )
// Cached values are stored as float; truncate back to the category id.
721 catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
725 if( vi >= numPrecalcVal && vi < var_count )
727 for( int i = 0; i < nodeSampleCount; i++ )
728 catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
// vi beyond var_count: serve the cv_labels row instead of a real feature.
732 get_cv_labels( n, catValuesBuf );
// Returns the value of feature vi for sample si: from the precalculated
// valCache when available (and allocated), otherwise computed on the fly
// by the feature evaluator.
739 float CvCascadeBoostTrainData::getVarValue( int vi, int si )
741 if ( vi < numPrecalcVal && !valCache.empty() )
742 return valCache.at<float>( vi, si );
743 return (*featureEvaluator)( vi, si );
// parallel_for body: for each feature in the range, evaluates all sample
// values into a thread-local scratch buffer and writes the value-sorted
// sample-index row into the shared work buffer (`buf`). Only indices are
// persisted — values are discarded (hence "IdxOnly").
// Writes are disjoint per feature row, so concurrent invocations don't race.
747 struct FeatureIdxOnlyPrecalc
749 FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
751 featureEvaluator = _featureEvaluator;
752 sample_count = _sample_count;
753 udst = (unsigned short*)_buf->data.s;
755 is_buf_16u = _is_buf_16u;
757 void operator()( const BlockedRange& range ) const
// Per-call value scratch: one float per sample, freed on scope exit.
759 cv::AutoBuffer<float> valCache(sample_count);
760 float* valCachePtr = (float*)valCache;
761 for ( int fi = range.begin(); fi < range.end(); fi++)
763 for( int si = 0; si < sample_count; si++ )
765 valCachePtr[si] = (*featureEvaluator)( fi, si );
// Seed the row with the identity permutation before sorting by value.
767 *(udst + fi*sample_count + si) = (unsigned short)si;
769 *(idst + fi*sample_count + si) = si;
772 icvSortUShAux( udst + fi*sample_count, sample_count, valCachePtr );
774 icvSortIntAux( idst + fi*sample_count, sample_count, valCachePtr );
777 const CvFeatureEvaluator* featureEvaluator;
780 unsigned short* udst;
// parallel_for body: like FeatureIdxOnlyPrecalc, but additionally persists
// the evaluated feature values into the shared valCache matrix (one row per
// feature) and sorts the index row against that cached row.
// Each invocation touches only its own feature rows, so no synchronization
// is needed.
784 struct FeatureValAndIdxPrecalc
786 FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
788 featureEvaluator = _featureEvaluator;
789 valCache = _valCache;
790 sample_count = _sample_count;
791 udst = (unsigned short*)_buf->data.s;
793 is_buf_16u = _is_buf_16u;
795 void operator()( const BlockedRange& range ) const
797 for ( int fi = range.begin(); fi < range.end(); fi++)
799 for( int si = 0; si < sample_count; si++ )
// Persist the value, then seed the index row with the identity permutation.
801 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
803 *(udst + fi*sample_count + si) = (unsigned short)si;
805 *(idst + fi*sample_count + si) = si;
808 icvSortUShAux( udst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
810 icvSortIntAux( idst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
813 const CvFeatureEvaluator* featureEvaluator;
817 unsigned short* udst;
// parallel_for body: fills valCache rows with evaluated feature values only
// (no sorted-index rows). Used for features whose values are cached but
// whose sorted order is recomputed per node.
821 struct FeatureValOnlyPrecalc
823 FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
825 featureEvaluator = _featureEvaluator;
826 valCache = _valCache;
827 sample_count = _sample_count;
829 void operator()( const BlockedRange& range ) const
831 for ( int fi = range.begin(); fi < range.end(); fi++)
832 for( int si = 0; si < sample_count; si++ )
833 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
835 const CvFeatureEvaluator* featureEvaluator;
// Populates the precalculation caches in parallel, partitioning features by
// what needs caching:
//  * [numPrecalcVal, numPrecalcIdx): sorted indices only;
//  * [0, min(numPrecalcVal, numPrecalcIdx)): values AND sorted indices;
//  * [minNum, numPrecalcVal): values only.
// (A BlockedRange whose begin >= end simply does no work, so the three
// ranges cover all cases regardless of which count is larger.)
// Prints the elapsed wall-clock time.
840 void CvCascadeBoostTrainData::precalculate()
842 int minNum = MIN( numPrecalcVal, numPrecalcIdx);
844 double proctime = -TIME( 0 );
845 parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx),
846 FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
847 parallel_for( BlockedRange(0, minNum),
848 FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
849 parallel_for( BlockedRange(minNum, numPrecalcVal),
850 FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
851 cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
854 //-------------------------------- CascadeBoostTree ----------------------------------------
// Walks the trained tree for one sample and returns the leaf node reached.
// Ordered features: go left when value <= split threshold. Categorical
// features: direction is taken from the split's category bit-subset.
// Raises CV_StsError if the tree has not been trained (root is null).
// NOTE(review): the loop headers ("while node is not a leaf") are not
// visible in this view.
856 CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
858 CvDTreeNode* node = root;
860 CV_Error( CV_StsError, "The tree has not been trained yet" );
862 if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
866 CvDTreeSplit* split = node->split;
867 float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
868 node = val <= split->ord.c ? node->left : node->right;
875 CvDTreeSplit* split = node->split;
876 int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
877 node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
// Serializes the tree in breadth-first order: internal nodes first (child
// references, feature index, threshold or category subset), then the leaf
// values. Leaf references are encoded as negative indices into the leaf
// value array; internal-node references as positive BFS indices.
//   featureMap - optional remapping of feature indices (empty = identity).
// NOTE(review): `Ptr<float> leafVals = new float[size]` pairs an array
// `new[]` with cv::Ptr, whose default deleter is scalar `delete` in this
// OpenCV generation — a latent mismatch worth confirming/fixing upstream
// (e.g. with a vector or AutoBuffer).
883 void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
885 int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
// Number of 32-bit words needed for the category bit-subset.
886 int subsetN = (maxCatCount + 31)/32;
887 queue<CvDTreeNode*> internalNodesQueue;
// Upper bound on leaf count for a tree of the configured max depth.
888 int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
889 Ptr<float> leafVals = new float[size];
891 int internalNodeIdx = 1;
892 CvDTreeNode* tempNode;
894 CV_DbgAssert( root );
895 internalNodesQueue.push( root );
898 fs << CC_INTERNAL_NODES << "[:";
899 while (!internalNodesQueue.empty())
901 tempNode = internalNodesQueue.front();
902 CV_Assert( tempNode->left );
903 if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
// Stash the leaf value; the (negative) leafValIdx is what gets written out.
905 leafVals[-leafValIdx] = (float)tempNode->left->value;
910 internalNodesQueue.push( tempNode->left );
911 fs << internalNodeIdx++;
913 CV_Assert( tempNode->right );
914 if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
916 leafVals[-leafValIdx] = (float)tempNode->right->value;
921 internalNodesQueue.push( tempNode->right );
922 fs << internalNodeIdx++;
// Remap the split's feature index if a feature map was supplied.
924 int fidx = tempNode->split->var_idx;
925 fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
928 fs << tempNode->split->ord.c;
930 for( int i = 0; i < subsetN; i++ )
931 fs << tempNode->split->subset[i];
932 internalNodesQueue.pop();
934 fs << "]"; // CC_INTERNAL_NODES
936 fs << CC_LEAF_VALUES << "[:";
937 for (int ni = 0; ni < -leafValIdx; ni++)
939 fs << "]"; // CC_LEAF_VALUES
// Deserializes a tree written by write(). The serialized node list is
// consumed BACKWARD (iterators start at end() and are decremented), so
// children are reconstructed before their parents; a queue holds completed
// subtrees until the parent links them in. The last node left in the queue
// is the root.
//   _ensemble - owning boost ensemble; _data - shared training-data object.
// NOTE(review): the leaf-vs-internal decision branches (tests on lidx/ridx
// sign) are not visible in this view; comments describe the visible lines.
943 void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
944 CvDTreeTrainData* _data )
946 int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
947 int subsetN = (maxCatCount + 31)/32;
// Serialized ints per internal node: left, right, var_idx + threshold/subset.
948 int step = 3 + ( maxCatCount>0 ? subsetN : 1 );
950 queue<CvDTreeNode*> internalNodesQueue;
951 FileNodeIterator internalNodesIt, leafValsuesIt;
952 CvDTreeNode* prntNode, *cldNode;
956 ensemble = _ensemble;
960 FileNode rnode = node[CC_INTERNAL_NODES];
// Start both iterators at the last element; the lists are read in reverse.
961 internalNodesIt = rnode.end();
962 leafValsuesIt = node[CC_LEAF_VALUES].end();
963 internalNodesIt--; leafValsuesIt--;
964 for( size_t i = 0; i < rnode.size()/step; i++ )
966 prntNode = data->new_node( 0, 0, 0, 0 );
967 if ( maxCatCount > 0 )
969 prntNode->split = data->new_split_cat( 0, 0 );
// Subset words were written forward, so read them back in reverse order.
970 for( int j = subsetN-1; j>=0; j--)
972 *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
978 *internalNodesIt >> split_value; internalNodesIt--;
979 prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
981 *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
// Child references: right was written after left, so it is read first.
983 *internalNodesIt >> ridx; internalNodesIt--;
984 *internalNodesIt >> lidx;internalNodesIt--;
// Right child is a leaf: build it and pull its value off the leaf list.
987 prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
988 *leafValsuesIt >> cldNode->value; leafValsuesIt--;
989 cldNode->parent = prntNode;
// Right child is an internal node: adopt the next completed subtree.
993 prntNode->right = internalNodesQueue.front();
994 prntNode->right->parent = prntNode;
995 internalNodesQueue.pop();
1000 prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
1001 *leafValsuesIt >> cldNode->value; leafValsuesIt--;
1002 cldNode->parent = prntNode;
1006 prntNode->left = internalNodesQueue.front();
1007 prntNode->left->parent = prntNode;
1008 internalNodesQueue.pop();
1011 internalNodesQueue.push( prntNode );
// The last completed subtree is the whole tree.
1014 root = internalNodesQueue.front();
1015 internalNodesQueue.pop();
// Distributes node's samples to freshly created left/right children after a
// split, following data->direction (0 = left, 1 = right). For each
// precalculated ordered variable the sorted-index row is re-split so both
// halves stay sorted; cv_labels and sample_indices rows are relocated via
// the newIdx table. Finally releases the parent node's per-node data.
// NOTE(review): large parts of the body (direction read, ldst/rdst advance
// and left-branch assignments in several loops) are not visible in this
// view; comments describe only what the visible lines show.
1018 void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
1020 int n = node->sample_count, nl, nr, scount = data->sample_count;
1021 char* dir = (char*)data->direction->data.ptr;
1022 CvDTreeNode *left = 0, *right = 0;
1023 int* newIdx = data->split_buf->data.i;
1024 int newBufIdx = data->get_child_buf_idx( node );
1025 int workVarCount = data->get_work_var_count();
1026 CvMat* buf = data->buf;
1027 cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
1028 int* tempBuf = (int*)(uchar*)inn_buf;
1029 bool splitInputData;
1031 complete_node_dir(node);
1033 for( int i = nl = nr = 0; i < n; i++ )
1036 // initialize new indices for splitting ordered variables
// Branchless select: with d in {0,1}, (nl & (d-1)) | (nr & -d) == d ? nr... 
// i.e. picks the left- or right-half counter without a conditional.
1037 newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
// Children share the parent's buffer: left at node->offset, right after nl.
1042 node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
1043 node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );
// Only re-split the cached rows if either child can still be split further.
1045 splitInputData = node->depth + 1 < data->params.max_depth &&
1046 (node->left->sample_count > data->params.min_sample_count ||
1047 node->right->sample_count > data->params.min_sample_count);
1049 // split ordered variables, keep both halves sorted.
1050 for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
1052 int ci = data->get_var_type(vi);
1053 if( ci >= 0 || !splitInputData )
1056 int n1 = node->get_num_valid(vi);
1057 float *src_val_buf = (float*)(tempBuf + n);
1058 int *src_sorted_idx_buf = (int*)(src_val_buf + n);
1059 int *src_sample_idx_buf = src_sorted_idx_buf + n;
1060 const int* src_sorted_idx = 0;
1061 const float* src_val = 0;
1062 data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
// Snapshot the sorted order before overwriting the shared buffer rows.
1064 for(int i = 0; i < n; i++)
1065 tempBuf[i] = src_sorted_idx[i];
1067 if (data->is_buf_16u)
1069 ushort *ldst, *rdst;
1070 ldst = (ushort*)(buf->data.s + left->buf_idx*buf->cols +
1071 vi*scount + left->offset);
1072 rdst = (ushort*)(ldst + nl);
// Walk the parent's sorted order; each index is routed to ldst or rdst via
// the newIdx relocation table, preserving sortedness in both halves.
1075 for( int i = 0; i < n1; i++ )
1077 int idx = tempBuf[i];
1082 *rdst = (ushort)idx;
1087 *ldst = (ushort)idx;
// This implementation assumes no missing values for the variable.
1091 CV_Assert( n1 == n );
1096 ldst = buf->data.i + left->buf_idx*buf->cols +
1097 vi*scount + left->offset;
1098 rdst = buf->data.i + right->buf_idx*buf->cols +
1099 vi*scount + right->offset;
1102 for( int i = 0; i < n1; i++ )
1104 int idx = tempBuf[i];
1118 CV_Assert( n1 == n );
1122 // split cv_labels using newIdx relocation table
1123 int *src_lbls_buf = tempBuf + n;
1124 const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);
1126 for(int i = 0; i < n; i++)
1127 tempBuf[i] = src_lbls[i];
1129 if (data->is_buf_16u)
1131 unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
1132 (workVarCount-1)*scount + left->offset);
1133 unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
1134 (workVarCount-1)*scount + right->offset);
1136 for( int i = 0; i < n; i++ )
1138 int idx = tempBuf[i];
1141 *rdst = (unsigned short)idx;
1146 *ldst = (unsigned short)idx;
1154 int *ldst = buf->data.i + left->buf_idx*buf->cols +
1155 (workVarCount-1)*scount + left->offset;
1156 int *rdst = buf->data.i + right->buf_idx*buf->cols +
1157 (workVarCount-1)*scount + right->offset;
1159 for( int i = 0; i < n; i++ )
1161 int idx = tempBuf[i];
1175 // split sample indices
1176 int *sampleIdx_src_buf = tempBuf + n;
1177 const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
1179 for(int i = 0; i < n; i++)
1180 tempBuf[i] = sampleIdx_src[i];
1182 if (data->is_buf_16u)
1184 unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
1185 workVarCount*scount + left->offset);
1186 unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
1187 workVarCount*scount + right->offset);
1188 for (int i = 0; i < n; i++)
1190 unsigned short idx = (unsigned short)tempBuf[i];
1205 int* ldst = buf->data.i + left->buf_idx*buf->cols +
1206 workVarCount*scount + left->offset;
1207 int* rdst = buf->data.i + right->buf_idx*buf->cols +
1208 workVarCount*scount + right->offset;
1209 for (int i = 0; i < n; i++)
1211 int idx = tempBuf[i];
// All variables remain fully valid in each child (no missing values).
1225 for( int vi = 0; vi < data->var_count; vi++ )
1227 left->set_num_valid(vi, (int)(nl));
1228 right->set_num_valid(vi, (int)(nr));
1231 // deallocate the parent node data that is not needed anymore
1232 data->free_node_data(node);
// Recursively sets featureMap[0][var_idx] = 1 for every feature used by a
// split anywhere in the subtree rooted at `node`. Null nodes and leaves
// (no split) terminate the recursion.
1235 void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
1237 if ( node && node->split )
1239 featureMap.ptr<int>(0)[node->split->var_idx] = 1;
1240 auxMarkFeaturesInMap( node->left, featureMap );
1241 auxMarkFeaturesInMap( node->right, featureMap );
// Mark (set to 1) in featureMap every feature index used by any split of
// this weak tree, by delegating to the recursive helper from the root node.
1245 void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
1247     auxMarkFeaturesInMap( root, featureMap );
1250 //----------------------------------- CascadeBoost --------------------------------------
// Train one boosting stage: wrap the feature evaluator in the train-data
// adapter, then repeatedly add weak CvCascadeBoostTree learners until
// isErrDesired() accepts the stage or params.weak_count rounds are used.
// A per-round table (round number N, hit rate HR, false alarm FA) is
// printed to stdout.
1252 bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
1254                            int _precalcValBufSize, int _precalcIdxBufSize,
1255                            const CvCascadeBoostParams& _params )
1259     data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
1260                                         _precalcValBufSize, _precalcIdxBufSize, _params );
// 'weak' stores pointers to the trained weak trees of this stage.
1261     CvMemStorage *storage = cvCreateMemStorage();
1262     weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1265     set_params( _params );
// LogitBoost / Gentle AdaBoost use regression trees, so the categorical
// responses must be copied before being overwritten with real targets
// (see update_weights below).
1266     if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
1267         data->do_responses_copy();
// tree == 0: initialize responses, masks and uniform weights before round 1.
1269     update_weights( 0 );
1271     cout << "+----+---------+---------+" << endl;
1272     cout << "| N | HR | FA |" << endl;
1273     cout << "+----+---------+---------+" << endl;
1277         CvCascadeBoostTree* tree = new CvCascadeBoostTree;
1278         if( !tree->train( data, subsample_mask, this ) )
1283         cvSeqPush( weak, &tree );
1284         update_weights( tree );
// If weight trimming has deactivated every sample, further rounds are useless.
1286         if( cvCountNonZero(subsample_mask) == 0 )
1289     while( !isErrDesired() && (weak->total < params.weak_count) );
1291     data->is_classifier = true;
1292     data->free_train_data();
// Evaluate this stage on sample 'sampleIdx' (features are read through the
// precomputed train data, not from a raw image). The responses of all weak
// trees are summed; when returnSum is false the sum is binarized against the
// stage threshold (with a small epsilon margin) into 0 (reject) / 1 (accept).
1296 float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
1301     cvStartReadSeq( weak, &reader );
1302     cvSetSeqReaderPos( &reader, 0 );
// Accumulate each weak tree's leaf value for this sample.
1303     for( int i = 0; i < weak->total; i++ )
1306         CV_READ_SEQ_ELEM( wtree, reader );
1307         sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
// Below (threshold - eps) => 0, otherwise 1.
1310         sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
1314 bool CvCascadeBoost::set_params( const CvBoostParams& _params )
1316 minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
1317 maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
1318 return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
1319 ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
1320 CvBoost::set_params( _params ));
// Boost weight update (specialized copy of the ml-module logic, using the
// cascade's precomputed-feature predict()). Two modes:
//   tree == 0  : called once before the first round — allocates and
//                initializes responses, masks, uniform weights and labels;
//   tree != 0  : re-weights every sample from the new weak tree's output
//                according to the boosting flavour
//                (DISCRETE / REAL / LOGIT / GENTLE).
1323 void CvCascadeBoost::update_weights( CvBoostTree* tree )
1325 int n = data->sample_count;
1330 const int* sampleIdx = 0;
// Scratch buffer: sample indices (LOGIT/GENTLE only) + class labels
// (first call only).
1331 int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
1332 ( !tree ? n*sizeof(int) : 0 );
1333 cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
1334 uchar* cur_inn_buf_pos = (uchar*)inn_buf;
// LOGIT/GENTLE write real-valued regression targets back into
// responses_copy, addressed through sampleIdx with the matrix element step.
1335 if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
1337 step = CV_IS_MAT_CONT(data->responses_copy->type) ?
1338 1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
1339 fdata = data->responses_copy->data.fl;
1340 sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
1341 sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
1343 CvMat* buf = data->buf;
1344 if( !tree ) // before training the first tree, initialize weights and other parameters
1346 int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
1347 const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
1348 // in case of logitboost and gentle adaboost each weak tree is a regression tree,
1349 // so we need to convert class labels to floating-point values
1351 double p[2] = { 1, 1 };
// Drop any state left over from a previously trained stage.
1353 cvReleaseMat( &orig_response );
1354 cvReleaseMat( &sum_response );
1355 cvReleaseMat( &weak_eval );
1356 cvReleaseMat( &subsample_mask );
1357 cvReleaseMat( &weights );
1359 orig_response = cvCreateMat( 1, n, CV_32S );
1360 weak_eval = cvCreateMat( 1, n, CV_64F );
1361 subsample_mask = cvCreateMat( 1, n, CV_8U );
1362 weights = cvCreateMat( 1, n, CV_64F );
1363 subtree_weights = cvCreateMat( 1, n + 2, CV_64F );
// The labels row of the internal buffer holds per-sample indices; its
// element type (ushort vs int) depends on the buffer mode, the layout
// arithmetic is the same in both branches.
1365 if (data->is_buf_16u)
1367 unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
1368 data->data_root->offset + (data->work_var_count-1)*data->sample_count);
1369 for( int i = 0; i < n; i++ )
1371 // save original categorical responses {0,1}, convert them to {-1,1}
1372 orig_response->data.i[i] = classLabels[i]*2 - 1;
1373 // make all the samples active at start.
1374 // later, in trim_weights() deactivate/reactive again some, if need
1375 subsample_mask->data.ptr[i] = (uchar)1;
1376 // make all the initial weights the same.
1377 weights->data.db[i] = w0*p[classLabels[i]];
1378 // set the labels to find (from within weak tree learning proc)
1379 // the particular sample weight, and where to store the response.
1380 labels[i] = (unsigned short)i;
1385 int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
1386 data->data_root->offset + (data->work_var_count-1)*data->sample_count;
1388 for( int i = 0; i < n; i++ )
1390 // save original categorical responses {0,1}, convert them to {-1,1}
1391 orig_response->data.i[i] = classLabels[i]*2 - 1;
1392 subsample_mask->data.ptr[i] = (uchar)1;
1393 weights->data.db[i] = w0*p[classLabels[i]];
// LogitBoost additionally keeps a running score F(x_i) per sample and
// starts from +/-2 regression targets.
1398 if( params.boost_type == LOGIT )
1400 sum_response = cvCreateMat( 1, n, CV_64F );
1402 for( int i = 0; i < n; i++ )
1404 sum_response->data.db[i] = 0;
1405 fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
1408 // in case of logitboost each weak tree is a regression tree.
1409 // the target function values are recalculated for each of the trees
1410 data->is_classifier = false;
1412 else if( params.boost_type == GENTLE )
1414 for( int i = 0; i < n; i++ )
1415 fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];
1417 data->is_classifier = false;
1422 // at this moment, for all the samples that participated in the training of the most
1423 // recent weak classifier we know the responses. For other samples we need to compute them
1424 if( have_subsample )
1426 // invert the subsample mask
1427 cvXorS( subsample_mask, cvScalar(1.), subsample_mask );
1429 // run tree through all the non-processed samples
1430 for( int i = 0; i < n; i++ )
1431 if( subsample_mask->data.ptr[i] )
1433 weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
1437 // now update weights and other parameters for each type of boosting
1438 if( params.boost_type == DISCRETE )
1440 // Discrete AdaBoost:
1441 // weak_eval[i] (=f(x_i)) is in {-1,1}
1442 // err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
1443 // C = log((1-err)/err)
1444 // w_i *= exp(C*(f(x_i) != y_i))
// scale[0] applies to correctly classified samples (weight unchanged),
// scale[1] (filled below) to misclassified ones.
1447 double scale[] = { 1., 0. };
1449 for( int i = 0; i < n; i++ )
1451 double w = weights->data.db[i];
1453 err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
1458 C = err = -logRatio( err );
1459 scale[1] = exp(err);
1462 for( int i = 0; i < n; i++ )
1464 double w = weights->data.db[i]*
1465 scale[weak_eval->data.db[i] != orig_response->data.i[i]];
1467 weights->data.db[i] = w;
1472 else if( params.boost_type == REAL )
1475 // weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
1476 // w_i *= exp(-y_i*f(x_i))
1478 for( int i = 0; i < n; i++ )
1479 weak_eval->data.db[i] *= -orig_response->data.i[i];
1481 cvExp( weak_eval, weak_eval );
1483 for( int i = 0; i < n; i++ )
1485 double w = weights->data.db[i]*weak_eval->data.db[i];
1487 weights->data.db[i] = w;
1490 else if( params.boost_type == LOGIT )
1493 // weak_eval[i] = f(x_i) in [-z_max,z_max]
1494 // sum_response = F(x_i).
1495 // F(x_i) += 0.5*f(x_i)
1496 // p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))=1/(1+exp(-2*F(x_i)))
1497 // reuse weak_eval: weak_eval[i] <- p(x_i)
1498 // w_i = p(x_i)*1(1 - p(x_i))
1499 // z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
1500 // store z_i to the data->data_root as the new target responses
1502 const double lbWeightThresh = FLT_EPSILON;
1503 const double lbZMax = 10.;
1505 for( int i = 0; i < n; i++ )
1507 double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
1508 sum_response->data.db[i] = s;
// store -2*F(x_i) so that cvExp below yields exp(-2F), i.e. 1/p - 1.
1509 weak_eval->data.db[i] = -2*s;
1512 cvExp( weak_eval, weak_eval );
1514 for( int i = 0; i < n; i++ )
1516 double p = 1./(1. + weak_eval->data.db[i]);
1517 double w = p*(1 - p), z;
// clamp the weight away from zero to keep z_i numerically stable
1518 w = MAX( w, lbWeightThresh );
1519 weights->data.db[i] = w;
// z targets are clamped to [-lbZMax, lbZMax]; sign follows the label.
1521 if( orig_response->data.i[i] > 0 )
1524 fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
1529 fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
// Gentle AdaBoost (the only remaining boost type):
1536 // weak_eval[i] = f(x_i) in [-1,1]
1537 // w_i *= exp(-y_i*f(x_i))
1538 assert( params.boost_type == GENTLE );
1540 for( int i = 0; i < n; i++ )
1541 weak_eval->data.db[i] *= -orig_response->data.i[i];
1543 cvExp( weak_eval, weak_eval );
1545 for( int i = 0; i < n; i++ )
1547 double w = weights->data.db[i] * weak_eval->data.db[i];
1548 weights->data.db[i] = w;
1554 // renormalize weights
// NOTE(review): the multiplication below implies sumW holds the
// *reciprocal* of the accumulated total weight at this point (set in
// lines not visible here) — confirm against the full source.
1555 if( sumW > FLT_EPSILON )
1558 for( int i = 0; i < n; ++i )
1559 weights->data.db[i] *= sumW;
// Decide whether the stage trained so far is acceptable.
// Picks the stage threshold as the (1 - minHitRate) quantile of the sorted
// positive-sample scores (so at least minHitRate of positives pass), then
// measures the false-alarm rate on the negatives, prints a progress row,
// and returns true when falseAlarm <= maxFalseAlarm.
1563 bool CvCascadeBoost::isErrDesired()
1565 int sCount = data->sample_count,
1566 numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
1567 vector<float> eval(sCount);
// Collect raw stage scores (returnSum = true) for all positive samples.
1569 for( int i = 0; i < sCount; i++ )
1570 if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
1571 eval[numPos++] = predict( i, true );
1572 icvSortFlt( &eval[0], numPos, 0 );
1573 int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
1574 threshold = eval[ thresholdIdx ];
1575 numPosTrue = numPos - thresholdIdx;
// Positives below thresholdIdx whose score ties the threshold (within
// FLT_EPSILON) also pass; the loop body adjusting numPosTrue is not
// visible here — NOTE(review): confirm against the full source.
1576 for( int i = thresholdIdx - 1; i >= 0; i--)
1577 if ( abs( eval[i] - threshold) < FLT_EPSILON )
1579 float hitRate = ((float) numPosTrue) / ((float) numPos);
// Count negatives (getCls == 0) and how many the stage wrongly accepts.
1581 for( int i = 0; i < sCount; i++ )
1583 if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
1590 float falseAlarm = ((float) numFalse) / ((float) numNeg);
// Progress row in the table whose header was printed by train().
1592 cout << "|"; cout.width(4); cout << right << weak->total;
1593 cout << "|"; cout.width(9); cout << right << hitRate;
1594 cout << "|"; cout.width(9); cout << right << falseAlarm;
1595 cout << "|" << endl;
1596 cout << "+----+---------+---------+" << endl;
1598 return falseAlarm <= maxFalseAlarm;
// Serialize this stage into a FileStorage: weak-classifier count, the stage
// threshold, then a sequence with every weak tree (each tree's writer
// receives featureMap for its own use).
1601 void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
1604 CvCascadeBoostTree* weakTree;
1605 fs << CC_WEAK_COUNT << weak->total;
1606 fs << CC_STAGE_THRESHOLD << threshold;
// Open the weak-classifier sequence node; trees are appended in order.
1607 fs << CC_WEAK_CLASSIFIERS << "[";
1608 for( int wi = 0; wi < weak->total; wi++)
// Historical per-tree comment writing, kept disabled.
1610 /*sprintf( cmnt, "tree %i", wi );
1611 cvWriteComment( fs, cmnt, 0 );*/
1612 weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1613 weakTree->write( fs, featureMap );
// Restore a previously written stage from a FileNode: re-create the
// train-data adapter around the feature evaluator, apply the parameters,
// read the stage threshold, then read each stored weak tree into 'weak'.
1618 bool CvCascadeBoost::read( const FileNode &node,
1619 const CvFeatureEvaluator* _featureEvaluator,
1620 const CvCascadeBoostParams& _params )
1622 CvMemStorage* storage;
1624 data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
1625 set_params( _params );
1627 node[CC_STAGE_THRESHOLD] >> threshold;
1628 FileNode rnode = node[CC_WEAK_CLASSIFIERS];
1630 storage = cvCreateMemStorage();
1631 weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
// One CvCascadeBoostTree per stored weak-classifier node.
1632 for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
1634 CvCascadeBoostTree* tree = new CvCascadeBoostTree();
1635 tree->read( *it, this, data );
1636 cvSeqPush( weak, &tree );
1641 void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
1643 for( int wi = 0; wi < weak->total; wi++ )
1645 CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
1646 weakTree->markFeaturesInMap( featureMap );