1 #include "opencv2/core/core.hpp"
2 #include "opencv2/core/internal.hpp"
13 using cv::FileNodeIterator;
14 using cv::ParallelLoopBody;
18 #include "cascadeclassifier.h"
static double logRatio( double val )
{
    // Log-odds (logit) of val. val is clamped into (eps, 1-eps) first so
    // the result stays finite when a probability reaches 0 or 1 exactly.
    const double eps = 1e-5;
    if( val < eps )
        val = eps;
    else if( val > 1. - eps )
        val = 1. - eps;
    return std::log( val / (1. - val) );
}
// Comparator macros and qsort instantiations used for ordering feature
// values and index arrays. In CV_CMP_NUM_IDX, `aux` is the parallel array
// of float keys supplied as the userdata argument: the int/ushort arrays
// are sorted by the float values they index into.
34 #define CV_CMP_FLT(i,j) (i < j)
35 static CV_IMPLEMENT_QSORT_EX( icvSortFlt, float, CV_CMP_FLT, const float* )
37 #define CV_CMP_NUM_IDX(i,j) (aux[i] < aux[j])
38 static CV_IMPLEMENT_QSORT_EX( icvSortIntAux, int, CV_CMP_NUM_IDX, const float* )
39 static CV_IMPLEMENT_QSORT_EX( icvSortUShAux, unsigned short, CV_CMP_NUM_IDX, const float* )
41 #define CV_THRESHOLD_EPS (0.00001F)
// Memory-storage block sizing used when creating CvMemStorage below.
43 static const int MinBlockSize = 1 << 16;
44 static const int BlockSizeDelta = 1 << 10;
46 // TODO remove this code duplication with ml/precomp.hpp
48 static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
50 return *(const int*)a - *(const int*)b;
// Normalize a user-supplied component-selection array into a sorted
// CV_32SC1 row vector of indices. Accepts either a 0/1 mask (8u/8s, same
// length as data_arr_size) or an explicit list of 32s indices; optionally
// rejects duplicates. Returns a newly created CvMat (caller releases), or
// releases it again on error via the CXCORE error-status check at the end.
// NOTE(review): interior lines of this function are elided in this view;
// comments describe only what the visible lines establish.
53 static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
57 CV_FUNCNAME( "cvPreprocessIndexArray" );
61 int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
// Validate shape: must be a 1-D CvMat (single row or single column).
66 if( !CV_IS_MAT(idx_arr) )
67 CV_ERROR( CV_StsBadArg, "Invalid index array" );
69 if( idx_arr->rows != 1 && idx_arr->cols != 1 )
70 CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
72 idx_total = idx_arr->rows + idx_arr->cols - 1;
73 srcb = idx_arr->data.ptr;
74 srci = idx_arr->data.i;
// step converts element index to memory stride for non-continuous mats.
76 type = CV_MAT_TYPE(idx_arr->type);
77 step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
// --- mask branch (8u/8s): count the selected components ---
83 // idx_arr is array of 1's and 0's -
84 // i.e. it is a mask of the selected components
85 if( idx_total != data_arr_size )
86 CV_ERROR( CV_StsUnmatchedSizes,
87 "Component mask should contain as many elements as the total number of input variables" );
89 for( i = 0; i < idx_total; i++ )
90 idx_selected += srcb[i*step] != 0;
92 if( idx_selected == 0 )
93 CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
// --- explicit-index branch (32s) ---
97 // idx_arr is array of integer indices of selected components
98 if( idx_total > data_arr_size )
99 CV_ERROR( CV_StsOutOfRange,
100 "index array may not contain more elements than the total number of input variables" );
101 idx_selected = idx_total;
102 // check if sorted already
103 for( i = 0; i < idx_total; i++ )
105 int val = srci[i*step];
115 CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
116 "(it should be 8uC1, 8sC1 or 32sC1)" );
// Allocate the output vector and fill it from whichever branch applied.
119 CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
122 if( type < CV_32SC1 )
124 for( i = 0; i < idx_total; i++ )
130 for( i = 0; i < idx_total; i++ )
131 dsti[i] = srci[i*step];
// Sort so range/duplicate checks below can look at neighbors only.
134 qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
136 if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
137 CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
139 if( check_for_duplicates )
141 for( i = 1; i < idx_total; i++ )
142 if( dsti[i] <= dsti[i-1] )
143 CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
// On any raised error, release the partially built result.
149 if( cvGetErrStatus() < 0 )
150 cvReleaseMat( &idx );
155 //----------------------------- CascadeBoostParams -------------------------------------------------
// Default parameters: Gentle AdaBoost, per-stage minimal hit rate 0.995,
// per-stage maximal false-alarm rate 0.5. Surrogate splits and the extra
// pruning options of CvBoostParams are disabled.
157 CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
159 boost_type = CvBoost::GENTLE;
160 use_surrogates = use_1se_rule = truncate_pruned_tree = false;
// Full-parameter constructor; forwards tree/boost settings to CvBoostParams.
// NOTE(review): boost_type is unconditionally reset to GENTLE in the body,
// overriding the _boostType passed to the base class — confirm this is
// intentional before relying on the argument here.
163 CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
164 float _minHitRate, float _maxFalseAlarm,
165 double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
166 CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
168 boost_type = CvBoost::GENTLE;
169 minHitRate = _minHitRate;
170 maxFalseAlarm = _maxFalseAlarm;
171 use_surrogates = use_1se_rule = truncate_pruned_tree = false;
// Serialize the boosting parameters into the cascade FileStorage.
174 void CvCascadeBoostParams::write( FileStorage &fs ) const
// Map the numeric boost_type back to its symbolic tag; an unsupported
// value yields an empty string and trips the assert below.
176 string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
177 boost_type == CvBoost::REAL ? CC_REAL_BOOST :
178 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
179 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
180 CV_Assert( !boostTypeStr.empty() );
181 fs << CC_BOOST_TYPE << boostTypeStr;
182 fs << CC_MINHITRATE << minHitRate;
183 fs << CC_MAXFALSEALARM << maxFalseAlarm;
184 fs << CC_TRIM_RATE << weight_trim_rate;
185 fs << CC_MAX_DEPTH << max_depth;
186 fs << CC_WEAK_COUNT << weak_count;
// Deserialize boosting parameters from a FileNode; validates ranges and
// raises CV_StsBadArg on an unknown boost type or out-of-range values.
189 bool CvCascadeBoostParams::read( const FileNode &node )
192 FileNode rnode = node[CC_BOOST_TYPE];
193 rnode >> boostTypeStr;
// Inverse of the mapping in write(): tag string -> CvBoost enum, -1 if unknown.
194 boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
195 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
196 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
197 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
198 if (boost_type == -1)
199 CV_Error( CV_StsBadArg, "unsupported Boost type" );
200 node[CC_MINHITRATE] >> minHitRate;
201 node[CC_MAXFALSEALARM] >> maxFalseAlarm;
202 node[CC_TRIM_RATE] >> weight_trim_rate ;
203 node[CC_MAX_DEPTH] >> max_depth ;
204 node[CC_WEAK_COUNT] >> weak_count ;
// Sanity-check: rates must lie in (0, 1]; depth and weak count positive.
205 if ( minHitRate <= 0 || minHitRate > 1 ||
206 maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
207 weight_trim_rate <= 0 || weight_trim_rate > 1 ||
208 max_depth <= 0 || weak_count <= 0 )
209 CV_Error( CV_StsBadArg, "bad parameters range");
213 void CvCascadeBoostParams::printDefaults() const
215 cout << "--boostParams--" << endl;
216 cout << " [-bt <{" << CC_DISCRETE_BOOST << ", "
217 << CC_REAL_BOOST << ", "
218 << CC_LOGIT_BOOST ", "
219 << CC_GENTLE_BOOST << "(default)}>]" << endl;
220 cout << " [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl;
221 cout << " [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
222 cout << " [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
223 cout << " [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
224 cout << " [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
// Print the currently configured boosting parameters (one per line).
227 void CvCascadeBoostParams::printAttrs() const
// Same type->tag mapping as write(); assert guards unsupported values.
229 string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
230 boost_type == CvBoost::REAL ? CC_REAL_BOOST :
231 boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
232 boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
233 CV_Assert( !boostTypeStr.empty() );
234 cout << "boostType: " << boostTypeStr << endl;
235 cout << "minHitRate: " << minHitRate << endl;
236 cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
237 cout << "weightTrimRate: " << weight_trim_rate << endl;
238 cout << "maxDepth: " << max_depth << endl;
239 cout << "maxWeakCount: " << weak_count << endl;
// Parse one command-line option (prmName) with its value string (val),
// storing it into the corresponding member. Unrecognized names / bad
// boost types are reported via the return value (elided in this view).
242 bool CvCascadeBoostParams::scanAttr( const string prmName, const string val)
246 if( !prmName.compare( "-bt" ) )
// Tag string -> CvBoost enum; -1 marks an unsupported type.
248 boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
249 !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
250 !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
251 !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
252 if (boost_type == -1)
255 else if( !prmName.compare( "-minHitRate" ) )
257 minHitRate = (float) atof( val.c_str() );
259 else if( !prmName.compare( "-maxFalseAlarmRate" ) )
261 maxFalseAlarm = (float) atof( val.c_str() );
263 else if( !prmName.compare( "-weightTrimRate" ) )
265 weight_trim_rate = (float) atof( val.c_str() );
267 else if( !prmName.compare( "-maxDepth" ) )
269 max_depth = atoi( val.c_str() );
271 else if( !prmName.compare( "-maxWeakCount" ) )
273 weak_count = atoi( val.c_str() );
// Build a training-tree root restricted to the samples listed in
// _subsample_idx. If the subsample is the full, identity-ordered sample
// set, the existing data_root is shallow-copied; otherwise the presorted
// index buffers, cv_labels and sample_indices in `buf` are re-packed for
// the selected subset. Returns the new root node.
// NOTE(review): interior lines are elided in this view; comments describe
// only what the visible lines establish.
281 CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
283 CvDTreeNode* root = 0;
284 CvMat* isubsample_idx = 0;
285 CvMat* subsample_co = 0;
287 bool isMakeRootCopy = true;
290 CV_Error( CV_StsError, "No training data has been set" );
// Normalize the index array (sorted CV_32SC1 vector, see cvPreprocessIndexArray).
294 CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );
// Detect the "subsample == all samples in order" case.
296 if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
298 const int* sidx = isubsample_idx->data.i;
299 for( int i = 0; i < sample_count; i++ )
303 isMakeRootCopy = false;
309 isMakeRootCopy = false;
// Fast path: clone data_root's statistics instead of re-packing buffers.
314 // make a copy of the root node
317 root = new_node( 0, 1, 0, 0 );
320 root->num_valid = temp.num_valid;
321 if( root->num_valid )
323 for( i = 0; i < var_count; i++ )
324 root->num_valid[i] = data_root->num_valid[i];
326 root->cv_Tn = temp.cv_Tn;
327 root->cv_node_risk = temp.cv_node_risk;
328 root->cv_node_error = temp.cv_node_error;
// Slow path: rebuild buffers for an arbitrary (possibly repeating) subset.
332 int* sidx = isubsample_idx->data.i;
333 // co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
334 int* co, cur_ofs = 0;
335 int workVarCount = get_work_var_count();
336 int count = isubsample_idx->rows + isubsample_idx->cols - 1;
338 root = new_node( 0, count, 1, 0 );
340 CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
341 cvZero( subsample_co );
342 co = subsample_co->data.i;
343 for( int i = 0; i < count; i++ )
345 for( int i = 0; i < sample_count; i++ )
356 cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
// Re-pack the presorted index arrays of each precalculated ordered variable,
// preserving sort order while expanding duplicated sample references.
357 // subsample ordered variables
358 for( int vi = 0; vi < numPrecalcIdx; vi++ )
360 int ci = get_var_type(vi);
363 int *src_idx_buf = (int*)(uchar*)inn_buf;
364 float *src_val_buf = (float*)(src_idx_buf + sample_count);
365 int* sample_indices_buf = (int*)(src_val_buf + sample_count);
366 const int* src_idx = 0;
367 const float* src_val = 0;
368 get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );
370 int j = 0, idx, count_i;
371 int num_valid = data_root->get_num_valid(vi);
372 CV_Assert( num_valid == sample_count );
// 16u branch: buf stores indices as unsigned short.
376 unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
377 vi*sample_count + data_root->offset);
378 for( int i = 0; i < num_valid; i++ )
383 for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
384 udst_idx[j] = (unsigned short)cur_ofs;
// 32s branch.
389 int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
390 vi*sample_count + root->offset;
391 for( int i = 0; i < num_valid; i++ )
396 for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
397 idst_idx[j] = cur_ofs;
// Copy the cross-validation labels for the selected samples.
402 // subsample cv_lables
403 const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
406 unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
407 (workVarCount-1)*sample_count + root->offset);
408 for( int i = 0; i < count; i++ )
409 udst[i] = (unsigned short)src_lbls[sidx[i]];
413 int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
414 (workVarCount-1)*sample_count + root->offset;
415 for( int i = 0; i < count; i++ )
416 idst[i] = src_lbls[sidx[i]];
// Copy the sample indices for the selected samples.
419 // subsample sample_indices
420 const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
423 unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
424 workVarCount*sample_count + root->offset);
425 for( int i = 0; i < count; i++ )
426 sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
430 int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
431 workVarCount*sample_count + root->offset;
432 for( int i = 0; i < count; i++ )
433 sample_idx_dst[i] = sample_idx_src[sidx[i]];
436 for( int vi = 0; vi < var_count; vi++ )
437 root->set_num_valid(vi, count);
// Release the temporaries regardless of which path was taken.
440 cvReleaseMat( &isubsample_idx );
441 cvReleaseMat( &subsample_co );
446 //---------------------------- CascadeBoostTrainData -----------------------------
// Lightweight constructor: wires up the feature evaluator and var_type
// metadata without allocating the per-sample training buffers (those are
// created by setData()). Features are categorical when the evaluator
// reports a positive max category count (e.g. LBP), ordered otherwise.
448 CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
449 const CvDTreeParams& _params )
451 is_classifier = true;
452 var_all = var_count = (int)_featureEvaluator->getNumFeatures();
454 featureEvaluator = _featureEvaluator;
456 set_params( _params );
457 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
// var_type layout: one entry per variable plus two trailing slots used
// for cv_labels and sample_indices bookkeeping.
458 var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
459 if ( featureEvaluator->getMaxCatCount() > 0 )
462 cat_var_count = var_count;
// Categorical variables: non-negative entry == categorical index.
464 for( int vi = 0; vi < var_count; vi++ )
466 var_type->data.i[vi] = vi;
472 ord_var_count = var_count;
// Ordered variables: negative entry == -(ordered index + 1).
473 for( int vi = 1; vi <= var_count; vi++ )
475 var_type->data.i[vi-1] = -vi;
478 var_type->data.i[var_count] = cat_var_count;
479 var_type->data.i[var_count+1] = cat_var_count+1;
// Size the tree memory storage for the largest possible split record.
481 int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
482 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
483 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
484 tree_storage = cvCreateMemStorage( treeBlockSize );
485 node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
486 split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
// Convenience constructor: performs the full buffer allocation and
// precalculation immediately via setData().
489 CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
491 int _precalcValBufSize, int _precalcIdxBufSize,
492 const CvDTreeParams& _params )
494 setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
// Allocate and initialize all per-sample training structures: the big
// index buffer `buf` (16u when sample_count < 65536, else 32s), the
// precalculated feature-value cache, var_type metadata, memory storages,
// the root node, priors and per-split scratch mats. The _precalc*BufSize
// arguments are budgets in megabytes that bound how many features get
// precalculated values/sorted indices.
// NOTE(review): interior lines are elided in this view; comments describe
// only what the visible lines establish.
497 void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
499 int _precalcValBufSize, int _precalcIdxBufSize,
500 const CvDTreeParams& _params )
503 unsigned short* udst = 0;
505 uint64 effective_buf_size = 0;
506 int effective_buf_height = 0, effective_buf_width = 0;
513 is_classifier = true;
517 set_params( _params );
519 CV_Assert( _featureEvaluator );
520 featureEvaluator = _featureEvaluator;
522 max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
523 _resp = featureEvaluator->getCls();
525 // TODO: check responses: elements must be 0 or 1
527 if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
528 CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );
530 var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
531 sample_count = _numSamples;
// Small sample sets allow compact 16-bit index storage in buf.
534 if (sample_count < 65536)
// Convert the MB budgets into feature counts, capped by var_count.
537 numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
538 numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
539 ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );
541 assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );
543 valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
544 var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
546 if ( featureEvaluator->getMaxCatCount() > 0 )
549 cat_var_count = var_count;
// Categorical variables: non-negative var_type entry.
551 for( int vi = 0; vi < var_count; vi++ )
553 var_type->data.i[vi] = vi;
559 ord_var_count = var_count;
// Ordered variables: negative var_type entry == -(ordered index + 1).
560 for( int vi = 1; vi <= var_count; vi++ )
562 var_type->data.i[vi-1] = -vi;
565 var_type->data.i[var_count] = cat_var_count;
566 var_type->data.i[var_count+1] = cat_var_count+1;
567 work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
570 buf_size = -1; // the member buf_size is obsolete
// Guard against 32-bit overflow of CvMat dimensions: compute the wanted
// size in 64 bits and verify the chosen width*height reproduces it.
572 effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
573 effective_buf_width = sample_count;
574 effective_buf_height = work_var_count+1;
576 if (effective_buf_width >= effective_buf_height)
577 effective_buf_height *= buf_count;
579 effective_buf_width *= buf_count;
581 if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
583 CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
587 buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
589 buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );
591 cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
593 // precalculate valCache and set indices in buf
// Memory storages for tree nodes/splits, sized per the block constants.
596 // now calculate the maximum size of split,
597 // create memory storage that will keep nodes and splits of the decision tree
598 // allocate root node and the buffer for the whole training data
599 int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
600 (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
601 int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
602 treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
603 tree_storage = cvCreateMemStorage( treeBlockSize );
604 node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );
606 int nvSize = var_count*sizeof(int);
607 nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
608 int tempBlockSize = nvSize;
609 tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
610 temp_storage = cvCreateMemStorage( tempBlockSize );
611 nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );
613 data_root = new_node( 0, sample_count, 0, 0 );
// Initialize sample_indices row of buf with the identity permutation.
617 udst = (unsigned short*)(buf->data.s + work_var_count*sample_count);
619 idst = buf->data.i + work_var_count*sample_count;
621 for (int si = 0; si < sample_count; si++)
624 udst[si] = (unsigned short)si;
628 for( int vi = 0; vi < var_count; vi++ )
629 data_root->set_num_valid(vi, sample_count);
630 for( int vi = 0; vi < cat_var_count; vi++ )
631 cat_count->data.i[vi] = max_c_count;
// The last cat_count slot is the 2-class response.
633 cat_count->data.i[cat_var_count] = 2;
635 maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
636 (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
637 split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );
// Uniform class priors; counts/direction/split_buf are per-sample scratch.
639 priors = cvCreateMat( 1, get_num_classes(), CV_64F );
640 cvSet(priors, cvScalar(1));
641 priors_mult = cvCloneMat( priors );
642 counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
643 direction = cvCreateMat( 1, sample_count, CV_8UC1 );
644 split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
// Release the training buffers via the base-class implementation
// (additional cleanup lines are elided in this view).
647 void CvCascadeBoostTrainData::free_train_data()
649 CvDTreeTrainData::free_train_data();
// Fill labelsBuf with the integer class label (0/1 response) of each
// sample in node n, looked up through the node's sample indices.
653 const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
655 int nodeSampleCount = n->sample_count;
656 int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
// labelsBuf doubles as scratch for the sample indices before being
// overwritten in-place with the labels below.
658 int* sampleIndicesBuf = labelsBuf; //
659 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
660 for( int si = 0; si < nodeSampleCount; si++ )
662 int sidx = sampleIndices[si];
663 labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
// Sample indices live in the last work-variable row of buf.
668 const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
670 return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
// Cross-validation labels live in the next-to-last work-variable row of buf.
673 const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
675 return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
// Produce, for ordered variable vi at node n, the node's samples sorted by
// feature value: *sortedIndices gets the sort permutation, *ordValues the
// values in sorted order. Uses the presorted index rows in buf when
// vi < numPrecalcIdx and the value cache when vi < numPrecalcVal;
// otherwise evaluates and sorts on the fly.
678 void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
679 const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
681 int nodeSampleCount = n->sample_count;
682 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
// Fast path: a presorted permutation for vi already lives in buf.
684 if ( vi < numPrecalcIdx )
687 *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + vi*sample_count + n->offset;
// 16u storage: widen the shorts into the caller's int buffer.
690 const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
691 vi*sample_count + n->offset );
692 for( int i = 0; i < nodeSampleCount; i++ )
693 sortedIndicesBuf[i] = shortIndices[i];
695 *sortedIndices = sortedIndicesBuf;
// Values come from the cache when available, else from the evaluator.
698 if( vi < numPrecalcVal )
700 for( int i = 0; i < nodeSampleCount; i++ )
702 int idx = (*sortedIndices)[i];
703 idx = sampleIndices[idx];
704 ordValuesBuf[i] = valCache.at<float>( vi, idx);
709 for( int i = 0; i < nodeSampleCount; i++ )
711 int idx = (*sortedIndices)[i];
712 idx = sampleIndices[idx];
713 ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
// Slow path: no presorted indices for vi — gather values and sort now.
717 else // vi >= numPrecalcIdx
719 cv::AutoBuffer<float> abuf(nodeSampleCount);
720 float* sampleValues = &abuf[0];
722 if ( vi < numPrecalcVal )
724 for( int i = 0; i < nodeSampleCount; i++ )
726 sortedIndicesBuf[i] = i;
727 sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
732 for( int i = 0; i < nodeSampleCount; i++ )
734 sortedIndicesBuf[i] = i;
735 sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
// Sort the identity permutation by the gathered float keys.
738 icvSortIntAux( sortedIndicesBuf, nodeSampleCount, &sampleValues[0] );
739 for( int i = 0; i < nodeSampleCount; i++ )
740 ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
741 *sortedIndices = sortedIndicesBuf;
744 *ordValues = ordValuesBuf;
// Fill catValuesBuf with the (integer) categorical value of variable vi
// for each sample in node n: from the value cache when precalculated,
// from the evaluator when vi is a real feature, and from cv_labels when
// vi addresses the cv-label pseudo-variable.
747 const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
749 int nodeSampleCount = n->sample_count;
// catValuesBuf doubles as scratch for sample indices, overwritten below.
750 int* sampleIndicesBuf = catValuesBuf; //
751 const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
753 if ( vi < numPrecalcVal )
755 for( int i = 0; i < nodeSampleCount; i++ )
756 catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
760 if( vi >= numPrecalcVal && vi < var_count )
762 for( int i = 0; i < nodeSampleCount; i++ )
763 catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
767 get_cv_labels( n, catValuesBuf );
774 float CvCascadeBoostTrainData::getVarValue( int vi, int si )
776 if ( vi < numPrecalcVal && !valCache.empty() )
777 return valCache.at<float>( vi, si );
778 return (*featureEvaluator)( vi, si );
// Parallel body that precalculates ONLY the sorted-index rows of `buf`
// (feature values are evaluated into a transient per-feature buffer and
// discarded after sorting). Used for features in [numPrecalcVal, numPrecalcIdx).
782 struct FeatureIdxOnlyPrecalc : ParallelLoopBody
784 FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
786 featureEvaluator = _featureEvaluator;
787 sample_count = _sample_count;
788 udst = (unsigned short*)_buf->data.s;
790 is_buf_16u = _is_buf_16u;
// For each feature in the range: evaluate all samples, seed the identity
// permutation, then sort it by value with the aux-keyed qsort.
792 void operator()( const Range& range ) const
794 cv::AutoBuffer<float> valCache(sample_count);
795 float* valCachePtr = (float*)valCache;
796 for ( int fi = range.start; fi < range.end; fi++)
798 for( int si = 0; si < sample_count; si++ )
800 valCachePtr[si] = (*featureEvaluator)( fi, si );
802 *(udst + fi*sample_count + si) = (unsigned short)si;
804 *(idst + fi*sample_count + si) = si;
807 icvSortUShAux( udst + fi*sample_count, sample_count, valCachePtr );
809 icvSortIntAux( idst + fi*sample_count, sample_count, valCachePtr );
812 const CvFeatureEvaluator* featureEvaluator;
815 unsigned short* udst;
// Parallel body that precalculates BOTH the value cache rows and the
// sorted-index rows of `buf`. Used for features in [0, min(numPrecalcVal,
// numPrecalcIdx)).
819 struct FeatureValAndIdxPrecalc : ParallelLoopBody
821 FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
823 featureEvaluator = _featureEvaluator;
824 valCache = _valCache;
825 sample_count = _sample_count;
826 udst = (unsigned short*)_buf->data.s;
828 is_buf_16u = _is_buf_16u;
// Evaluate into the persistent cache row, then sort the identity
// permutation by that row.
830 void operator()( const Range& range ) const
832 for ( int fi = range.start; fi < range.end; fi++)
834 for( int si = 0; si < sample_count; si++ )
836 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
838 *(udst + fi*sample_count + si) = (unsigned short)si;
840 *(idst + fi*sample_count + si) = si;
843 icvSortUShAux( udst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
845 icvSortIntAux( idst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
848 const CvFeatureEvaluator* featureEvaluator;
852 unsigned short* udst;
// Parallel body that precalculates ONLY value-cache rows (no index
// sorting). Used for features in [min(numPrecalcVal, numPrecalcIdx),
// numPrecalcVal).
856 struct FeatureValOnlyPrecalc : ParallelLoopBody
858 FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
860 featureEvaluator = _featureEvaluator;
861 valCache = _valCache;
862 sample_count = _sample_count;
864 void operator()( const Range& range ) const
866 for ( int fi = range.start; fi < range.end; fi++)
867 for( int si = 0; si < sample_count; si++ )
868 valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
870 const CvFeatureEvaluator* featureEvaluator;
// Run the three precalculation passes in parallel, partitioning the
// feature range so each feature is handled by exactly one body:
//   [numPrecalcVal, numPrecalcIdx) -> indices only,
//   [0, min)                        -> values + indices,
//   [min, numPrecalcVal)            -> values only.
// (The first range is empty when numPrecalcIdx <= numPrecalcVal.)
875 void CvCascadeBoostTrainData::precalculate()
877 int minNum = MIN( numPrecalcVal, numPrecalcIdx);
// TIME(0) bracketing measures wall-clock of the precalculation.
879 double proctime = -TIME( 0 );
880 parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
881 FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
882 parallel_for_( Range(0, minNum),
883 FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
884 parallel_for_( Range(minNum, numPrecalcVal),
885 FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
886 cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
889 //-------------------------------- CascadeBoostTree ----------------------------------------
// Descend the trained tree for one sample and return the reached leaf.
// Ordered features compare the value against the split threshold;
// categorical features consult the split's subset bitmask.
891 CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
893 CvDTreeNode* node = root;
895 CV_Error( CV_StsError, "The tree has not been trained yet" );
897 if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
901 CvDTreeSplit* split = node->split;
902 float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
903 node = val <= split->ord.c ? node->left : node->right;
// Categorical branch: direction decided by the category's subset bit.
910 CvDTreeSplit* split = node->split;
911 int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
912 node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
// Serialize this weak tree: a breadth-first list of internal nodes (each
// as left-child ref, right-child ref, feature index, threshold/subset)
// followed by the flat list of leaf values. Leaf references are encoded
// as negative indices into the leaf-value array; internal-node references
// as positive queue positions.
918 void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
920 int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
921 int subsetN = (maxCatCount + 31)/32;
922 queue<CvDTreeNode*> internalNodesQueue;
// A tree of depth d has at most 2^d leaves.
923 int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
// NOTE(review): Ptr<float> releases with scalar delete, but this buffer
// is allocated with new[] — mismatched deallocation; a vector<float> or
// cv::AutoBuffer would be safer. Confirm against the Ptr<> implementation.
924 Ptr<float> leafVals = new float[size];
926 int internalNodeIdx = 1;
927 CvDTreeNode* tempNode;
929 CV_DbgAssert( root );
930 internalNodesQueue.push( root );
933 fs << CC_INTERNAL_NODES << "[:";
// BFS over internal nodes; children are either queued (internal) or
// their value is appended to leafVals (leaf).
934 while (!internalNodesQueue.empty())
936 tempNode = internalNodesQueue.front();
937 CV_Assert( tempNode->left );
938 if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
940 leafVals[-leafValIdx] = (float)tempNode->left->value;
945 internalNodesQueue.push( tempNode->left );
946 fs << internalNodeIdx++;
948 CV_Assert( tempNode->right );
949 if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
951 leafVals[-leafValIdx] = (float)tempNode->right->value;
956 internalNodesQueue.push( tempNode->right );
957 fs << internalNodeIdx++;
// Remap the split's feature index through featureMap when pruning unused
// features from the saved cascade.
959 int fidx = tempNode->split->var_idx;
960 fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
963 fs << tempNode->split->ord.c;
965 for( int i = 0; i < subsetN; i++ )
966 fs << tempNode->split->subset[i];
967 internalNodesQueue.pop();
969 fs << "]"; // CC_INTERNAL_NODES
971 fs << CC_LEAF_VALUES << "[:";
972 for (int ni = 0; ni < -leafValIdx; ni++)
974 fs << "]"; // CC_LEAF_VALUES
// Deserialize a weak tree written by write(): walks the flat node list
// BACKWARDS (iterators start at end() and are decremented), rebuilding
// children before parents so each parent can pop its already-built
// internal children off the queue; leaf children are created fresh from
// the leaf-value list. The last node left in the queue is the root.
978 void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
979 CvDTreeTrainData* _data )
981 int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
982 int subsetN = (maxCatCount + 31)/32;
// Each serialized internal node occupies `step` scalars: left ref, right
// ref, feature index, plus threshold (ordered) or subset words (categorical).
983 int step = 3 + ( maxCatCount>0 ? subsetN : 1 );
985 queue<CvDTreeNode*> internalNodesQueue;
986 FileNodeIterator internalNodesIt, leafValsuesIt;
987 CvDTreeNode* prntNode, *cldNode;
991 ensemble = _ensemble;
995 FileNode rnode = node[CC_INTERNAL_NODES];
996 internalNodesIt = rnode.end();
997 leafValsuesIt = node[CC_LEAF_VALUES].end();
998 internalNodesIt--; leafValsuesIt--;
999 for( size_t i = 0; i < rnode.size()/step; i++ )
1001 prntNode = data->new_node( 0, 0, 0, 0 );
// Categorical split: read the subset words (in reverse order).
1002 if ( maxCatCount > 0 )
1004 prntNode->split = data->new_split_cat( 0, 0 );
1005 for( int j = subsetN-1; j>=0; j--)
1007 *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
// Ordered split: read the threshold.
1013 *internalNodesIt >> split_value; internalNodesIt--;
1014 prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
1016 *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
1018 *internalNodesIt >> ridx; internalNodesIt--;
1019 *internalNodesIt >> lidx;internalNodesIt--;
// Right child: negative ref == leaf (take value), else pop internal child.
1022 prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
1023 *leafValsuesIt >> cldNode->value; leafValsuesIt--;
1024 cldNode->parent = prntNode;
1028 prntNode->right = internalNodesQueue.front();
1029 prntNode->right->parent = prntNode;
1030 internalNodesQueue.pop();
// Left child: same scheme.
1035 prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
1036 *leafValsuesIt >> cldNode->value; leafValsuesIt--;
1037 cldNode->parent = prntNode;
1041 prntNode->left = internalNodesQueue.front();
1042 prntNode->left->parent = prntNode;
1043 internalNodesQueue.pop();
1046 internalNodesQueue.push( prntNode );
1049 root = internalNodesQueue.front();
1050 internalNodesQueue.pop();
// Partition node's per-sample data between its two new children according
// to the direction array computed by the split: presorted ordered-variable
// index rows are re-packed (keeping both halves sorted), then cv_labels
// and sample_indices are scattered left/right. Finally the parent's
// buffers are released.
// NOTE(review): interior lines are elided in this view; comments describe
// only what the visible lines establish.
1053 void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
1055 int n = node->sample_count, nl, nr, scount = data->sample_count;
1056 char* dir = (char*)data->direction->data.ptr;
1057 CvDTreeNode *left = 0, *right = 0;
1058 int* newIdx = data->split_buf->data.i;
1059 int newBufIdx = data->get_child_buf_idx( node );
1060 int workVarCount = data->get_work_var_count();
1061 CvMat* buf = data->buf;
1062 size_t length_buf_row = data->get_length_subbuf();
1063 cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
1064 int* tempBuf = (int*)(uchar*)inn_buf;
1065 bool splitInputData;
1067 complete_node_dir(node);
// Count left/right members and build the old->new index relocation table.
1069 for( int i = nl = nr = 0; i < n; i++ )
1072 // initialize new indices for splitting ordered variables
1073 newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
// Children share the parent's buffer row: left at node->offset, right
// immediately after the nl left samples.
1078 node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
1079 node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );
// Only split the heavy per-variable data when the children can still grow.
1081 splitInputData = node->depth + 1 < data->params.max_depth &&
1082 (node->left->sample_count > data->params.min_sample_count ||
1083 node->right->sample_count > data->params.min_sample_count);
1085 // split ordered variables, keep both halves sorted.
1086 for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
1088 int ci = data->get_var_type(vi);
1089 if( ci >= 0 || !splitInputData )
1092 int n1 = node->get_num_valid(vi);
1093 float *src_val_buf = (float*)(tempBuf + n);
1094 int *src_sorted_idx_buf = (int*)(src_val_buf + n);
1095 int *src_sample_idx_buf = src_sorted_idx_buf + n;
1096 const int* src_sorted_idx = 0;
1097 const float* src_val = 0;
1098 data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
// Snapshot the parent's sorted permutation before overwriting buf.
1100 for(int i = 0; i < n; i++)
1101 tempBuf[i] = src_sorted_idx[i];
1103 if (data->is_buf_16u)
1105 ushort *ldst, *rdst;
1106 ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
1107 vi*scount + left->offset);
1108 rdst = (ushort*)(ldst + nl);
// Stable scatter: traversal in sorted order keeps both halves sorted.
1111 for( int i = 0; i < n1; i++ )
1113 int idx = tempBuf[i];
1118 *rdst = (ushort)idx;
1123 *ldst = (ushort)idx;
1127 CV_Assert( n1 == n );
// 32s storage variant of the same scatter.
1132 ldst = buf->data.i + left->buf_idx*length_buf_row +
1133 vi*scount + left->offset;
1134 rdst = buf->data.i + right->buf_idx*length_buf_row +
1135 vi*scount + right->offset;
1138 for( int i = 0; i < n1; i++ )
1140 int idx = tempBuf[i];
1154 CV_Assert( n1 == n );
// Scatter the cross-validation labels left/right by direction.
1158 // split cv_labels using newIdx relocation table
1159 int *src_lbls_buf = tempBuf + n;
1160 const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);
1162 for(int i = 0; i < n; i++)
1163 tempBuf[i] = src_lbls[i];
1165 if (data->is_buf_16u)
1167 unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
1168 (workVarCount-1)*scount + left->offset);
1169 unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
1170 (workVarCount-1)*scount + right->offset);
1172 for( int i = 0; i < n; i++ )
1174 int idx = tempBuf[i];
1177 *rdst = (unsigned short)idx;
1182 *ldst = (unsigned short)idx;
1190 int *ldst = buf->data.i + left->buf_idx*length_buf_row +
1191 (workVarCount-1)*scount + left->offset;
1192 int *rdst = buf->data.i + right->buf_idx*length_buf_row +
1193 (workVarCount-1)*scount + right->offset;
1195 for( int i = 0; i < n; i++ )
1197 int idx = tempBuf[i];
// Scatter the sample indices left/right the same way.
1211 // split sample indices
1212 int *sampleIdx_src_buf = tempBuf + n;
1213 const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
1215 for(int i = 0; i < n; i++)
1216 tempBuf[i] = sampleIdx_src[i];
1218 if (data->is_buf_16u)
1220 unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
1221 workVarCount*scount + left->offset);
1222 unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
1223 workVarCount*scount + right->offset);
1224 for (int i = 0; i < n; i++)
1226 unsigned short idx = (unsigned short)tempBuf[i];
1241 int* ldst = buf->data.i + left->buf_idx*length_buf_row +
1242 workVarCount*scount + left->offset;
1243 int* rdst = buf->data.i + right->buf_idx*length_buf_row +
1244 workVarCount*scount + right->offset;
1245 for (int i = 0; i < n; i++)
1247 int idx = tempBuf[i];
// All variables of both children now hold exactly nl / nr valid samples.
1261 for( int vi = 0; vi < data->var_count; vi++ )
1263 left->set_num_valid(vi, (int)(nl));
1264 right->set_num_valid(vi, (int)(nr));
1267 // deallocate the parent node data that is not needed anymore
1268 data->free_node_data(node);
1271 static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
1273 if ( node && node->split )
1275 featureMap.ptr<int>(0)[node->split->var_idx] = 1;
1276 auxMarkFeaturesInMap( node->left, featureMap );
1277 auxMarkFeaturesInMap( node->right, featureMap );
1281 void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
1283 auxMarkFeaturesInMap( root, featureMap );
1286 //----------------------------------- CascadeBoost --------------------------------------
1288 bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
1290 int _precalcValBufSize, int _precalcIdxBufSize,
1291 const CvCascadeBoostParams& _params )
1293 bool isTrained = false;
1296 data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
1297 _precalcValBufSize, _precalcIdxBufSize, _params );
1298 CvMemStorage *storage = cvCreateMemStorage();
1299 weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
1302 set_params( _params );
1303 if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
1304 data->do_responses_copy();
1306 update_weights( 0 );
1308 cout << "+----+---------+---------+" << endl;
1309 cout << "| N | HR | FA |" << endl;
1310 cout << "+----+---------+---------+" << endl;
1314 CvCascadeBoostTree* tree = new CvCascadeBoostTree;
1315 if( !tree->train( data, subsample_mask, this ) )
1320 cvSeqPush( weak, &tree );
1321 update_weights( tree );
1323 if( cvCountNonZero(subsample_mask) == 0 )
1326 while( !isErrDesired() && (weak->total < params.weak_count) );
1330 data->is_classifier = true;
1331 data->free_train_data();
// Sum the responses of all weak trees for the sample at sampleIdx.  When
// returnSum is false the sum is binarized against the stage threshold
// (0 = rejected, 1 = passed); the elided return presumably yields the value.
// NOTE(review): the declarations of the accumulator `sum`, the sequence
// `reader`, `wtree`, the `if( !returnSum )` guard and the final return are
// not visible in this excerpt.
float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
    cvStartReadSeq( weak, &reader );
    cvSetSeqReaderPos( &reader, 0 );
    // Accumulate every weak tree's prediction for this sample.
    for( int i = 0; i < weak->total; i++ )
        CV_READ_SEQ_ELEM( wtree, reader );
        sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
    // Binarize: below (threshold - eps) the sample is rejected by this stage.
    sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
1358 bool CvCascadeBoost::set_params( const CvBoostParams& _params )
1360 minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
1361 maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
1362 return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
1363 ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
1364 CvBoost::set_params( _params ));
// (Re)compute per-sample boosting weights.  Called once with tree == 0 before
// the first weak classifier (to allocate and initialize responses, masks and
// weights) and after each trained tree to apply the boosting-type specific
// weight-update rule (Discrete / Real / Logit / Gentle AdaBoost).
// NOTE(review): this excerpt is elided — declarations of step/fdata/
// sampleIdxBuf, many braces, the `have_subsample`/w0 setup, the err/sumW
// accumulations and several loop bodies are not visible; comments below
// describe only the visible statements.
void CvCascadeBoost::update_weights( CvBoostTree* tree )
    int n = data->sample_count;
    const int* sampleIdx = 0;
    // Scratch buffer: sample indices (LOGIT/GENTLE) plus class labels when
    // initializing (tree == 0); zero-sized otherwise.
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
        ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
    uchar* cur_inn_buf_pos = (uchar*)inn_buf;
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
        // Regression-style boosting reads/writes per-sample target values
        // through the responses copy (step accounts for non-continuous mats).
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    CvMat* buf = data->buf;
    size_t length_buf_row = data->get_length_subbuf();
    if( !tree ) // before training the first tree, initialize weights and other parameters
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
        const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        double p[2] = { 1, 1 };
        // Drop any state from a previous stage before reallocating.
        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );
        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );
        if (data->is_buf_16u)
            // 16-bit index buffer variant: labels live in the shared buf as ushort.
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() deactivate/reactive again some, if need
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
        // (elided else-branch) 32-bit index buffer variant: same init, int labels.
        int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
            data->data_root->offset + (data->work_var_count-1)*data->sample_count;
        for( int i = 0; i < n; i++ )
            // save original categorical responses {0,1}, convert them to {-1,1}
            orig_response->data.i[i] = classLabels[i]*2 - 1;
            subsample_mask->data.ptr[i] = (uchar)1;
            weights->data.db[i] = w0*p[classLabels[i]];
        if( params.boost_type == LOGIT )
            // LogitBoost keeps the running ensemble response F(x_i) per sample.
            sum_response = cvCreateMat( 1, n, CV_64F );
            for( int i = 0; i < n; i++ )
                sum_response->data.db[i] = 0;
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        else if( params.boost_type == GENTLE )
            // Gentle AdaBoost regresses directly on the {-1,1} responses.
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];
            data->is_classifier = false;
    // at this moment, for all the samples that participated in the training of the most
    // recent weak classifier we know the responses. For other samples we need to compute them
    if( have_subsample )
        // invert the subsample mask
        cvXorS( subsample_mask, cvScalar(1.), subsample_mask );
        // run tree through all the non-processed samples
        for( int i = 0; i < n; i++ )
            if( subsample_mask->data.ptr[i] )
                weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
    // now update weights and other parameters for each type of boosting
    if( params.boost_type == DISCRETE )
        // Discrete AdaBoost:
        //   weak_eval[i] (=f(x_i)) is in {-1,1}
        //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
        //   C = log((1-err)/err)
        //   w_i *= exp(C*(f(x_i) != y_i))
        double scale[] = { 1., 0. };
        // Weighted training error of the last tree (normalization elided).
        for( int i = 0; i < n; i++ )
            double w = weights->data.db[i];
            err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
        C = err = -logRatio( err );
        scale[1] = exp(err);
        // Misclassified samples get their weight multiplied by exp(C).
        for( int i = 0; i < n; i++ )
            double w = weights->data.db[i]*
                scale[weak_eval->data.db[i] != orig_response->data.i[i]];
            weights->data.db[i] = w;
    else if( params.boost_type == REAL )
        // Real AdaBoost:
        //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
        //   w_i *= exp(-y_i*f(x_i))
        for( int i = 0; i < n; i++ )
            weak_eval->data.db[i] *= -orig_response->data.i[i];
        cvExp( weak_eval, weak_eval );
        for( int i = 0; i < n; i++ )
            double w = weights->data.db[i]*weak_eval->data.db[i];
            weights->data.db[i] = w;
    else if( params.boost_type == LOGIT )
        // LogitBoost:
        //   weak_eval[i] = f(x_i) in [-z_max,z_max]
        //   sum_response = F(x_i).
        //   F(x_i) += 0.5*f(x_i)
        //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))=1/(1+exp(-2*F(x_i)))
        //   reuse weak_eval: weak_eval[i] <- p(x_i)
        //   w_i = p(x_i)*1(1 - p(x_i))
        //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
        //   store z_i to the data->data_root as the new target responses
        const double lbWeightThresh = FLT_EPSILON;
        const double lbZMax = 10.;
        for( int i = 0; i < n; i++ )
            double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
            sum_response->data.db[i] = s;
            // Prepare exp(-2*F) so that p = 1/(1+exp(-2F)) after cvExp below.
            weak_eval->data.db[i] = -2*s;
        cvExp( weak_eval, weak_eval );
        for( int i = 0; i < n; i++ )
            double p = 1./(1. + weak_eval->data.db[i]);
            double w = p*(1 - p), z;
            // Clamp the weight away from zero to keep z_i numerically stable.
            w = MAX( w, lbWeightThresh );
            weights->data.db[i] = w;
            // New regression target z_i, clipped to +/- lbZMax (z computation elided).
            if( orig_response->data.i[i] > 0 )
                fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
    // (else) Gentle AdaBoost:
    //   weak_eval[i] = f(x_i) in [-1,1]
    //   w_i *= exp(-y_i*f(x_i))
    assert( params.boost_type == GENTLE );
    for( int i = 0; i < n; i++ )
        weak_eval->data.db[i] *= -orig_response->data.i[i];
    cvExp( weak_eval, weak_eval );
    for( int i = 0; i < n; i++ )
        double w = weights->data.db[i] * weak_eval->data.db[i];
        weights->data.db[i] = w;
    // renormalize weights
    // NOTE(review): sumW presumably holds 1/sum(w_i) here — the accumulation
    // and inversion lines are elided from this excerpt.
    if( sumW > FLT_EPSILON )
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
// Decide whether the current stage already meets the target error rates.
// Chooses the stage threshold so that at least minHitRate of the positive
// samples pass, prints a progress row, and returns true iff the resulting
// false-alarm rate on negatives is <= maxFalseAlarm.
// NOTE(review): the loop bodies that adjust numPosTrue for threshold ties and
// that count numNeg/numFalse over negatives are elided from this excerpt.
bool CvCascadeBoost::isErrDesired()
    int sCount = data->sample_count,
        numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
    vector<float> eval(sCount);
    // Collect the raw (sum) stage responses of all positive samples.
    for( int i = 0; i < sCount; i++ )
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
            eval[numPos++] = predict( i, true );
    icvSortFlt( &eval[0], numPos, 0 );
    // Threshold at the (1 - minHitRate) quantile: everything at or above the
    // value at thresholdIdx passes, which keeps >= minHitRate of positives.
    int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
    threshold = eval[ thresholdIdx ];
    numPosTrue = numPos - thresholdIdx;
    // Positives just below thresholdIdx whose response ties with the threshold
    // (within FLT_EPSILON) also count as detected (adjustment elided).
    for( int i = thresholdIdx - 1; i >= 0; i--)
        if ( abs( eval[i] - threshold) < FLT_EPSILON )
    float hitRate = ((float) numPosTrue) / ((float) numPos);
    // Evaluate all negative samples against the chosen threshold
    // (numNeg/numFalse counting elided).
    for( int i = 0; i < sCount; i++ )
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
    float falseAlarm = ((float) numFalse) / ((float) numNeg);
    // Progress row matching the header printed in train(): N | HR | FA.
    cout << "|"; cout.width(4); cout << right << weak->total;
    cout << "|"; cout.width(9); cout << right << hitRate;
    cout << "|"; cout.width(9); cout << right << falseAlarm;
    cout << "|" << endl;
    cout << "+----+---------+---------+" << endl;
    return falseAlarm <= maxFalseAlarm;
// Serialize this stage to the FileStorage: weak-classifier count, stage
// threshold, then each weak tree (features remapped through featureMap).
// NOTE(review): the closing of the weak-classifier sequence ("]") and braces
// are elided from this excerpt.
void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
    CvCascadeBoostTree* weakTree;
    fs << CC_WEAK_COUNT << weak->total;
    fs << CC_STAGE_THRESHOLD << threshold;
    fs << CC_WEAK_CLASSIFIERS << "[";
    for( int wi = 0; wi < weak->total; wi++)
        /*sprintf( cmnt, "tree %i", wi );
        cvWriteComment( fs, cmnt, 0 );*/
        weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->write( fs, featureMap );
// Deserialize a stage from a FileNode: rebuild the training-data binding for
// the given feature evaluator, read the stage threshold, then reload each
// weak tree from the CC_WEAK_CLASSIFIERS sequence.
// NOTE(review): braces and the final return are elided from this excerpt.
bool CvCascadeBoost::read( const FileNode &node,
                           const CvFeatureEvaluator* _featureEvaluator,
                           const CvCascadeBoostParams& _params )
    CvMemStorage* storage;
    data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
    set_params( _params );
    node[CC_STAGE_THRESHOLD] >> threshold;
    FileNode rnode = node[CC_WEAK_CLASSIFIERS];
    storage = cvCreateMemStorage();
    // Sequence of pointers to the weak trees, mirroring train().
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    // One CvCascadeBoostTree per entry of the stored sequence.
    for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
        CvCascadeBoostTree* tree = new CvCascadeBoostTree();
        tree->read( *it, this, data );
        cvSeqPush( weak, &tree );
// Mark, for every weak tree of this stage, each feature index used by a split
// node (delegates to CvCascadeBoostTree::markFeaturesInMap per tree).
void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
    for( int wi = 0; wi < weak->total; wi++ )
        CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->markFeaturesInMap( featureMap );