static const int MinBlockSize = 1 << 16;
static const int BlockSizeDelta = 1 << 10;
+// TODO remove this code duplication with ml/precomp.hpp
+
+static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
+{
+ return *(const int*)a - *(const int*)b;
+}
+
+static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
+{
+ CvMat* idx = 0;
+
+ CV_FUNCNAME( "cvPreprocessIndexArray" );
+
+ __BEGIN__;
+
+ int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
+ uchar* srcb = 0;
+ int* srci = 0;
+ int* dsti;
+
+ if( !CV_IS_MAT(idx_arr) )
+ CV_ERROR( CV_StsBadArg, "Invalid index array" );
+
+ if( idx_arr->rows != 1 && idx_arr->cols != 1 )
+ CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
+
+ idx_total = idx_arr->rows + idx_arr->cols - 1;
+ srcb = idx_arr->data.ptr;
+ srci = idx_arr->data.i;
+
+ type = CV_MAT_TYPE(idx_arr->type);
+ step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
+
+ switch( type )
+ {
+ case CV_8UC1:
+ case CV_8SC1:
+ // idx_arr is array of 1's and 0's -
+ // i.e. it is a mask of the selected components
+ if( idx_total != data_arr_size )
+ CV_ERROR( CV_StsUnmatchedSizes,
+ "Component mask should contain as many elements as the total number of input variables" );
+
+ for( i = 0; i < idx_total; i++ )
+ idx_selected += srcb[i*step] != 0;
+
+ if( idx_selected == 0 )
+ CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
+
+ break;
+ case CV_32SC1:
+ // idx_arr is array of integer indices of selected components
+ if( idx_total > data_arr_size )
+ CV_ERROR( CV_StsOutOfRange,
+ "index array may not contain more elements than the total number of input variables" );
+ idx_selected = idx_total;
+ // check if sorted already
+ for( i = 0; i < idx_total; i++ )
+ {
+ int val = srci[i*step];
+ if( val >= prev )
+ {
+ is_sorted = 0;
+ break;
+ }
+ prev = val;
+ }
+ break;
+ default:
+ CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
+ "(it should be 8uC1, 8sC1 or 32sC1)" );
+ }
+
+ CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
+ dsti = idx->data.i;
+
+ if( type < CV_32SC1 )
+ {
+ for( i = 0; i < idx_total; i++ )
+ if( srcb[i*step] )
+ *dsti++ = i;
+ }
+ else
+ {
+ for( i = 0; i < idx_total; i++ )
+ dsti[i] = srci[i*step];
+
+ if( !is_sorted )
+ qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
+
+ if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
+ CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
+
+ if( check_for_duplicates )
+ {
+ for( i = 1; i < idx_total; i++ )
+ if( dsti[i] <= dsti[i-1] )
+ CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
+ }
+ }
+
+ __END__;
+
+ if( cvGetErrStatus() < 0 )
+ cvReleaseMat( &idx );
+
+ return idx;
+}
+
//----------------------------- CascadeBoostParams -------------------------------------------------
CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
return res;
}
+CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
+{
+ CvDTreeNode* root = 0;
+ CvMat* isubsample_idx = 0;
+ CvMat* subsample_co = 0;
+
+ bool isMakeRootCopy = true;
+
+ if( !data_root )
+ CV_Error( CV_StsError, "No training data has been set" );
+
+ if( _subsample_idx )
+ {
+ CV_Assert( isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count ) );
+
+ if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
+ {
+ const int* sidx = isubsample_idx->data.i;
+ for( int i = 0; i < sample_count; i++ )
+ {
+ if( sidx[i] != i )
+ {
+ isMakeRootCopy = false;
+ break;
+ }
+ }
+ }
+ else
+ isMakeRootCopy = false;
+ }
+
+ if( isMakeRootCopy )
+ {
+ // make a copy of the root node
+ CvDTreeNode temp;
+ int i;
+ root = new_node( 0, 1, 0, 0 );
+ temp = *root;
+ *root = *data_root;
+ root->num_valid = temp.num_valid;
+ if( root->num_valid )
+ {
+ for( i = 0; i < var_count; i++ )
+ root->num_valid[i] = data_root->num_valid[i];
+ }
+ root->cv_Tn = temp.cv_Tn;
+ root->cv_node_risk = temp.cv_node_risk;
+ root->cv_node_error = temp.cv_node_error;
+ }
+ else
+ {
+ int* sidx = isubsample_idx->data.i;
+ // co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
+ int* co, cur_ofs = 0;
+ int workVarCount = get_work_var_count();
+ int count = isubsample_idx->rows + isubsample_idx->cols - 1;
+
+ root = new_node( 0, count, 1, 0 );
+
+ CV_Assert( subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 ));
+ cvZero( subsample_co );
+ co = subsample_co->data.i;
+ for( int i = 0; i < count; i++ )
+ co[sidx[i]*2]++;
+ for( int i = 0; i < sample_count; i++ )
+ {
+ if( co[i*2] )
+ {
+ co[i*2+1] = cur_ofs;
+ cur_ofs += co[i*2];
+ }
+ else
+ co[i*2+1] = -1;
+ }
+
+ cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
+ // subsample ordered variables
+ for( int vi = 0; vi < numPrecalcIdx; vi++ )
+ {
+ int ci = get_var_type(vi);
+ CV_Assert( ci < 0 );
+
+ int *src_idx_buf = (int*)(uchar*)inn_buf;
+ float *src_val_buf = (float*)(src_idx_buf + sample_count);
+ int* sample_indices_buf = (int*)(src_val_buf + sample_count);
+ const int* src_idx = 0;
+ const float* src_val = 0;
+ get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );
+
+ int j = 0, idx, count_i;
+ int num_valid = data_root->get_num_valid(vi);
+ CV_Assert( num_valid == sample_count );
+
+ if (is_buf_16u)
+ {
+ unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ vi*sample_count + data_root->offset);
+ for( int i = 0; i < num_valid; i++ )
+ {
+ idx = src_idx[i];
+ count_i = co[idx*2];
+ if( count_i )
+ for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
+ udst_idx[j] = (unsigned short)cur_ofs;
+ }
+ }
+ else
+ {
+ int* idst_idx = buf->data.i + root->buf_idx*buf->cols +
+ vi*sample_count + root->offset;
+ for( int i = 0; i < num_valid; i++ )
+ {
+ idx = src_idx[i];
+ count_i = co[idx*2];
+ if( count_i )
+ for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
+ idst_idx[j] = cur_ofs;
+ }
+ }
+ }
+
+ // subsample cv_lables
+ const int* src_lbls = get_cv_labels(data_root, (int*)(uchar*)inn_buf);
+ if (is_buf_16u)
+ {
+ unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ (workVarCount-1)*sample_count + root->offset);
+ for( int i = 0; i < count; i++ )
+ udst[i] = (unsigned short)src_lbls[sidx[i]];
+ }
+ else
+ {
+ int* idst = buf->data.i + root->buf_idx*buf->cols +
+ (workVarCount-1)*sample_count + root->offset;
+ for( int i = 0; i < count; i++ )
+ idst[i] = src_lbls[sidx[i]];
+ }
+
+ // subsample sample_indices
+ const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
+ if (is_buf_16u)
+ {
+ unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
+ workVarCount*sample_count + root->offset);
+ for( int i = 0; i < count; i++ )
+ sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
+ }
+ else
+ {
+ int* sample_idx_dst = buf->data.i + root->buf_idx*buf->cols +
+ workVarCount*sample_count + root->offset;
+ for( int i = 0; i < count; i++ )
+ sample_idx_dst[i] = sample_idx_src[sidx[i]];
+ }
+
+ for( int vi = 0; vi < var_count; vi++ )
+ root->set_num_valid(vi, count);
+ }
+
+ cvReleaseMat( &isubsample_idx );
+ cvReleaseMat( &subsample_co );
+
+ return root;
+}
+
//---------------------------- CascadeBoostTrainData -----------------------------
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
}
var_type->data.i[var_count] = cat_var_count;
var_type->data.i[var_count+1] = cat_var_count+1;
- work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1;
- buf_size = (work_var_count + 1) * sample_count;
+ work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
+ buf_size = (work_var_count + 1) * sample_count/*sample_indices*/;
buf_count = 2;
if ( is_buf_16u )
ldst++;
}
}
- assert( n1 == n);
-
- left->set_num_valid(vi, (int)(ldst - ldst0));
- right->set_num_valid(vi, (int)(rdst - rdst0));
+ CV_Assert( n1 == n );
}
else
{
ldst++;
}
}
-
- left->set_num_valid(vi, (int)(ldst - ldst0));
- right->set_num_valid(vi, (int)(rdst - rdst0));
- CV_Assert( n1 == n);
+ CV_Assert( n1 == n );
}
}
if (data->is_buf_16u)
{
- unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
+ unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
(workVarCount-1)*scount + left->offset);
- unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
- (workVarCount-1)*scount + right->offset);
-
+ unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
+ (workVarCount-1)*scount + right->offset);
+
for( int i = 0; i < n; i++ )
{
int idx = tempBuf[i];
}
else
{
- int *ldst = buf->data.i + left->buf_idx*buf->cols +
+ int *ldst = buf->data.i + left->buf_idx*buf->cols +
(workVarCount-1)*scount + left->offset;
- int *rdst = buf->data.i + right->buf_idx*buf->cols +
+ int *rdst = buf->data.i + right->buf_idx*buf->cols +
(workVarCount-1)*scount + right->offset;
-
+
for( int i = 0; i < n; i++ )
{
int idx = tempBuf[i];
ldst++;
}
}
- }
- for( int vi = 0; vi < data->var_count; vi++ )
- {
- left->set_num_valid(vi, (int)(nl));
- right->set_num_valid(vi, (int)(nr));
}
-
+
// split sample indices
int *sampleIdx_src_buf = tempBuf + n;
const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
}
}
+ for( int vi = 0; vi < data->var_count; vi++ )
+ {
+ left->set_num_valid(vi, (int)(nl));
+ right->set_num_valid(vi, (int)(nr));
+ }
+
// deallocate the parent node data that is not needed anymore
data->free_node_data(node);
}
CvCascadeBoostTree* tree = new CvCascadeBoostTree;
if( !tree->train( data, subsample_mask, this ) )
{
- // TODO: may be should finish the loop (!!!)
- assert(0);
delete tree;
- continue;
+ break;
}
cvSeqPush( weak, &tree );
update_weights( tree );