// Warning fixes continued
// [platform/upstream/opencv.git] / modules / ml / src / gbt.cpp

#include "precomp.hpp"
#include <string>
#include <sstream>
#include <time.h>

using namespace std;

#define pCvSeq CvSeq*
#define pCvDTreeNode CvDTreeNode*

#define CV_CMP_FLOAT(a,b) ((a) < (b))
static CV_IMPLEMENT_QSORT_EX( icvSortFloat, float, CV_CMP_FLOAT, float)

//===========================================================================
static string ToString(int i)
{
    stringstream tmp;
    tmp << i;

    return tmp.str();
}


//===========================================================================
//----------------------------- CvGBTreesParams -----------------------------
//===========================================================================

CvGBTreesParams::CvGBTreesParams()
            : CvDTreeParams( 3, 10, 0, false, 10, 0, false, false, 0 )
{
    weak_count = 200;
    loss_function_type = CvGBTrees::SQUARED_LOSS;
    subsample_portion = 0.8f;
    shrinkage = 0.01f;
}

//===========================================================================

CvGBTreesParams::CvGBTreesParams( int _loss_function_type, int _weak_count,
                         float _shrinkage, float _subsample_portion,
                         int _max_depth, bool _use_surrogates )
            : CvDTreeParams( 3, 10, 0, false, 10, 0, false, false, 0 )
{
    loss_function_type = _loss_function_type;
    weak_count = _weak_count;
    shrinkage = _shrinkage;
    subsample_portion = _subsample_portion;
    max_depth = _max_depth;
    use_surrogates = _use_surrogates;
}

//===========================================================================
//------------------------------- CvGBTrees ---------------------------------
//===========================================================================

CvGBTrees::CvGBTrees()
{
    data = 0;
    weak = 0;
    default_model_name = "my_boost_tree";
    orig_response = sum_response = sum_response_tmp = 0;
    subsample_train = subsample_test = 0;
    missing = sample_idx = 0;
    class_labels = 0;
    class_count = 1;
    delta = 0.0f;

    clear();
}

//===========================================================================

int CvGBTrees::get_len(const CvMat* mat) const
{
    return (mat->cols > mat->rows) ? mat->cols : mat->rows;
}

//===========================================================================

void CvGBTrees::clear()
{
    if( weak )
    {
        CvSeqReader reader;
        CvSlice slice = CV_WHOLE_SEQ;
        CvDTree* tree;

        //data->shared = false;
        for (int i=0; i<class_count; ++i)
        {
            int weak_count = cvSliceLength( slice, weak[i] );
            if ((weak[i]) && (weak_count))
            {
                cvStartReadSeq( weak[i], &reader );
                cvSetSeqReaderPos( &reader, slice.start_index );
                for (int j=0; j<weak_count; ++j)
                {
                    CV_READ_SEQ_ELEM( tree, reader );
                    //tree->clear();
                    delete tree;
                    tree = 0;
                }
            }
        }
        for (int i=0; i<class_count; ++i)
            if (weak[i]) cvReleaseMemStorage( &(weak[i]->storage) );
        delete[] weak;
    }
    if (data)
    {
        data->shared = false;
        delete data;
    }
    weak = 0;
    data = 0;
    delta = 0.0f;
    cvReleaseMat( &orig_response );
    cvReleaseMat( &sum_response );
    cvReleaseMat( &sum_response_tmp );
    cvReleaseMat( &subsample_train );
    cvReleaseMat( &subsample_test );
    cvReleaseMat( &sample_idx );
    cvReleaseMat( &missing );
    cvReleaseMat( &class_labels );
}

//===========================================================================

CvGBTrees::~CvGBTrees()
{
    clear();
}

//===========================================================================

CvGBTrees::CvGBTrees( const CvMat* _train_data, int _tflag,
                  const CvMat* _responses, const CvMat* _var_idx,
                  const CvMat* _sample_idx, const CvMat* _var_type,
                  const CvMat* _missing_mask, CvGBTreesParams _params )
{
    weak = 0;
    data = 0;
    default_model_name = "my_boost_tree";
    orig_response = sum_response = sum_response_tmp = 0;
    subsample_train = subsample_test = 0;
    missing = sample_idx = 0;
    class_labels = 0;
    class_count = 1;
    delta = 0.0f;

    train( _train_data, _tflag, _responses, _var_idx, _sample_idx,
           _var_type, _missing_mask, _params );
}

//===========================================================================

bool CvGBTrees::problem_type() const
{
    switch (params.loss_function_type)
    {
    case DEVIANCE_LOSS: return false;
    default: return true;
    }
}

//===========================================================================

bool
CvGBTrees::train( CvMLData* _data, CvGBTreesParams _params, bool update )
{
    bool result;
    result = train ( _data->get_values(), CV_ROW_SAMPLE,
            _data->get_responses(), _data->get_var_idx(),
            _data->get_train_sample_idx(), _data->get_var_types(),
            _data->get_missing(), _params, update);
                                         //update is not supported
    return result;
}

//===========================================================================

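/*
  Training outline (implemented below): one sequence of regression trees is
  grown per class. On every boosting iteration a random subsample is drawn,
  the negative gradient of the loss is written into the shared response
  column, one tree per class is fitted to it, the leaf values are replaced
  by the loss-optimal constants, and the shrunken predictions are
  accumulated into sum_response.
*/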
bool
CvGBTrees::train( const CvMat* _train_data, int _tflag,
              const CvMat* _responses, const CvMat* _var_idx,
              const CvMat* _sample_idx, const CvMat* _var_type,
              const CvMat* _missing_mask,
              CvGBTreesParams _params, bool /*_update*/ ) //update is not supported
{
    CvMemStorage* storage = 0;

    params = _params;
    bool is_regression = problem_type();

    clear();
    /*
      n - count of samples
      m - count of variables
    */
    int n = _train_data->rows;
    int m = _train_data->cols;
    if (_tflag != CV_ROW_SAMPLE)
    {
        int tmp;
        CV_SWAP(n,m,tmp);
    }

    CvMat* new_responses = cvCreateMat( n, 1, CV_32F);
    cvZero(new_responses);

    data = new CvDTreeTrainData( _train_data, _tflag, new_responses, _var_idx,
        _sample_idx, _var_type, _missing_mask, _params, true, true );
    if (_missing_mask)
    {
        missing = cvCreateMat(_missing_mask->rows, _missing_mask->cols,
                              _missing_mask->type);
        cvCopy( _missing_mask, missing);
    }

    orig_response = cvCreateMat( 1, n, CV_32F );
    int step = (_responses->cols > _responses->rows) ? 1 : _responses->step / CV_ELEM_SIZE(_responses->type);
    switch (CV_MAT_TYPE(_responses->type))
    {
        case CV_32FC1:
        {
            for (int i=0; i<n; ++i)
                orig_response->data.fl[i] = _responses->data.fl[i*step];
        }; break;
        case CV_32SC1:
        {
            for (int i=0; i<n; ++i)
                orig_response->data.fl[i] = (float) _responses->data.i[i*step];
        }; break;
        default:
            CV_Error(CV_StsUnmatchedFormats, "Response should be a 32fC1 or 32sC1 vector.");
    }

    if (!is_regression)
    {
        class_count = 0;
        unsigned char * mask = new unsigned char[n];
        memset(mask, 0, n);
        // compute the count of different output classes
        for (int i=0; i<n; ++i)
            if (!mask[i])
            {
                class_count++;
                for (int j=i; j<n; ++j)
                    if (int(orig_response->data.fl[j]) == int(orig_response->data.fl[i]))
                        mask[j] = 1;
            }
        delete[] mask;

        class_labels = cvCreateMat(1, class_count, CV_32S);
        class_labels->data.i[0] = int(orig_response->data.fl[0]);
        int j = 1;
        for (int i=1; i<n; ++i)
        {
            int k = 0;
            // test the bound on k first so the labels collected so far
            // are never read past the end
            while ((k < j) && (int(orig_response->data.fl[i]) != class_labels->data.i[k]))
                k++;
            if (k == j)
            {
                class_labels->data.i[k] = int(orig_response->data.fl[i]);
                j++;
            }
        }
    }

    // inside the GBT learning process only regression decision trees are built
    data->is_classifier = false;

    // preprocessing sample indices
    if (_sample_idx)
    {
        int sample_idx_len = get_len(_sample_idx);

        switch (CV_MAT_TYPE(_sample_idx->type))
        {
            case CV_32SC1:
            {
                sample_idx = cvCreateMat( 1, sample_idx_len, CV_32S );
                for (int i=0; i<sample_idx_len; ++i)
                    sample_idx->data.i[i] = _sample_idx->data.i[i];
            } break;
            case CV_8S:
            case CV_8U:
            {
                int active_samples_count = 0;
                for (int i=0; i<sample_idx_len; ++i)
                    active_samples_count += int( _sample_idx->data.ptr[i] );
                sample_idx = cvCreateMat( 1, active_samples_count, CV_32S );
                active_samples_count = 0;
                for (int i=0; i<sample_idx_len; ++i)
                    if (int( _sample_idx->data.ptr[i] ))
                        sample_idx->data.i[active_samples_count++] = i;

            } break;
            default: CV_Error(CV_StsUnmatchedFormats, "_sample_idx should be a 32sC1, 8sC1 or 8uC1 vector.");
        }
        // sort over the actual length of sample_idx: in the 8u/8s case it can
        // be shorter than sample_idx_len; reinterpreting the non-negative int
        // indices as floats preserves their ordering
        icvSortFloat(sample_idx->data.fl, get_len(sample_idx), 0);
    }
    else
    {
        sample_idx = cvCreateMat( 1, n, CV_32S );
        for (int i=0; i<n; ++i)
            sample_idx->data.i[i] = i;
    }

    sum_response = cvCreateMat(class_count, n, CV_32F);
    sum_response_tmp = cvCreateMat(class_count, n, CV_32F);
    cvZero(sum_response);

    delta = 0.0f;
    /*
      in the case of a regression problem the initial guess (the zero term
      in the sum) is set to the mean of all the training responses, that is
      the best constant model
    */
    if (is_regression) base_value = find_optimal_value(sample_idx);
    /*
      in the case of a classification problem the initial guess (the zero term
      in the sum) is set to zero for all the tree sequences
    */
    else base_value = 0.0f;
    /*
      the current prediction on all training samples is set equal to the
      base_value
    */
    cvSet( sum_response, cvScalar(base_value) );

    weak = new pCvSeq[class_count];
    for (int i=0; i<class_count; ++i)
    {
        storage = cvCreateMemStorage();
        weak[i] = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvDTree*), storage );
        storage = 0;
    }

    // subsample params and data
    rng = &cv::theRNG();

    int samples_count = get_len(sample_idx);

    params.subsample_portion = params.subsample_portion <= FLT_EPSILON ||
        1 - params.subsample_portion <= FLT_EPSILON
        ? 1 : params.subsample_portion;
    int train_sample_count = cvFloor(params.subsample_portion * samples_count);
    if (train_sample_count == 0)
        train_sample_count = samples_count;
    int test_sample_count = samples_count - train_sample_count;
    int* idx_data = new int[samples_count];
    subsample_train = cvCreateMatHeader( 1, train_sample_count, CV_32SC1 );
    *subsample_train = cvMat( 1, train_sample_count, CV_32SC1, idx_data );
    if (test_sample_count)
    {
        subsample_test  = cvCreateMatHeader( 1, test_sample_count, CV_32SC1 );
        *subsample_test = cvMat( 1, test_sample_count, CV_32SC1,
                                 idx_data + train_sample_count );
    }

    // training procedure

    for ( int i=0; i < params.weak_count; ++i )
    {
        do_subsample();
        for ( int k=0; k < class_count; ++k )
        {
            find_gradient(k);
            CvDTree* tree = new CvDTree;
            tree->train( data, subsample_train );
            change_values(tree, k);

            if (subsample_test)
            {
                CvMat x;
                CvMat x_miss;
                int* sample_data = sample_idx->data.i;
                int* subsample_data = subsample_test->data.i;
                int s_step = (sample_idx->cols > sample_idx->rows) ? 1
                             : sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
                for (int j=0; j<get_len(subsample_test); ++j)
                {
                    int idx = *(sample_data + subsample_data[j]*s_step);
                    float res = 0.0f;
                    if (_tflag == CV_ROW_SAMPLE)
                        cvGetRow( data->train_data, &x, idx);
                    else
                        cvGetCol( data->train_data, &x, idx);

                    if (missing)
                    {
                        if (_tflag == CV_ROW_SAMPLE)
                            cvGetRow( missing, &x_miss, idx);
                        else
                            cvGetCol( missing, &x_miss, idx);

                        res = (float)tree->predict(&x, &x_miss)->value;
                    }
                    else
                    {
                        res = (float)tree->predict(&x)->value;
                    }
                    sum_response_tmp->data.fl[idx + k*n] =
                                    sum_response->data.fl[idx + k*n] +
                                    params.shrinkage * res;
                }
            }

            cvSeqPush( weak[k], &tree );
            tree = 0;
        } // k=0..class_count

        // swap the prediction buffers: sum_response_tmp becomes the current
        // prediction for the next iteration
        CvMat* tmp;
        tmp = sum_response_tmp;
        sum_response_tmp = sum_response;
        sum_response = tmp;
        tmp = 0;
    } // i=0..params.weak_count

    delete[] idx_data;
    cvReleaseMat(&new_responses);
    data->free_train_data();

    return true;

} // CvGBTrees::train(...)

//===========================================================================

inline float Sign(float x)
{
    if (x<0.0f) return -1.0f;
    else if (x>0.0f) return 1.0f;
    return 0.0f;
}

//===========================================================================

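/*
  find_gradient writes the negative gradient of the chosen loss, evaluated
  at the current prediction f, into the shared response column for class k:
    SQUARED_LOSS:   y - f
    ABSOLUTE_LOSS:  sign(y - f)
    HUBER_LOSS:     the residual clipped at delta, where delta is the
                    alpha-quantile (alpha = 0.2) of the absolute residuals
    DEVIANCE_LOSS:  1{label == k} - p_k, with p_k = exp(f_k) / sum_j exp(f_j)
*/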
void CvGBTrees::find_gradient(const int k)
{
    int* sample_data = sample_idx->data.i;
    int* subsample_data = subsample_train->data.i;
    float* grad_data = data->responses->data.fl;
    float* resp_data = orig_response->data.fl;
    float* current_data = sum_response->data.fl;

    switch (params.loss_function_type)
    // loss_function_type in
    // {SQUARED_LOSS, ABSOLUTE_LOSS, HUBER_LOSS, DEVIANCE_LOSS}
    {
        case SQUARED_LOSS:
        {
            for (int i=0; i<get_len(subsample_train); ++i)
            {
                int s_step = (sample_idx->cols > sample_idx->rows) ? 1
                             : sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
                int idx = *(sample_data + subsample_data[i]*s_step);
                grad_data[idx] = resp_data[idx] - current_data[idx];
            }
        }; break;

        case ABSOLUTE_LOSS:
        {
            for (int i=0; i<get_len(subsample_train); ++i)
            {
                int s_step = (sample_idx->cols > sample_idx->rows) ? 1
                             : sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
                int idx = *(sample_data + subsample_data[i]*s_step);
                grad_data[idx] = Sign(resp_data[idx] - current_data[idx]);
            }
        }; break;

        case HUBER_LOSS:
        {
            float alpha = 0.2f;
            int n = get_len(subsample_train);
            int s_step = (sample_idx->cols > sample_idx->rows) ? 1
                         : sample_idx->step/CV_ELEM_SIZE(sample_idx->type);

            float* residuals = new float[n];
            for (int i=0; i<n; ++i)
            {
                int idx = *(sample_data + subsample_data[i]*s_step);
                residuals[i] = (float)fabs(resp_data[idx] - current_data[idx]);
            }
            icvSortFloat(residuals, n, 0.0f);

            // clamp the quantile index: ceil(n*alpha) can reach n for small n
            delta = residuals[MIN(int(ceil(n*alpha)), n-1)];

            for (int i=0; i<n; ++i)
            {
                int idx = *(sample_data + subsample_data[i]*s_step);
                float r = resp_data[idx] - current_data[idx];
                grad_data[idx] = (fabs(r) > delta) ? delta*Sign(r) : r;
            }
            delete[] residuals;

        }; break;

        case DEVIANCE_LOSS:
        {
            for (int i=0; i<get_len(subsample_train); ++i)
            {
                double exp_fk = 0;
                double exp_sfi = 0;
                int s_step = (sample_idx->cols > sample_idx->rows) ? 1
                             : sample_idx->step/CV_ELEM_SIZE(sample_idx->type);
                int idx = *(sample_data + subsample_data[i]*s_step);

                for (int j=0; j<class_count; ++j)
                {
                    double res;
                    res = current_data[idx + j*sum_response->cols];
                    res = exp(res);
                    if (j == k) exp_fk = res;
                    exp_sfi += res;
                }
                int orig_label = int(resp_data[idx]);
                /*
                grad_data[idx] = (float)(!(k-class_labels->data.i[orig_label]+1)) -
                                 (float)(exp_fk / exp_sfi);
                */
                int ensemble_label = 0;
                while (class_labels->data.i[ensemble_label] != orig_label)
                    ensemble_label++;

                grad_data[idx] = (float)(k == ensemble_label) -
                                 (float)(exp_fk / exp_sfi);
            }
        }; break;

        default: break;
    }

} // CvGBTrees::find_gradient(...)

//===========================================================================

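/*
  change_values replaces the value of every leaf of the freshly trained tree
  by the loss-optimal constant over the training samples that landed in that
  leaf, then accumulates the shrunken leaf values into sum_response_tmp.
*/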
void CvGBTrees::change_values(CvDTree* tree, const int _k)
{
    CvDTreeNode** predictions = new pCvDTreeNode[get_len(subsample_train)];

    int* sample_data = sample_idx->data.i;
    int* subsample_data = subsample_train->data.i;
    int s_step = (sample_idx->cols > sample_idx->rows) ? 1
                 : sample_idx->step/CV_ELEM_SIZE(sample_idx->type);

    CvMat x;
    CvMat miss_x;

    for (int i=0; i<get_len(subsample_train); ++i)
    {
        int idx = *(sample_data + subsample_data[i]*s_step);
        if (data->tflag == CV_ROW_SAMPLE)
            cvGetRow( data->train_data, &x, idx);
        else
            cvGetCol( data->train_data, &x, idx);

        if (missing)
        {
            if (data->tflag == CV_ROW_SAMPLE)
                cvGetRow( missing, &miss_x, idx);
            else
                cvGetCol( missing, &miss_x, idx);

            predictions[i] = tree->predict(&x, &miss_x);
        }
        else
            predictions[i] = tree->predict(&x);
    }


    CvDTreeNode** leaves;
    int leaves_count = 0;
    leaves = GetLeaves( tree, leaves_count);

    for (int i=0; i<leaves_count; ++i)
    {
        int samples_in_leaf = 0;
        for (int j=0; j<get_len(subsample_train); ++j)
        {
            if (leaves[i] == predictions[j]) samples_in_leaf++;
        }

        if (!samples_in_leaf) // this should never happen, but just in case
        {
            leaves[i]->value = 0.0;
            continue;
        }

        CvMat* leaf_idx = cvCreateMat(1, samples_in_leaf, CV_32S);
        int* leaf_idx_data = leaf_idx->data.i;

        for (int j=0; j<get_len(subsample_train); ++j)
        {
            int idx = *(sample_data + subsample_data[j]*s_step);
            if (leaves[i] == predictions[j])
                *leaf_idx_data++ = idx;
        }

        float value = find_optimal_value(leaf_idx);
        leaves[i]->value = value;

        leaf_idx_data = leaf_idx->data.i;

        int len = sum_response_tmp->cols;
        for (int j=0; j<get_len(leaf_idx); ++j)
        {
            int idx = leaf_idx_data[j];
            sum_response_tmp->data.fl[idx + _k*len] =
                                    sum_response->data.fl[idx + _k*len] +
                                    params.shrinkage * value;
        }
        leaf_idx_data = 0;
        cvReleaseMat(&leaf_idx);
    }

    // releasing the memory
    for (int i=0; i<get_len(subsample_train); ++i)
    {
        predictions[i] = 0;
    }
    delete[] predictions;

    for (int i=0; i<leaves_count; ++i)
    {
        leaves[i] = 0;
    }
    delete[] leaves;

}

//===========================================================================
/*
void CvGBTrees::change_values(CvDTree* tree, const int _k)
{

    CvDTreeNode** leaves;
    int leaves_count = 0;
    int offset = _k*sum_response_tmp->cols;
    CvMat leaf_idx;
    leaf_idx.rows = 1;

    leaves = GetLeaves( tree, leaves_count);

    for (int i=0; i<leaves_count; ++i)
    {
        int n = leaves[i]->sample_count;
        int* leaf_idx_data = new int[n];
        data->get_sample_indices(leaves[i], leaf_idx_data);
        //CvMat* leaf_idx = new CvMat();
        //cvInitMatHeader(leaf_idx, n, 1, CV_32S, leaf_idx_data);
        leaf_idx.cols = n;
        leaf_idx.data.i = leaf_idx_data;

        float value = find_optimal_value(&leaf_idx);
        leaves[i]->value = value;
        float val = params.shrinkage * value;


        for (int j=0; j<n; ++j)
        {
            int idx = leaf_idx_data[j] + offset;
            sum_response_tmp->data.fl[idx] = sum_response->data.fl[idx] + val;
        }
        //leaf_idx_data = 0;
        //cvReleaseMat(&leaf_idx);
        leaf_idx.data.i = 0;
        //delete leaf_idx;
        delete[] leaf_idx_data;
    }

    // releasing the memory
    for (int i=0; i<leaves_count; ++i)
    {
        leaves[i] = 0;
    }
    delete[] leaves;

}    //change_values(...);
*/
//===========================================================================

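/*
  find_optimal_value returns the constant gamma minimizing the loss over the
  samples listed in _Idx:
    SQUARED_LOSS:   the mean residual
    ABSOLUTE_LOSS:  the median residual
    HUBER_LOSS:     a one-step robust estimate around the median residual
    DEVIANCE_LOSS:  a single Newton-style step,
                    (K-1)/K * sum(g) / sum(|g|*(1-|g|))
*/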
float CvGBTrees::find_optimal_value( const CvMat* _Idx )
{

    double gamma = (double)0.0;

    int* idx = _Idx->data.i;
    float* resp_data = orig_response->data.fl;
    float* cur_data = sum_response->data.fl;
    int n = get_len(_Idx);

    switch (params.loss_function_type)
    // SQUARED_LOSS=0, ABSOLUTE_LOSS=1, HUBER_LOSS=3, DEVIANCE_LOSS=4
    {
    case SQUARED_LOSS:
        {
            for (int i=0; i<n; ++i)
                gamma += resp_data[idx[i]] - cur_data[idx[i]];
            gamma /= (double)n;
        }; break;

    case ABSOLUTE_LOSS:
        {
            float* residuals = new float[n];
            for (int i=0; i<n; ++i, ++idx)
                residuals[i] = (resp_data[*idx] - cur_data[*idx]);
            icvSortFloat(residuals, n, 0.0f);
            if (n % 2)
                gamma = residuals[n/2];
            else gamma = (residuals[n/2-1] + residuals[n/2]) / 2.0f;
            delete[] residuals;
        }; break;

    case HUBER_LOSS:
        {
            float* residuals = new float[n];
            for (int i=0; i<n; ++i, ++idx)
                residuals[i] = (resp_data[*idx] - cur_data[*idx]);
            icvSortFloat(residuals, n, 0.0f);

            int n_half = n >> 1;
            float r_median = (n == n_half<<1) ?
                        (residuals[n_half-1] + residuals[n_half]) / 2.0f :
                        residuals[n_half];

            for (int i=0; i<n; ++i)
            {
                float dif = residuals[i] - r_median;
                gamma += (delta < fabs(dif)) ? Sign(dif)*delta : dif;
            }
            gamma /= (double)n;
            gamma += r_median;
            delete[] residuals;

        }; break;

    case DEVIANCE_LOSS:
        {
            float* grad_data = data->responses->data.fl;
            double tmp1 = 0;
            double tmp2 = 0;
            double tmp  = 0;
            for (int i=0; i<n; ++i)
            {
                tmp = grad_data[idx[i]];
                tmp1 += tmp;
                tmp2 += fabs(tmp)*(1-fabs(tmp));
            }
            if (tmp2 == 0)
            {
                tmp2 = 1;
            }

            gamma = ((double)(class_count-1)) / (double)class_count * (tmp1/tmp2);
        }; break;

    default: break;
    }

    return float(gamma);

} // CvGBTrees::find_optimal_value

//===========================================================================

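/*
  leaves_get recursively collects the leaf nodes of a tree; GetLeaves wraps
  it and allocates room for the worst case of 2^max_depth leaves.
*/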
void CvGBTrees::leaves_get( CvDTreeNode** leaves, int& count, CvDTreeNode* node )
{
    if (node->left != NULL)  leaves_get(leaves, count, node->left);
    if (node->right != NULL) leaves_get(leaves, count, node->right);
    if ((node->left == NULL) && (node->right == NULL))
        leaves[count++] = node;
}

//---------------------------------------------------------------------------

CvDTreeNode** CvGBTrees::GetLeaves( const CvDTree* dtree, int& len )
{
    len = 0;
    CvDTreeNode** leaves = new pCvDTreeNode[(size_t)1 << params.max_depth];
    leaves_get(leaves, len, const_cast<pCvDTreeNode>(dtree->get_root()));
    return leaves;
}

//===========================================================================

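/*
  do_subsample prepares the index buffer for the next boosting iteration:
  it is reset to 0..n-1 and, when a test part exists, shuffled by n random
  transpositions so that the first train_sample_count entries form the
  training subsample.
*/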
void CvGBTrees::do_subsample()
{

    int n = get_len(sample_idx);
    int* idx = subsample_train->data.i;

    for (int i = 0; i < n; i++ )
        idx[i] = i;

    if (subsample_test)
        for (int i = 0; i < n; i++)
        {
            int a = (*rng)(n);
            int b = (*rng)(n);
            int t;
            CV_SWAP( idx[a], idx[b], t );
        }

/*
    int n = get_len(sample_idx);
    if (subsample_train == 0)
        subsample_train = cvCreateMat(1, n, CV_32S);
    int* subsample_data = subsample_train->data.i;
    for (int i=0; i<n; ++i)
        subsample_data[i] = i;
    subsample_test = 0;
*/
}

//===========================================================================

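/*
  predict_serial sums the shrunken responses of the trees selected by
  `slice` for every class, adds base_value, and returns either the raw sum
  (regression, or a fixed class k) or the label of the highest-scoring
  class.
*/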
float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing,
        CvMat* weak_responses, CvSlice slice, int k) const
{
    float result = 0.0f;

    if (!weak) return 0.0f;

    CvSeqReader reader;
    int weak_count = cvSliceLength( slice, weak[class_count-1] );
    CvDTree* tree;

    if (weak_responses)
    {
        if (CV_MAT_TYPE(weak_responses->type) != CV_32F)
            return 0.0f;
        if ((k >= 0) && (k<class_count) && (weak_responses->rows != 1))
            return 0.0f;
        if ((k == -1) && (weak_responses->rows != class_count))
            return 0.0f;
        if (weak_responses->cols != weak_count)
            return 0.0f;
    }

    float* sum = new float[class_count];
    memset(sum, 0, class_count*sizeof(float));

    for (int i=0; i<class_count; ++i)
    {
        if ((weak[i]) && (weak_count))
        {
            cvStartReadSeq( weak[i], &reader );
            cvSetSeqReaderPos( &reader, slice.start_index );
            for (int j=0; j<weak_count; ++j)
            {
                CV_READ_SEQ_ELEM( tree, reader );
                float p = (float)(tree->predict(_sample, _missing)->value);
                sum[i] += params.shrinkage * p;
                if (weak_responses)
                    weak_responses->data.fl[i*weak_count+j] = p;
            }
        }
    }

    for (int i=0; i<class_count; ++i)
        sum[i] += base_value;

    if (class_count == 1)
    {
        result = sum[0];
        delete[] sum;
        return result;
    }

    if ((k>=0) && (k<class_count))
    {
        result = sum[k];
        delete[] sum;
        return result;
    }

    float max = sum[0];
    int class_label = 0;
    for (int i=1; i<class_count; ++i)
        if (sum[i] > max)
        {
            max = sum[i];
            class_label = i;
        }

    delete[] sum;

    /*
    int orig_class_label = -1;
    for (int i=0; i<get_len(class_labels); ++i)
        if (class_labels->data.i[i] == class_label+1)
            orig_class_label = i;
    */
    int orig_class_label = class_labels->data.i[class_label];

    return float(orig_class_label);
}

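/*
  Tree_predictor is the body object of the parallel prediction loop: each
  worker accumulates the shrunken responses of its range of trees into the
  shared per-class sums (guarded by a spin mutex when TBB is enabled).
*/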
class Tree_predictor
{
private:
    pCvSeq* weak;
    float* sum;
    const int k;
    const CvMat* sample;
    const CvMat* missing;
    const float shrinkage;

#ifdef HAVE_TBB
    static tbb::spin_mutex SumMutex;
#endif


public:
    Tree_predictor() : weak(0), sum(0), k(0), sample(0), missing(0), shrinkage(1.0f) {}
    Tree_predictor(pCvSeq* _weak, const int _k, const float _shrinkage,
                   const CvMat* _sample, const CvMat* _missing, float* _sum ) :
                   weak(_weak), sum(_sum), k(_k), sample(_sample),
                   missing(_missing), shrinkage(_shrinkage)
    {}

    Tree_predictor( const Tree_predictor& p, cv::Split ) :
            weak(p.weak), sum(p.sum), k(p.k), sample(p.sample),
            missing(p.missing), shrinkage(p.shrinkage)
    {}

    Tree_predictor& operator=( const Tree_predictor& )
    { return *this; }

    virtual void operator()(const cv::BlockedRange& range) const
    {
#ifdef HAVE_TBB
        tbb::spin_mutex::scoped_lock lock;
#endif
        CvSeqReader reader;
        int begin = range.begin();
        int end = range.end();

        int weak_count = end - begin;
        CvDTree* tree;

        for (int i=0; i<k; ++i)
        {
            float tmp_sum = 0.0f;
            if ((weak[i]) && (weak_count))
            {
                cvStartReadSeq( weak[i], &reader );
                cvSetSeqReaderPos( &reader, begin );
                for (int j=0; j<weak_count; ++j)
                {
                    CV_READ_SEQ_ELEM( tree, reader );
                    tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
                }
            }
#ifdef HAVE_TBB
            lock.acquire(SumMutex);
            sum[i] += tmp_sum;
            lock.release();
#else
            sum[i] += tmp_sum;
#endif
        }
    } // Tree_predictor::operator()

    virtual ~Tree_predictor() {}

}; // class Tree_predictor


#ifdef HAVE_TBB
tbb::spin_mutex Tree_predictor::SumMutex;
#endif

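/*
  predict is the parallel counterpart of predict_serial: the summation over
  trees is dispatched through cv::parallel_for, and the result is then
  reduced to a regression value or a class label exactly as in
  predict_serial.
*/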
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
            CvMat* /*weak_responses*/, CvSlice slice, int k) const
{
    float result = 0.0f;
    if (!weak) return 0.0f;
    float* sum = new float[class_count];
    for (int i=0; i<class_count; ++i)
        sum[i] = 0.0f;
    int begin = slice.start_index;
    int end = begin + cvSliceLength( slice, weak[0] );

    pCvSeq* weak_seq = weak;
    Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
                                params.shrinkage, _sample, _missing, sum);

//#ifdef HAVE_TBB
//    tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
//                      tbb::auto_partitioner());
//#else
    cv::parallel_for(cv::BlockedRange(begin, end), predictor);
//#endif

    for (int i=0; i<class_count; ++i)
        sum[i] = sum[i] /** params.shrinkage*/ + base_value;

    if (class_count == 1)
    {
        result = sum[0];
        delete[] sum;
        return result;
    }

    if ((k>=0) && (k<class_count))
    {
        result = sum[k];
        delete[] sum;
        return result;
    }

    float max = sum[0];
    int class_label = 0;
    for (int i=1; i<class_count; ++i)
        if (sum[i] > max)
        {
            max = sum[i];
            class_label = i;
        }

    delete[] sum;
    int orig_class_label = class_labels->data.i[class_label];

    return float(orig_class_label);
}

//===========================================================================

void CvGBTrees::write_params( CvFileStorage* fs ) const
{
    const char* loss_function_type_str =
        params.loss_function_type == SQUARED_LOSS ? "SquaredLoss" :
        params.loss_function_type == ABSOLUTE_LOSS ? "AbsoluteLoss" :
        params.loss_function_type == HUBER_LOSS ? "HuberLoss" :
        params.loss_function_type == DEVIANCE_LOSS ? "DevianceLoss" : 0;


    if( loss_function_type_str )
        cvWriteString( fs, "loss_function", loss_function_type_str );
    else
        cvWriteInt( fs, "loss_function", params.loss_function_type );

    cvWriteInt( fs, "ensemble_length", params.weak_count );
    cvWriteReal( fs, "shrinkage", params.shrinkage );
    cvWriteReal( fs, "subsample_portion", params.subsample_portion );
    //cvWriteInt( fs, "max_tree_depth", params.max_depth );
    //cvWriteString( fs, "use_surrogate_splits", params.use_surrogates ? "true" : "false");
    if (class_labels) cvWrite( fs, "class_labels", class_labels);

    data->is_classifier = !problem_type();
    data->write_params( fs );
    data->is_classifier = 0;
}


//===========================================================================

void CvGBTrees::read_params( CvFileStorage* fs, CvFileNode* fnode )
{
    CV_FUNCNAME( "CvGBTrees::read_params" );
    __BEGIN__;


    CvFileNode* temp;

    if( !fnode || !CV_NODE_IS_MAP(fnode->tag) )
        return;

    data = new CvDTreeTrainData();
    CV_CALL( data->read_params(fs, fnode));
    data->shared = true;

    params.max_depth = data->params.max_depth;
    params.min_sample_count = data->params.min_sample_count;
    params.max_categories = data->params.max_categories;
    params.priors = data->params.priors;
    params.regression_accuracy = data->params.regression_accuracy;
    params.use_surrogates = data->params.use_surrogates;

    temp = cvGetFileNodeByName( fs, fnode, "loss_function" );
    if( !temp )
        EXIT;

    if( temp && CV_NODE_IS_STRING(temp->tag) )
    {
        const char* loss_function_type_str = cvReadString( temp, "" );
        params.loss_function_type = strcmp( loss_function_type_str, "SquaredLoss" ) == 0 ? SQUARED_LOSS :
                            strcmp( loss_function_type_str, "AbsoluteLoss" ) == 0 ? ABSOLUTE_LOSS :
                            strcmp( loss_function_type_str, "HuberLoss" ) == 0 ? HUBER_LOSS :
                            strcmp( loss_function_type_str, "DevianceLoss" ) == 0 ? DEVIANCE_LOSS : -1;
    }
    else
        params.loss_function_type = cvReadInt( temp, -1 );


    if( params.loss_function_type < SQUARED_LOSS || params.loss_function_type > DEVIANCE_LOSS ||  params.loss_function_type == 2)
        CV_ERROR( CV_StsBadArg, "Unknown loss function" );

    params.weak_count = cvReadIntByName( fs, fnode, "ensemble_length" );
    params.shrinkage = (float)cvReadRealByName( fs, fnode, "shrinkage", 0.1 );
    params.subsample_portion = (float)cvReadRealByName( fs, fnode, "subsample_portion", 1.0 );

    if (data->is_classifier)
    {
        class_labels = (CvMat*)cvReadByName( fs, fnode, "class_labels" );
        if( class_labels && !CV_IS_MAT(class_labels))
            CV_ERROR( CV_StsParseError, "class_labels must be stored as a matrix");
    }
    data->is_classifier = 0;

    __END__;
}


void CvGBTrees::write( CvFileStorage* fs, const char* name ) const
{
    CV_FUNCNAME( "CvGBTrees::write" );

    __BEGIN__;

    CvSeqReader reader;
    int i;
    std::string s;

    cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_ML_GBT );

    if( !weak )
        CV_ERROR( CV_StsBadArg, "The model has not been trained yet" );

    write_params( fs );
    cvWriteReal( fs, "base_value", base_value);
    cvWriteInt( fs, "class_count", class_count);

    for ( int j=0; j < class_count; ++j )
    {
        s = "trees_";
        s += ToString(j);
        cvStartWriteStruct( fs, s.c_str(), CV_NODE_SEQ );

        cvStartReadSeq( weak[j], &reader );

        for( i = 0; i < weak[j]->total; i++ )
        {
            CvDTree* tree;
            CV_READ_SEQ_ELEM( tree, reader );
            cvStartWriteStruct( fs, 0, CV_NODE_MAP );
            tree->write( fs );
            cvEndWriteStruct( fs );
        }

        cvEndWriteStruct( fs );
    }

    cvEndWriteStruct( fs );

    __END__;
}


//===========================================================================


void CvGBTrees::read( CvFileStorage* fs, CvFileNode* node )
{

    CV_FUNCNAME( "CvGBTrees::read" );

    __BEGIN__;

    CvSeqReader reader;
    CvFileNode* trees_fnode;
    CvMemStorage* storage;
    int i, ntrees;
    std::string s;

    clear();
    read_params( fs, node );

    if( !data )
        EXIT;

    base_value = (float)cvReadRealByName( fs, node, "base_value", 0.0 );
    class_count = cvReadIntByName( fs, node, "class_count", 1 );

    weak = new pCvSeq[class_count];


    for (int j=0; j<class_count; ++j)
    {
        s = "trees_";
        s += ToString(j);

        trees_fnode = cvGetFileNodeByName( fs, node, s.c_str() );
        if( !trees_fnode || !CV_NODE_IS_SEQ(trees_fnode->tag) )
            CV_ERROR( CV_StsParseError, "<trees_x> tag is missing" );

        cvStartReadSeq( trees_fnode->data.seq, &reader );
        ntrees = trees_fnode->data.seq->total;

        if( ntrees != params.weak_count )
            CV_ERROR( CV_StsUnmatchedSizes,
            "The number of trees stored does not match <ntrees> tag value" );

        CV_CALL( storage = cvCreateMemStorage() );
        weak[j] = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvDTree*), storage );

        for( i = 0; i < ntrees; i++ )
        {
            CvDTree* tree = new CvDTree();
            CV_CALL(tree->read( fs, (CvFileNode*)reader.ptr, data ));
            CV_NEXT_SEQ_ELEM( reader.seq->elem_size, reader );
            cvSeqPush( weak[j], &tree );
        }
    }

    __END__;
}

//===========================================================================

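/*
  Sample_predictor parallelizes prediction across samples: each worker calls
  predict_serial on its range of rows and writes into its own slots of the
  shared predictions buffer, so no locking is needed.
*/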
class Sample_predictor
{
private:
    const CvGBTrees* gbt;
    float* predictions;
    const CvMat* samples;
    const CvMat* missing;
    const CvMat* idx;
    CvSlice slice;

public:
    Sample_predictor() : gbt(0), predictions(0), samples(0), missing(0),
                         idx(0), slice(CV_WHOLE_SEQ)
    {}

    Sample_predictor(const CvGBTrees* _gbt, float* _predictions,
                   const CvMat* _samples, const CvMat* _missing,
                   const CvMat* _idx, CvSlice _slice=CV_WHOLE_SEQ) :
                   gbt(_gbt), predictions(_predictions), samples(_samples),
                   missing(_missing), idx(_idx), slice(_slice)
    {}


    Sample_predictor( const Sample_predictor& p, cv::Split ) :
            gbt(p.gbt), predictions(p.predictions),
            samples(p.samples), missing(p.missing), idx(p.idx),
            slice(p.slice)
    {}


    virtual void operator()(const cv::BlockedRange& range) const
    {
        int begin = range.begin();
        int end = range.end();

        CvMat x;
        CvMat miss;

        for (int i=begin; i<end; ++i)
        {
            int j = idx ? idx->data.i[i] : i;
            cvGetRow(samples, &x, j);
            if (!missing)
            {
                predictions[i] = gbt->predict_serial(&x,0,0,slice);
            }
            else
            {
                cvGetRow(missing, &miss, j);
                predictions[i] = gbt->predict_serial(&x,&miss,0,slice);
            }
        }
    } // Sample_predictor::operator()

    virtual ~Sample_predictor() {}

}; // class Sample_predictor

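/*
  calc_error runs the (parallel) per-sample prediction over the train or
  test subset and returns the misclassification percentage for
  classification problems or the mean squared error for regression.
*/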
// type in {CV_TRAIN_ERROR, CV_TEST_ERROR}
float
CvGBTrees::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
{

    float err = 0.0f;
    const CvMat* _sample_idx = (type == CV_TRAIN_ERROR) ?
                              _data->get_train_sample_idx() :
                              _data->get_test_sample_idx();
    const CvMat* response = _data->get_responses();

    int n = _sample_idx ? get_len(_sample_idx) : 0;
    n = (type == CV_TRAIN_ERROR && n == 0) ? _data->get_values()->rows : n;

    if (!n)
        return -FLT_MAX;

    float* pred_resp = 0;
    if (resp)
    {
        resp->resize(n);
        pred_resp = &((*resp)[0]);
    }
    else
        pred_resp = new float[n];

    Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
            _data->get_missing(), _sample_idx);

//#ifdef HAVE_TBB
//    tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
//#else
    cv::parallel_for(cv::BlockedRange(0,n), predictor);
//#endif

    int* sidx = _sample_idx ? _sample_idx->data.i : 0;
    int r_step = CV_IS_MAT_CONT(response->type) ?
                1 : response->step / CV_ELEM_SIZE(response->type);


    if ( !problem_type() )
    {
        for( int i = 0; i < n; i++ )
        {
            int si = sidx ? sidx[i] : i;
            int d = fabs((double)pred_resp[i] - response->data.fl[si*r_step]) <= FLT_EPSILON ? 0 : 1;
            err += d;
        }
        err = err / (float)n * 100.0f;
    }
    else
    {
        for( int i = 0; i < n; i++ )
        {
            int si = sidx ? sidx[i] : i;
            float d = pred_resp[i] - response->data.fl[si*r_step];
            err += d*d;
        }
        err = err / (float)n;
    }

    // the buffer is only owned by us when the caller did not supply one
    if (!resp)
        delete[] pred_resp;

    return err;
}


CvGBTrees::CvGBTrees( const cv::Mat& trainData, int tflag,
          const cv::Mat& responses, const cv::Mat& varIdx,
          const cv::Mat& sampleIdx, const cv::Mat& varType,
          const cv::Mat& missingDataMask,
          CvGBTreesParams _params )
{
    data = 0;
    weak = 0;
    default_model_name = "my_boost_tree";
    orig_response = sum_response = sum_response_tmp = 0;
    subsample_train = subsample_test = 0;
    missing = sample_idx = 0;
    class_labels = 0;
    class_count = 1;
    delta = 0.0f;

    clear();

    train(trainData, tflag, responses, varIdx, sampleIdx, varType, missingDataMask, _params, false);
}

bool CvGBTrees::train( const cv::Mat& trainData, int tflag,
                   const cv::Mat& responses, const cv::Mat& varIdx,
                   const cv::Mat& sampleIdx, const cv::Mat& varType,
                   const cv::Mat& missingDataMask,
                   CvGBTreesParams _params,
                   bool update )
{
    CvMat _trainData = trainData, _responses = responses;
    CvMat _varIdx = varIdx, _sampleIdx = sampleIdx, _varType = varType;
    CvMat _missingDataMask = missingDataMask;

    return train( &_trainData, tflag, &_responses, varIdx.empty() ? 0 : &_varIdx,
                  sampleIdx.empty() ? 0 : &_sampleIdx, varType.empty() ? 0 : &_varType,
                  missingDataMask.empty() ? 0 : &_missingDataMask, _params, update);
}

float CvGBTrees::predict( const cv::Mat& sample, const cv::Mat& _missing,
                          const cv::Range& slice, int k ) const
{
    CvMat _sample = sample, miss = _missing;
    return predict(&_sample, _missing.empty() ? 0 : &miss, 0,
                   slice==cv::Range::all() ? CV_WHOLE_SEQ : cvSlice(slice.start, slice.end), k);
}
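
/*
  Minimal usage sketch (not part of the original file; the variable names
  and matrix shapes are illustrative). `samples` is assumed to be a CV_32FC1
  cv::Mat with one sample per row and `responses` a CV_32FC1 column vector:

      CvGBTreesParams params( CvGBTrees::SQUARED_LOSS, 100, 0.1f, 0.8f,
                              3, false );
      CvGBTrees gbt;
      gbt.train( samples, CV_ROW_SAMPLE, responses, cv::Mat(), cv::Mat(),
                 cv::Mat(), cv::Mat(), params, false );
      float y = gbt.predict( samples.row(0), cv::Mat(),
                             cv::Range::all(), -1 );
*/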