modules/ml/src/ann_mlp.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //
  12 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  13 // Third party copyrights are property of their respective owners.
  14 //
  15 // Redistribution and use in source and binary forms, with or without modification,
  16 // are permitted provided that the following conditions are met:
  17 //
  18 //   * Redistribution's of source code must retain the above copyright notice,
  19 //     this list of conditions and the following disclaimer.
  20 //
  21 //   * Redistribution's in binary form must reproduce the above copyright notice,
  22 //     this list of conditions and the following disclaimer in the documentation
  23 //     and/or other materials provided with the distribution.
  24 //
  25 //   * The name of Intel Corporation may not be used to endorse or promote products
  26 //     derived from this software without specific prior written permission.
  27 //
  28 // This software is provided by the copyright holders and contributors "as is" and
  29 // any express or implied warranties, including, but not limited to, the implied
  30 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  31 // In no event shall the Intel Corporation or contributors be liable for any direct,
  32 // indirect, incidental, special, exemplary, or consequential damages
  33 // (including, but not limited to, procurement of substitute goods or services;
  34 // loss of use, data, or profits; or business interruption) however caused
  35 // and on any theory of liability, whether in contract, strict liability,
  36 // or tort (including negligence or otherwise) arising in any way out of
  37 // the use of this software, even if advised of the possibility of such damage.
  38 //
  39 //M*/
  40
  41 #include "precomp.hpp"
  42
  43 namespace cv { namespace ml {
  44
  45 struct AnnParams
  46 {
  47     AnnParams()
  48     {
  49         termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
  50         trainMethod = ANN_MLP::RPROP;
  51         bpDWScale = bpMomentScale = 0.1;
  52         rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
  53         rpDWMin = FLT_EPSILON; rpDWMax = 50.;
  54     }
  55
  56     TermCriteria termCrit;
  57     int trainMethod;
  58
  59     double bpDWScale;
  60     double bpMomentScale;
  61
  62     double rpDW0;
  63     double rpDWPlus;
  64     double rpDWMinus;
  65     double rpDWMin;
  66     double rpDWMax;
  67 };
  68
  69 template <typename T>
  70 inline T inBounds(T val, T min_val, T max_val)
  71 {
  72     return std::min(std::max(val, min_val), max_val);
  73 }
  74
  75 class ANN_MLPImpl : public ANN_MLP
  76 {
  77 public:
  78     ANN_MLPImpl()
  79     {
  80         clear();
  81         setActivationFunction( SIGMOID_SYM, 0, 0 );
  82         setLayerSizes(Mat());
  83         setTrainMethod(ANN_MLP::RPROP, 0.1, FLT_EPSILON);
  84     }
  85
  86     virtual ~ANN_MLPImpl() {}
  87
  88     CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.termCrit)
  89     CV_IMPL_PROPERTY(double, BackpropWeightScale, params.bpDWScale)
  90     CV_IMPL_PROPERTY(double, BackpropMomentumScale, params.bpMomentScale)
  91     CV_IMPL_PROPERTY(double, RpropDW0, params.rpDW0)
  92     CV_IMPL_PROPERTY(double, RpropDWPlus, params.rpDWPlus)
  93     CV_IMPL_PROPERTY(double, RpropDWMinus, params.rpDWMinus)
  94     CV_IMPL_PROPERTY(double, RpropDWMin, params.rpDWMin)
  95     CV_IMPL_PROPERTY(double, RpropDWMax, params.rpDWMax)
  96
  97     void clear()
  98     {
  99         min_val = max_val = min_val1 = max_val1 = 0.;
 100         rng = RNG((uint64)-1);
 101         weights.clear();
 102         trained = false;
 103         max_buf_sz = 1 << 12;
 104     }
 105
 106     int layer_count() const { return (int)layer_sizes.size(); }
 107
 108     void setTrainMethod(int method, double param1, double param2)
 109     {
 110         if (method != ANN_MLP::RPROP && method != ANN_MLP::BACKPROP)
 111             method = ANN_MLP::RPROP;
 112         params.trainMethod = method;
 113         if(method == ANN_MLP::RPROP )
 114         {
 115             if( param1 < FLT_EPSILON )
 116                 param1 = 1.;
 117             params.rpDW0 = param1;
 118             params.rpDWMin = std::max( param2, 0. );
 119         }
 120         else if(method == ANN_MLP::BACKPROP )
 121         {
 122             if( param1 <= 0 )
 123                 param1 = 0.1;
 124             params.bpDWScale = inBounds<double>(param1, 1e-3, 1.);
 125             if( param2 < 0 )
 126                 param2 = 0.1;
 127             params.bpMomentScale = std::min( param2, 1. );
 128         }
 129     }
 130
 131     int getTrainMethod() const
 132     {
 133         return params.trainMethod;
 134     }
 135
 136     void setActivationFunction(int _activ_func, double _f_param1, double _f_param2 )
 137     {
 138         if( _activ_func < 0 || _activ_func > GAUSSIAN )
 139             CV_Error( CV_StsOutOfRange, "Unknown activation function" );
 140
 141         activ_func = _activ_func;
 142
 143         switch( activ_func )
 144         {
 145         case SIGMOID_SYM:
 146             max_val = 0.95; min_val = -max_val;
 147             max_val1 = 0.98; min_val1 = -max_val1;
 148             if( fabs(_f_param1) < FLT_EPSILON )
 149                 _f_param1 = 2./3;
 150             if( fabs(_f_param2) < FLT_EPSILON )
 151                 _f_param2 = 1.7159;
 152             break;
 153         case GAUSSIAN:
 154             max_val = 1.; min_val = 0.05;
 155             max_val1 = 1.; min_val1 = 0.02;
 156             if( fabs(_f_param1) < FLT_EPSILON )
 157                 _f_param1 = 1.;
 158             if( fabs(_f_param2) < FLT_EPSILON )
 159                 _f_param2 = 1.;
 160             break;
 161         default:
 162             min_val = max_val = min_val1 = max_val1 = 0.;
 163             _f_param1 = 1.;
 164             _f_param2 = 0.;
 165         }
 166
 167         f_param1 = _f_param1;
 168         f_param2 = _f_param2;
 169     }
 170
 171
 172     void init_weights()
 173     {
 174         int i, j, k, l_count = layer_count();
 175
 176         for( i = 1; i < l_count; i++ )
 177         {
 178             int n1 = layer_sizes[i-1];
 179             int n2 = layer_sizes[i];
 180             double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
 181             double* w = weights[i].ptr<double>();
 182
 183             // initialize weights using Nguyen-Widrow algorithm
 184             for( j = 0; j < n2; j++ )
 185             {
 186                 double s = 0;
 187                 for( k = 0; k <= n1; k++ )
 188                 {
 189                     val = rng.uniform(0., 1.)*2-1.;
 190                     w[k*n2 + j] = val;
 191                     s += fabs(val);
 192                 }
 193
 194                 if( i < l_count - 1 )
 195                 {
 196                     s = 1./(s - fabs(val));
 197                     for( k = 0; k <= n1; k++ )
 198                         w[k*n2 + j] *= s;
 199                     w[n1*n2 + j] *= G*(-1+j*2./n2);
 200                 }
 201             }
 202         }
 203     }
 204
 205     Mat getLayerSizes() const
 206     {
 207         return Mat_<int>(layer_sizes, true);
 208     }
 209
 210     void setLayerSizes( InputArray _layer_sizes )
 211     {
 212         clear();
 213
 214         _layer_sizes.copyTo(layer_sizes);
 215         int l_count = layer_count();
 216
 217         weights.resize(l_count + 2);
 218         max_lsize = 0;
 219
 220         if( l_count > 0 )
 221         {
 222             for( int i = 0; i < l_count; i++ )
 223             {
 224                 int n = layer_sizes[i];
 225                 if( n < 1 + (0 < i && i < l_count-1))
 226                     CV_Error( CV_StsOutOfRange,
 227                              "there should be at least one input and one output "
 228                              "and every hidden layer must have more than 1 neuron" );
 229                 max_lsize = std::max( max_lsize, n );
 230                 if( i > 0 )
 231                     weights[i].create(layer_sizes[i-1]+1, n, CV_64F);
 232             }
 233
 234             int ninputs = layer_sizes.front();
 235             int noutputs = layer_sizes.back();
 236             weights[0].create(1, ninputs*2, CV_64F);
 237             weights[l_count].create(1, noutputs*2, CV_64F);
 238             weights[l_count+1].create(1, noutputs*2, CV_64F);
 239         }
 240     }
 241
 242     float predict( InputArray _inputs, OutputArray _outputs, int ) const
 243     {
 244         if( !trained )
 245             CV_Error( CV_StsError, "The network has not been trained or loaded" );
 246
 247         Mat inputs = _inputs.getMat();
 248         int type = inputs.type(), l_count = layer_count();
 249         int n = inputs.rows, dn0 = n;
 250
 251         CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
 252         int noutputs = layer_sizes[l_count-1];
 253         Mat outputs;
 254
 255         int min_buf_sz = 2*max_lsize;
 256         int buf_sz = n*min_buf_sz;
 257
 258         if( buf_sz > max_buf_sz )
 259         {
 260             dn0 = max_buf_sz/min_buf_sz;
 261             dn0 = std::max( dn0, 1 );
 262             buf_sz = dn0*min_buf_sz;
 263         }
 264
 265         cv::AutoBuffer<double> _buf(buf_sz+noutputs);
 266         double* buf = _buf;
 267
 268         if( !_outputs.needed() )
 269         {
 270             CV_Assert( n == 1 );
 271             outputs = Mat(n, noutputs, type, buf + buf_sz);
 272         }
 273         else
 274         {
 275             _outputs.create(n, noutputs, type);
 276             outputs = _outputs.getMat();
 277         }
 278
 279         int dn = 0;
 280         for( int i = 0; i < n; i += dn )
 281         {
 282             dn = std::min( dn0, n - i );
 283
 284             Mat layer_in = inputs.rowRange(i, i + dn);
 285             Mat layer_out( dn, layer_in.cols, CV_64F, buf);
 286
 287             scale_input( layer_in, layer_out );
 288             layer_in = layer_out;
 289
 290             for( int j = 1; j < l_count; j++ )
 291             {
 292                 double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
 293                 int cols = layer_sizes[j];
 294
 295                 layer_out = Mat(dn, cols, CV_64F, data);
 296                 Mat w = weights[j].rowRange(0, layer_in.cols);
 297                 gemm(layer_in, w, 1, noArray(), 0, layer_out);
 298                 calc_activ_func( layer_out, weights[j] );
 299
 300                 layer_in = layer_out;
 301             }
 302
 303             layer_out = outputs.rowRange(i, i + dn);
 304             scale_output( layer_in, layer_out );
 305         }
 306
 307         if( n == 1 )
 308         {
 309             int maxIdx[] = {0, 0};
 310             minMaxIdx(outputs, 0, 0, 0, maxIdx);
 311             return (float)(maxIdx[0] + maxIdx[1]);
 312         }
 313
 314         return 0.f;
 315     }
 316
 317     void scale_input( const Mat& _src, Mat& _dst ) const
 318     {
 319         int cols = _src.cols;
 320         const double* w = weights[0].ptr<double>();
 321
 322         if( _src.type() == CV_32F )
 323         {
 324             for( int i = 0; i < _src.rows; i++ )
 325             {
 326                 const float* src = _src.ptr<float>(i);
 327                 double* dst = _dst.ptr<double>(i);
 328                 for( int j = 0; j < cols; j++ )
 329                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 330             }
 331         }
 332         else
 333         {
 334             for( int i = 0; i < _src.rows; i++ )
 335             {
 336                 const float* src = _src.ptr<float>(i);
 337                 double* dst = _dst.ptr<double>(i);
 338                 for( int j = 0; j < cols; j++ )
 339                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 340             }
 341         }
 342     }
 343
 344     void scale_output( const Mat& _src, Mat& _dst ) const
 345     {
 346         int cols = _src.cols;
 347         const double* w = weights[layer_count()].ptr<double>();
 348
 349         if( _dst.type() == CV_32F )
 350         {
 351             for( int i = 0; i < _src.rows; i++ )
 352             {
 353                 const double* src = _src.ptr<double>(i);
 354                 float* dst = _dst.ptr<float>(i);
 355                 for( int j = 0; j < cols; j++ )
 356                     dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
 357             }
 358         }
 359         else
 360         {
 361             for( int i = 0; i < _src.rows; i++ )
 362             {
 363                 const double* src = _src.ptr<double>(i);
 364                 double* dst = _dst.ptr<double>(i);
 365                 for( int j = 0; j < cols; j++ )
 366                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 367             }
 368         }
 369     }
 370
 371     void calc_activ_func( Mat& sums, const Mat& w ) const
 372     {
 373         const double* bias = w.ptr<double>(w.rows-1);
 374         int i, j, n = sums.rows, cols = sums.cols;
 375         double scale = 0, scale2 = f_param2;
 376
 377         switch( activ_func )
 378         {
 379             case IDENTITY:
 380                 scale = 1.;
 381                 break;
 382             case SIGMOID_SYM:
 383                 scale = -f_param1;
 384                 break;
 385             case GAUSSIAN:
 386                 scale = -f_param1*f_param1;
 387                 break;
 388             default:
 389                 ;
 390         }
 391
 392         CV_Assert( sums.isContinuous() );
 393
 394         if( activ_func != GAUSSIAN )
 395         {
 396             for( i = 0; i < n; i++ )
 397             {
 398                 double* data = sums.ptr<double>(i);
 399                 for( j = 0; j < cols; j++ )
 400                     data[j] = (data[j] + bias[j])*scale;
 401             }
 402
 403             if( activ_func == IDENTITY )
 404                 return;
 405         }
 406         else
 407         {
 408             for( i = 0; i < n; i++ )
 409             {
 410                 double* data = sums.ptr<double>(i);
 411                 for( j = 0; j < cols; j++ )
 412                 {
 413                     double t = data[j] + bias[j];
 414                     data[j] = t*t*scale;
 415                 }
 416             }
 417         }
 418
 419         exp( sums, sums );
 420
 421         if( sums.isContinuous() )
 422         {
 423             cols *= n;
 424             n = 1;
 425         }
 426
 427         switch( activ_func )
 428         {
 429             case SIGMOID_SYM:
 430                 for( i = 0; i < n; i++ )
 431                 {
 432                     double* data = sums.ptr<double>(i);
 433                     for( j = 0; j < cols; j++ )
 434                     {
 435                         if(!cvIsInf(data[j]))
 436                         {
 437                             double t = scale2*(1. - data[j])/(1. + data[j]);
 438                             data[j] = t;
 439                         }
 440                         else
 441                         {
 442                             data[j] = -scale2;
 443                         }
 444                     }
 445                 }
 446                 break;
 447
 448             case GAUSSIAN:
 449                 for( i = 0; i < n; i++ )
 450                 {
 451                     double* data = sums.ptr<double>(i);
 452                     for( j = 0; j < cols; j++ )
 453                         data[j] = scale2*data[j];
 454                 }
 455                 break;
 456
 457             default:
 458                 ;
 459         }
 460     }
 461
 462     void calc_activ_func_deriv( Mat& _xf, Mat& _df, const Mat& w ) const
 463     {
 464         const double* bias = w.ptr<double>(w.rows-1);
 465         int i, j, n = _xf.rows, cols = _xf.cols;
 466
 467         if( activ_func == IDENTITY )
 468         {
 469             for( i = 0; i < n; i++ )
 470             {
 471                 double* xf = _xf.ptr<double>(i);
 472                 double* df = _df.ptr<double>(i);
 473
 474                 for( j = 0; j < cols; j++ )
 475                 {
 476                     xf[j] += bias[j];
 477                     df[j] = 1;
 478                 }
 479             }
 480         }
 481         else if( activ_func == GAUSSIAN )
 482         {
 483             double scale = -f_param1*f_param1;
 484             double scale2 = scale*f_param2;
 485             for( i = 0; i < n; i++ )
 486             {
 487                 double* xf = _xf.ptr<double>(i);
 488                 double* df = _df.ptr<double>(i);
 489
 490                 for( j = 0; j < cols; j++ )
 491                 {
 492                     double t = xf[j] + bias[j];
 493                     df[j] = t*2*scale2;
 494                     xf[j] = t*t*scale;
 495                 }
 496             }
 497             exp( _xf, _xf );
 498
 499             for( i = 0; i < n; i++ )
 500             {
 501                 double* xf = _xf.ptr<double>(i);
 502                 double* df = _df.ptr<double>(i);
 503
 504                 for( j = 0; j < cols; j++ )
 505                     df[j] *= xf[j];
 506             }
 507         }
 508         else
 509         {
 510             double scale = f_param1;
 511             double scale2 = f_param2;
 512
 513             for( i = 0; i < n; i++ )
 514             {
 515                 double* xf = _xf.ptr<double>(i);
 516                 double* df = _df.ptr<double>(i);
 517
 518                 for( j = 0; j < cols; j++ )
 519                 {
 520                     xf[j] = (xf[j] + bias[j])*scale;
 521                     df[j] = -fabs(xf[j]);
 522                 }
 523             }
 524
 525             exp( _df, _df );
 526
 527             // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
 528             // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
 529             // 2*a*exp(-ax)/(1+exp(-ax))^2
 530             scale *= 2*f_param2;
 531             for( i = 0; i < n; i++ )
 532             {
 533                 double* xf = _xf.ptr<double>(i);
 534                 double* df = _df.ptr<double>(i);
 535
 536                 for( j = 0; j < cols; j++ )
 537                 {
 538                     int s0 = xf[j] > 0 ? 1 : -1;
 539                     double t0 = 1./(1. + df[j]);
 540                     double t1 = scale*df[j]*t0*t0;
 541                     t0 *= scale2*(1. - df[j])*s0;
 542                     df[j] = t1;
 543                     xf[j] = t0;
 544                 }
 545             }
 546         }
 547     }
 548
 549     void calc_input_scale( const Mat& inputs, int flags )
 550     {
 551         bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
 552         bool no_scale = (flags & NO_INPUT_SCALE) != 0;
 553         double* scale = weights[0].ptr<double>();
 554         int count = inputs.rows;
 555
 556         if( reset_weights )
 557         {
 558             int i, j, vcount = layer_sizes[0];
 559             int type = inputs.type();
 560             double a = no_scale ? 1. : 0.;
 561
 562             for( j = 0; j < vcount; j++ )
 563                 scale[2*j] = a, scale[j*2+1] = 0.;
 564
 565             if( no_scale )
 566                 return;
 567
 568             for( i = 0; i < count; i++ )
 569             {
 570                 const uchar* p = inputs.ptr(i);
 571                 const float* f = (const float*)p;
 572                 const double* d = (const double*)p;
 573                 for( j = 0; j < vcount; j++ )
 574                 {
 575                     double t = type == CV_32F ? (double)f[j] : d[j];
 576                     scale[j*2] += t;
 577                     scale[j*2+1] += t*t;
 578                 }
 579             }
 580
 581             for( j = 0; j < vcount; j++ )
 582             {
 583                 double s = scale[j*2], s2 = scale[j*2+1];
 584                 double m = s/count, sigma2 = s2/count - m*m;
 585                 scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
 586                 scale[j*2+1] = -m*scale[j*2];
 587             }
 588         }
 589     }
 590
 591     void calc_output_scale( const Mat& outputs, int flags )
 592     {
 593         int i, j, vcount = layer_sizes.back();
 594         int type = outputs.type();
 595         double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
 596         bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
 597         bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
 598         int l_count = layer_count();
 599         double* scale = weights[l_count].ptr<double>();
 600         double* inv_scale = weights[l_count+1].ptr<double>();
 601         int count = outputs.rows;
 602
 603         if( reset_weights )
 604         {
 605             double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;
 606
 607             for( j = 0; j < vcount; j++ )
 608             {
 609                 scale[2*j] = inv_scale[2*j] = a0;
 610                 scale[j*2+1] = inv_scale[2*j+1] = b0;
 611             }
 612
 613             if( no_scale )
 614                 return;
 615         }
 616
 617         for( i = 0; i < count; i++ )
 618         {
 619             const uchar* p = outputs.ptr(i);
 620             const float* f = (const float*)p;
 621             const double* d = (const double*)p;
 622
 623             for( j = 0; j < vcount; j++ )
 624             {
 625                 double t = type == CV_32F ? (double)f[j] : d[j];
 626
 627                 if( reset_weights )
 628                 {
 629                     double mj = scale[j*2], Mj = scale[j*2+1];
 630                     if( mj > t ) mj = t;
 631                     if( Mj < t ) Mj = t;
 632
 633                     scale[j*2] = mj;
 634                     scale[j*2+1] = Mj;
 635                 }
 636                 else if( !no_scale )
 637                 {
 638                     t = t*inv_scale[j*2] + inv_scale[2*j+1];
 639                     if( t < m1 || t > M1 )
 640                         CV_Error( CV_StsOutOfRange,
 641                                  "Some of new output training vector components run exceed the original range too much" );
 642                 }
 643             }
 644         }
 645
 646         if( reset_weights )
 647             for( j = 0; j < vcount; j++ )
 648             {
 649                 // map mj..Mj to m..M
 650                 double mj = scale[j*2], Mj = scale[j*2+1];
 651                 double a, b;
 652                 double delta = Mj - mj;
 653                 if( delta < DBL_EPSILON )
 654                     a = 1, b = (M + m - Mj - mj)*0.5;
 655                 else
 656                     a = (M - m)/delta, b = m - mj*a;
 657                 inv_scale[j*2] = a; inv_scale[j*2+1] = b;
 658                 a = 1./a; b = -b*a;
 659                 scale[j*2] = a; scale[j*2+1] = b;
 660             }
 661     }
 662
 663     void prepare_to_train( const Mat& inputs, const Mat& outputs,
 664                            Mat& sample_weights, int flags )
 665     {
 666         if( layer_sizes.empty() )
 667             CV_Error( CV_StsError,
 668                      "The network has not been created. Use method create or the appropriate constructor" );
 669
 670         if( (inputs.type() != CV_32F && inputs.type() != CV_64F) ||
 671             inputs.cols != layer_sizes[0] )
 672             CV_Error( CV_StsBadArg,
 673                      "input training data should be a floating-point matrix with "
 674                      "the number of rows equal to the number of training samples and "
 675                      "the number of columns equal to the size of 0-th (input) layer" );
 676
 677         if( (outputs.type() != CV_32F && outputs.type() != CV_64F) ||
 678             outputs.cols != layer_sizes.back() )
 679             CV_Error( CV_StsBadArg,
 680                      "output training data should be a floating-point matrix with "
 681                      "the number of rows equal to the number of training samples and "
 682                      "the number of columns equal to the size of last (output) layer" );
 683
 684         if( inputs.rows != outputs.rows )
 685             CV_Error( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );
 686
 687         Mat temp;
 688         double s = sum(sample_weights)[0];
 689         sample_weights.convertTo(temp, CV_64F, 1./s);
 690         sample_weights = temp;
 691
 692         calc_input_scale( inputs, flags );
 693         calc_output_scale( outputs, flags );
 694     }
 695
 696     bool train( const Ptr<TrainData>& trainData, int flags )
 697     {
 698         const int MAX_ITER = 1000;
 699         const double DEFAULT_EPSILON = FLT_EPSILON;
 700
 701         // initialize training data
 702         Mat inputs = trainData->getTrainSamples();
 703         Mat outputs = trainData->getTrainResponses();
 704         Mat sw = trainData->getTrainSampleWeights();
 705         prepare_to_train( inputs, outputs, sw, flags );
 706
 707         // ... and link weights
 708         if( !(flags & UPDATE_WEIGHTS) )
 709             init_weights();
 710
 711         TermCriteria termcrit;
 712         termcrit.type = TermCriteria::COUNT + TermCriteria::EPS;
 713         termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
 714         termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);
 715
 716         int iter = params.trainMethod == ANN_MLP::BACKPROP ?
 717             train_backprop( inputs, outputs, sw, termcrit ) :
 718             train_rprop( inputs, outputs, sw, termcrit );
 719
 720         trained = iter > 0;
 721         return trained;
 722     }
 723
 724     int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
 725     {
 726         int i, j, k;
 727         double prev_E = DBL_MAX*0.5, E = 0;
 728         int itype = inputs.type(), otype = outputs.type();
 729
 730         int count = inputs.rows;
 731
 732         int iter = -1, max_iter = termCrit.maxCount*count;
 733         double epsilon = termCrit.epsilon*count;
 734
 735         int l_count = layer_count();
 736         int ivcount = layer_sizes[0];
 737         int ovcount = layer_sizes.back();
 738
 739         // allocate buffers
 740         vector<vector<double> > x(l_count);
 741         vector<vector<double> > df(l_count);
 742         vector<Mat> dw(l_count);
 743
 744         for( i = 0; i < l_count; i++ )
 745         {
 746             int n = layer_sizes[i];
 747             x[i].resize(n+1);
 748             df[i].resize(n);
 749             dw[i] = Mat::zeros(weights[i].size(), CV_64F);
 750         }
 751
 752         Mat _idx_m(1, count, CV_32S);
 753         int* _idx = _idx_m.ptr<int>();
 754         for( i = 0; i < count; i++ )
 755             _idx[i] = i;
 756
 757         AutoBuffer<double> _buf(max_lsize*2);
 758         double* buf[] = { _buf, (double*)_buf + max_lsize };
 759
 760         const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();
 761
 762         // run back-propagation loop
 763         /*
 764          y_i = w_i*x_{i-1}
 765          x_i = f(y_i)
 766          E = 1/2*||u - x_N||^2
 767          grad_N = (x_N - u)*f'(y_i)
 768          dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
 769          w_i(t+1) = w_i(t) + dw_i(t)
 770          grad_{i-1} = w_i^t*grad_i
 771         */
 772         for( iter = 0; iter < max_iter; iter++ )
 773         {
 774             int idx = iter % count;
 775             double sweight = sw ? count*sw[idx] : 1.;
 776
 777             if( idx == 0 )
 778             {
 779                 //printf("%d. E = %g\n", iter/count, E);
 780                 if( fabs(prev_E - E) < epsilon )
 781                     break;
 782                 prev_E = E;
 783                 E = 0;
 784
 785                 // shuffle indices
 786                 for( i = 0; i < count; i++ )
 787                 {
 788                     j = rng.uniform(0, count);
 789                     k = rng.uniform(0, count);
 790                     std::swap(_idx[j], _idx[k]);
 791                 }
 792             }
 793
 794             idx = _idx[idx];
 795
 796             const uchar* x0data_p = inputs.ptr(idx);
 797             const float* x0data_f = (const float*)x0data_p;
 798             const double* x0data_d = (const double*)x0data_p;
 799
 800             double* w = weights[0].ptr<double>();
 801             for( j = 0; j < ivcount; j++ )
 802                 x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];
 803
 804             Mat x1( 1, ivcount, CV_64F, &x[0][0] );
 805
 806             // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
 807             for( i = 1; i < l_count; i++ )
 808             {
 809                 int n = layer_sizes[i];
 810                 Mat x2(1, n, CV_64F, &x[i][0] );
 811                 Mat _w = weights[i].rowRange(0, x1.cols);
 812                 gemm(x1, _w, 1, noArray(), 0, x2);
 813                 Mat _df(1, n, CV_64F, &df[i][0] );
 814                 calc_activ_func_deriv( x2, _df, weights[i] );
 815                 x1 = x2;
 816             }
 817
 818             Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
 819             w = weights[l_count+1].ptr<double>();
 820
 821             // calculate error
 822             const uchar* udata_p = outputs.ptr(idx);
 823             const float* udata_f = (const float*)udata_p;
 824             const double* udata_d = (const double*)udata_p;
 825
 826             double* gdata = grad1.ptr<double>();
 827             for( k = 0; k < ovcount; k++ )
 828             {
 829                 double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
 830                 gdata[k] = t*sweight;
 831                 E += t*t;
 832             }
 833             E *= sweight;
 834
 835             // backward pass, update weights
 836             for( i = l_count-1; i > 0; i-- )
 837             {
 838                 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
 839                 Mat _df(1, n2, CV_64F, &df[i][0]);
 840                 multiply( grad1, _df, grad1 );
 841                 Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
 842                 x[i-1][n1] = 1.;
 843                 gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
 844                 add( weights[i], dw[i], weights[i] );
 845                 if( i > 1 )
 846                 {
 847                     Mat grad2(1, n1, CV_64F, buf[i&1]);
 848                     Mat _w = weights[i].rowRange(0, n1);
 849                     gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
 850                     grad1 = grad2;
 851                 }
 852             }
 853         }
 854
 855         iter /= count;
 856         return iter;
 857     }
 858
 859     struct RPropLoop : public ParallelLoopBody
 860     {
 861         RPropLoop(ANN_MLPImpl* _ann,
 862                   const Mat& _inputs, const Mat& _outputs, const Mat& _sw,
 863                   int _dcount0, vector<Mat>& _dEdw, double* _E)
 864         {
 865             ann = _ann;
 866             inputs = _inputs;
 867             outputs = _outputs;
 868             sw = _sw.ptr<double>();
 869             dcount0 = _dcount0;
 870             dEdw = &_dEdw;
 871             pE = _E;
 872         }
 873
 874         ANN_MLPImpl* ann;
 875         vector<Mat>* dEdw;
 876         Mat inputs, outputs;
 877         const double* sw;
 878         int dcount0;
 879         double* pE;
 880
 881         void operator()( const Range& range ) const
 882         {
 883             double inv_count = 1./inputs.rows;
 884             int ivcount = ann->layer_sizes.front();
 885             int ovcount = ann->layer_sizes.back();
 886             int itype = inputs.type(), otype = outputs.type();
 887             int count = inputs.rows;
 888             int i, j, k, l_count = ann->layer_count();
 889             vector<vector<double> > x(l_count);
 890             vector<vector<double> > df(l_count);
 891             vector<double> _buf(ann->max_lsize*dcount0*2);
 892             double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
 893             double E = 0;
 894
 895             for( i = 0; i < l_count; i++ )
 896             {
 897                 x[i].resize(ann->layer_sizes[i]*dcount0);
 898                 df[i].resize(ann->layer_sizes[i]*dcount0);
 899             }
 900
 901             for( int si = range.start; si < range.end; si++ )
 902             {
 903                 int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
 904                 int dcount = i1 - i0;
 905                 const double* w = ann->weights[0].ptr<double>();
 906
 907                 // grab and preprocess input data
 908                 for( i = 0; i < dcount; i++ )
 909                 {
 910                     const uchar* x0data_p = inputs.ptr(i0 + i);
 911                     const float* x0data_f = (const float*)x0data_p;
 912                     const double* x0data_d = (const double*)x0data_p;
 913
 914                     double* xdata = &x[0][i*ivcount];
 915                     for( j = 0; j < ivcount; j++ )
 916                         xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
 917                 }
 918                 Mat x1(dcount, ivcount, CV_64F, &x[0][0]);
 919
 920                 // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
 921                 for( i = 1; i < l_count; i++ )
 922                 {
 923                     Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
 924                     Mat _w = ann->weights[i].rowRange(0, x1.cols);
 925                     gemm( x1, _w, 1, noArray(), 0, x2 );
 926                     Mat _df( x2.size(), CV_64F, &df[i][0] );
 927                     ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
 928                     x1 = x2;
 929                 }
 930
 931                 Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);
 932
 933                 w = ann->weights[l_count+1].ptr<double>();
 934
 935                 // calculate error
 936                 for( i = 0; i < dcount; i++ )
 937                 {
 938                     const uchar* udata_p = outputs.ptr(i0+i);
 939                     const float* udata_f = (const float*)udata_p;
 940                     const double* udata_d = (const double*)udata_p;
 941
 942                     const double* xdata = &x[l_count-1][i*ovcount];
 943                     double* gdata = grad1.ptr<double>(i);
 944                     double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
 945
 946                     for( j = 0; j < ovcount; j++ )
 947                     {
 948                         double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
 949                         gdata[j] = t*sweight;
 950                         E1 += t*t;
 951                     }
 952                     E += sweight*E1;
 953                 }
 954
 955                 for( i = l_count-1; i > 0; i-- )
 956                 {
 957                     int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
 958                     Mat _df(dcount, n2, CV_64F, &df[i][0]);
 959                     multiply(grad1, _df, grad1);
 960
 961                     {
 962                         AutoLock lock(ann->mtx);
 963                         Mat _dEdw = dEdw->at(i).rowRange(0, n1);
 964                         x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
 965                         gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);
 966
 967                         // update bias part of dEdw
 968                         double* dst = dEdw->at(i).ptr<double>(n1);
 969                         for( k = 0; k < dcount; k++ )
 970                         {
 971                             const double* src = grad1.ptr<double>(k);
 972                             for( j = 0; j < n2; j++ )
 973                                 dst[j] += src[j];
 974                         }
 975                     }
 976
 977                     Mat grad2( dcount, n1, CV_64F, buf[i&1] );
 978                     if( i > 1 )
 979                     {
 980                         Mat _w = ann->weights[i].rowRange(0, n1);
 981                         gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
 982                     }
 983                     grad1 = grad2;
 984                 }
 985             }
 986             {
 987                 AutoLock lock(ann->mtx);
 988                 *pE += E;
 989             }
 990         }
 991     };
 992
 993     int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
 994     {
 995         const int max_buf_size = 1 << 16;
 996         int i, iter = -1, count = inputs.rows;
 997
 998         double prev_E = DBL_MAX*0.5;
 999
1000         int max_iter = termCrit.maxCount;
1001         double epsilon = termCrit.epsilon;
1002         double dw_plus = params.rpDWPlus;
1003         double dw_minus = params.rpDWMinus;
1004         double dw_min = params.rpDWMin;
1005         double dw_max = params.rpDWMax;
1006
1007         int l_count = layer_count();
1008
1009         // allocate buffers
1010         vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);
1011
1012         int total = 0;
1013         for( i = 0; i < l_count; i++ )
1014         {
1015             total += layer_sizes[i];
1016             dw[i].create(weights[i].size(), CV_64F);
1017             dw[i].setTo(Scalar::all(params.rpDW0));
1018             prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
1019             dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
1020         }
1021
1022         int dcount0 = max_buf_size/(2*total);
1023         dcount0 = std::max( dcount0, 1 );
1024         dcount0 = std::min( dcount0, count );
1025         int chunk_count = (count + dcount0 - 1)/dcount0;
1026
1027         // run rprop loop
1028         /*
1029          y_i(t) = w_i(t)*x_{i-1}(t)
1030          x_i(t) = f(y_i(t))
1031          E = sum_over_all_samples(1/2*||u - x_N||^2)
1032          grad_N = (x_N - u)*f'(y_i)
1033
1034          std::min(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
1035          dw_i{jk}(t) = std::max(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
1036          dw_i{jk}(t-1) else
1037
1038          if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
1039          dE/dw_i{jk}(t)<-0
1040          else
1041          w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
1042          grad_{i-1}(t) = w_i^t(t)*grad_i(t)
1043          */
1044         for( iter = 0; iter < max_iter; iter++ )
1045         {
1046             double E = 0;
1047
1048             for( i = 0; i < l_count; i++ )
1049                 dEdw[i].setTo(Scalar::all(0));
1050
1051             // first, iterate through all the samples and compute dEdw
1052             RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
1053             parallel_for_(Range(0, chunk_count), invoker);
1054             //invoker(Range(0, chunk_count));
1055
1056             // now update weights
1057             for( i = 1; i < l_count; i++ )
1058             {
1059                 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
1060                 for( int k = 0; k <= n1; k++ )
1061                 {
1062                     CV_Assert(weights[i].size() == Size(n2, n1+1));
1063                     double* wk = weights[i].ptr<double>(k);
1064                     double* dwk = dw[i].ptr<double>(k);
1065                     double* dEdwk = dEdw[i].ptr<double>(k);
1066                     schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);
1067
1068                     for( int j = 0; j < n2; j++ )
1069                     {
1070                         double Eval = dEdwk[j];
1071                         double dval = dwk[j];
1072                         double wval = wk[j];
1073                         int s = CV_SIGN(Eval);
1074                         int ss = prevEk[j]*s;
1075                         if( ss > 0 )
1076                         {
1077                             dval *= dw_plus;
1078                             dval = std::min( dval, dw_max );
1079                             dwk[j] = dval;
1080                             wk[j] = wval + dval*s;
1081                         }
1082                         else if( ss < 0 )
1083                         {
1084                             dval *= dw_minus;
1085                             dval = std::max( dval, dw_min );
1086                             prevEk[j] = 0;
1087                             dwk[j] = dval;
1088                             wk[j] = wval + dval*s;
1089                         }
1090                         else
1091                         {
1092                             prevEk[j] = (schar)s;
1093                             wk[j] = wval + dval*s;
1094                         }
1095                         dEdwk[j] = 0.;
1096                     }
1097                 }
1098             }
1099
1100             //printf("%d. E = %g\n", iter, E);
1101             if( fabs(prev_E - E) < epsilon )
1102                 break;
1103             prev_E = E;
1104         }
1105
1106         return iter;
1107     }
1108
1109     void write_params( FileStorage& fs ) const
1110     {
1111         const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
1112                                       activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
1113                                       activ_func == GAUSSIAN ? "GAUSSIAN" : 0;
1114
1115         if( activ_func_name )
1116             fs << "activation_function" << activ_func_name;
1117         else
1118             fs << "activation_function_id" << activ_func;
1119
1120         if( activ_func != IDENTITY )
1121         {
1122             fs << "f_param1" << f_param1;
1123             fs << "f_param2" << f_param2;
1124         }
1125
1126         fs << "min_val" << min_val << "max_val" << max_val << "min_val1" << min_val1 << "max_val1" << max_val1;
1127
1128         fs << "training_params" << "{";
1129         if( params.trainMethod == ANN_MLP::BACKPROP )
1130         {
1131             fs << "train_method" << "BACKPROP";
1132             fs << "dw_scale" << params.bpDWScale;
1133             fs << "moment_scale" << params.bpMomentScale;
1134         }
1135         else if( params.trainMethod == ANN_MLP::RPROP )
1136         {
1137             fs << "train_method" << "RPROP";
1138             fs << "dw0" << params.rpDW0;
1139             fs << "dw_plus" << params.rpDWPlus;
1140             fs << "dw_minus" << params.rpDWMinus;
1141             fs << "dw_min" << params.rpDWMin;
1142             fs << "dw_max" << params.rpDWMax;
1143         }
1144         else
1145             CV_Error(CV_StsError, "Unknown training method");
1146
1147         fs << "term_criteria" << "{";
1148         if( params.termCrit.type & TermCriteria::EPS )
1149             fs << "epsilon" << params.termCrit.epsilon;
1150         if( params.termCrit.type & TermCriteria::COUNT )
1151             fs << "iterations" << params.termCrit.maxCount;
1152         fs << "}" << "}";
1153     }
1154
1155     void write( FileStorage& fs ) const
1156     {
1157         if( layer_sizes.empty() )
1158             return;
1159         int i, l_count = layer_count();
1160
1161         writeFormat(fs);
1162         fs << "layer_sizes" << layer_sizes;
1163
1164         write_params( fs );
1165
1166         size_t esz = weights[0].elemSize();
1167
1168         fs << "input_scale" << "[";
1169         fs.writeRaw("d", weights[0].ptr(), weights[0].total()*esz);
1170
1171         fs << "]" << "output_scale" << "[";
1172         fs.writeRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1173
1174         fs << "]" << "inv_output_scale" << "[";
1175         fs.writeRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1176
1177         fs << "]" << "weights" << "[";
1178         for( i = 1; i < l_count; i++ )
1179         {
1180             fs << "[";
1181             fs.writeRaw("d", weights[i].ptr(), weights[i].total()*esz);
1182             fs << "]";
1183         }
1184         fs << "]";
1185     }
1186
1187     void read_params( const FileNode& fn )
1188     {
1189         String activ_func_name = (String)fn["activation_function"];
1190         if( !activ_func_name.empty() )
1191         {
1192             activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
1193                          activ_func_name == "IDENTITY" ? IDENTITY :
1194                          activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
1195             CV_Assert( activ_func >= 0 );
1196         }
1197         else
1198             activ_func = (int)fn["activation_function_id"];
1199
1200         f_param1 = (double)fn["f_param1"];
1201         f_param2 = (double)fn["f_param2"];
1202
1203         setActivationFunction( activ_func, f_param1, f_param2 );
1204
1205         min_val = (double)fn["min_val"];
1206         max_val = (double)fn["max_val"];
1207         min_val1 = (double)fn["min_val1"];
1208         max_val1 = (double)fn["max_val1"];
1209
1210         FileNode tpn = fn["training_params"];
1211         params = AnnParams();
1212
1213         if( !tpn.empty() )
1214         {
1215             String tmethod_name = (String)tpn["train_method"];
1216
1217             if( tmethod_name == "BACKPROP" )
1218             {
1219                 params.trainMethod = ANN_MLP::BACKPROP;
1220                 params.bpDWScale = (double)tpn["dw_scale"];
1221                 params.bpMomentScale = (double)tpn["moment_scale"];
1222             }
1223             else if( tmethod_name == "RPROP" )
1224             {
1225                 params.trainMethod = ANN_MLP::RPROP;
1226                 params.rpDW0 = (double)tpn["dw0"];
1227                 params.rpDWPlus = (double)tpn["dw_plus"];
1228                 params.rpDWMinus = (double)tpn["dw_minus"];
1229                 params.rpDWMin = (double)tpn["dw_min"];
1230                 params.rpDWMax = (double)tpn["dw_max"];
1231             }
1232             else
1233                 CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");
1234
1235             FileNode tcn = tpn["term_criteria"];
1236             if( !tcn.empty() )
1237             {
1238                 FileNode tcn_e = tcn["epsilon"];
1239                 FileNode tcn_i = tcn["iterations"];
1240                 params.termCrit.type = 0;
1241                 if( !tcn_e.empty() )
1242                 {
1243                     params.termCrit.type |= TermCriteria::EPS;
1244                     params.termCrit.epsilon = (double)tcn_e;
1245                 }
1246                 if( !tcn_i.empty() )
1247                 {
1248                     params.termCrit.type |= TermCriteria::COUNT;
1249                     params.termCrit.maxCount = (int)tcn_i;
1250                 }
1251             }
1252         }
1253     }
1254
1255     void read( const FileNode& fn )
1256     {
1257         clear();
1258
1259         vector<int> _layer_sizes;
1260         readVectorOrMat(fn["layer_sizes"], _layer_sizes);
1261         setLayerSizes( _layer_sizes );
1262
1263         int i, l_count = layer_count();
1264         read_params(fn);
1265
1266         size_t esz = weights[0].elemSize();
1267
1268         FileNode w = fn["input_scale"];
1269         w.readRaw("d", weights[0].ptr(), weights[0].total()*esz);
1270
1271         w = fn["output_scale"];
1272         w.readRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1273
1274         w = fn["inv_output_scale"];
1275         w.readRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1276
1277         FileNodeIterator w_it = fn["weights"].begin();
1278
1279         for( i = 1; i < l_count; i++, ++w_it )
1280             (*w_it).readRaw("d", weights[i].ptr(), weights[i].total()*esz);
1281         trained = true;
1282     }
1283
1284     Mat getWeights(int layerIdx) const
1285     {
1286         CV_Assert( 0 <= layerIdx && layerIdx < (int)weights.size() );
1287         return weights[layerIdx];
1288     }
1289
1290     bool isTrained() const
1291     {
1292         return trained;
1293     }
1294
1295     bool isClassifier() const
1296     {
1297         return false;
1298     }
1299
1300     int getVarCount() const
1301     {
1302         return layer_sizes.empty() ? 0 : layer_sizes[0];
1303     }
1304
1305     String getDefaultName() const
1306     {
1307         return "opencv_ml_ann_mlp";
1308     }
1309
1310     vector<int> layer_sizes;
1311     vector<Mat> weights;
1312     double f_param1, f_param2;
1313     double min_val, max_val, min_val1, max_val1;
1314     int activ_func;
1315     int max_lsize, max_buf_sz;
1316     AnnParams params;
1317     RNG rng;
1318     Mutex mtx;
1319     bool trained;
1320 };
1321
1322
1323 Ptr<ANN_MLP> ANN_MLP::create()
1324 {
1325     return makePtr<ANN_MLPImpl>();
1326 }
1327
1328 Ptr<ANN_MLP> ANN_MLP::load(const String& filepath)
1329 {
1330     FileStorage fs;
1331     fs.open(filepath, FileStorage::READ);
1332
1333     Ptr<ANN_MLP> ann = makePtr<ANN_MLPImpl>();
1334
1335     ((ANN_MLPImpl*)ann.get())->read(fs.getFirstTopLevelNode());
1336     return ann;
1337 }
1338
1339
1340     }}
1341
1342 /* End of file. */