modules/ml/src/ann_mlp.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //
  12 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  13 // Third party copyrights are property of their respective owners.
  14 //
  15 // Redistribution and use in source and binary forms, with or without modification,
  16 // are permitted provided that the following conditions are met:
  17 //
  18 //   * Redistribution's of source code must retain the above copyright notice,
  19 //     this list of conditions and the following disclaimer.
  20 //
  21 //   * Redistribution's in binary form must reproduce the above copyright notice,
  22 //     this list of conditions and the following disclaimer in the documentation
  23 //     and/or other materials provided with the distribution.
  24 //
  25 //   * The name of Intel Corporation may not be used to endorse or promote products
  26 //     derived from this software without specific prior written permission.
  27 //
  28 // This software is provided by the copyright holders and contributors "as is" and
  29 // any express or implied warranties, including, but not limited to, the implied
  30 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  31 // In no event shall the Intel Corporation or contributors be liable for any direct,
  32 // indirect, incidental, special, exemplary, or consequential damages
  33 // (including, but not limited to, procurement of substitute goods or services;
  34 // loss of use, data, or profits; or business interruption) however caused
  35 // and on any theory of liability, whether in contract, strict liability,
  36 // or tort (including negligence or otherwise) arising in any way out of
  37 // the use of this software, even if advised of the possibility of such damage.
  38 //
  39 //M*/
  40
  41 #include "precomp.hpp"
  42
  43 namespace cv { namespace ml {
  44
  45
  46 struct AnnParams
  47 {
  48     AnnParams()
  49     {
  50         termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
  51         trainMethod = ANN_MLP::RPROP;
  52         bpDWScale = bpMomentScale = 0.1;
  53         rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
  54         rpDWMin = FLT_EPSILON; rpDWMax = 50.;
  55         initialT=10;finalT=0.1,coolingRatio=0.95;itePerStep=10;
  56
  57     }
  58
  59     TermCriteria termCrit;
  60     int trainMethod;
  61
  62     double bpDWScale;
  63     double bpMomentScale;
  64
  65     double rpDW0;
  66     double rpDWPlus;
  67     double rpDWMinus;
  68     double rpDWMin;
  69     double rpDWMax;
  70
  71     double initialT;
  72     double finalT;
  73     double coolingRatio;
  74     int itePerStep;
  75 };
  76
  77 template <typename T>
  78 inline T inBounds(T val, T min_val, T max_val)
  79 {
  80     return std::min(std::max(val, min_val), max_val);
  81 }
  82
  83 SimulatedAnnealingSolver::~SimulatedAnnealingSolver()
  84 {
  85     if (impl) delete impl;
  86 }
  87
  88 void SimulatedAnnealingSolver::init()
  89 {
  90     impl = new SimulatedAnnealingSolver::Impl();
  91 }
  92
  93 void SimulatedAnnealingSolver::setIterPerStep(int ite)
  94 {
  95     CV_Assert(ite>0);
  96     impl->iterPerStep = ite;
  97 }
  98
  99 int SimulatedAnnealingSolver::run()
 100 {
 101     CV_Assert(impl->initialT>impl->finalT);
 102     double Ti = impl->initialT;
 103     double previousEnergy = energy();
 104     int exchange = 0;
 105     while (Ti > impl->finalT)
 106     {
 107         for (int i = 0; i < impl->iterPerStep; i++)
 108         {
 109             changedState();
 110             double newEnergy = energy();
 111             if (newEnergy < previousEnergy)
 112             {
 113                 previousEnergy = newEnergy;
 114             }
 115             else
 116             {
 117                 double r = impl->rEnergy.uniform(double(0.0), double(1.0));
 118                 if (r < exp(-(newEnergy - previousEnergy) / Ti))
 119                 {
 120                     previousEnergy = newEnergy;
 121                     exchange++;
 122                 }
 123                 else
 124                     reverseChangedState();
 125             }
 126
 127         }
 128         Ti *= impl->coolingRatio;
 129     }
 130     impl->finalT = Ti;
 131     return exchange;
 132 }
 133
 134 void SimulatedAnnealingSolver::setInitialTemperature(double x)
 135 {
 136     CV_Assert(x>0);
 137     impl->initialT = x;
 138 };
 139
 140 void SimulatedAnnealingSolver::setFinalTemperature(double x)
 141 {
 142     CV_Assert(x>0);
 143     impl->finalT = x;
 144 };
 145
 146 double SimulatedAnnealingSolver::getFinalTemperature()
 147 {
 148     return impl->finalT;
 149 };
 150
 151 void SimulatedAnnealingSolver::setCoolingRatio(double x)
 152 {
 153     CV_Assert(x>0 && x<1);
 154     impl->coolingRatio = x;
 155 };
 156
 157 class SimulatedAnnealingANN_MLP : public ml::SimulatedAnnealingSolver
 158 {
 159 public:
 160     ml::ANN_MLP *nn;
 161     Ptr<ml::TrainData> data;
 162     int nbVariables;
 163     vector<double*> adrVariables;
 164     RNG rVar;
 165     RNG rIndex;
 166     double varTmp;
 167     int index;
 168
 169     SimulatedAnnealingANN_MLP(ml::ANN_MLP *x, Ptr<ml::TrainData> d) : nn(x), data(d)
 170     {
 171         initVarMap();
 172     };
 173     void changedState()
 174     {
 175         index = rIndex.uniform(0, nbVariables);
 176         double dv = rVar.uniform(-1.0, 1.0);
 177         varTmp = *adrVariables[index];
 178         *adrVariables[index] = dv;
 179     };
 180     void reverseChangedState()
 181     {
 182         *adrVariables[index] = varTmp;
 183     };
 184     double energy() { return nn->calcError(data, false, noArray()); }
 185 protected:
 186     void initVarMap()
 187     {
 188         Mat l = nn->getLayerSizes();
 189         nbVariables = 0;
 190         adrVariables.clear();
 191         for (int i = 1; i < l.rows-1; i++)
 192         {
 193             Mat w = nn->getWeights(i);
 194             for (int j = 0; j < w.rows; j++)
 195             {
 196                 for (int k = 0; k < w.cols; k++, nbVariables++)
 197                 {
 198                     if (j == w.rows - 1)
 199                     {
 200                         adrVariables.push_back(&w.at<double>(w.rows - 1, k));
 201                     }
 202                     else
 203                     {
 204                         adrVariables.push_back(&w.at<double>(j, k));
 205                     }
 206                 }
 207             }
 208         }
 209     }
 210
 211 };
 212
 213 double ANN_MLP::getAnnealInitialT() const
 214 {
 215     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 216     if (!this_)
 217         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 218     return this_->getAnnealInitialT();
 219 }
 220
 221 void ANN_MLP::setAnnealInitialT(double val)
 222 {
 223     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 224     if (!this_)
 225         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 226     this_->setAnnealInitialT(val);
 227 }
 228
 229 double ANN_MLP::getAnnealFinalT() const
 230 {
 231     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 232     if (!this_)
 233         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 234     return this_->getAnnealFinalT();
 235 }
 236
 237 void ANN_MLP::setAnnealFinalT(double val)
 238 {
 239     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 240     if (!this_)
 241         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 242     this_->setAnnealFinalT(val);
 243 }
 244
 245 double ANN_MLP::getAnnealCoolingRatio() const
 246 {
 247     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 248     if (!this_)
 249         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 250     return this_->getAnnealCoolingRatio();
 251 }
 252
 253 void ANN_MLP::setAnnealCoolingRatio(double val)
 254 {
 255     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 256     if (!this_)
 257         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 258     this_->setAnnealCoolingRatio(val);
 259 }
 260
 261 int ANN_MLP::getAnnealItePerStep() const
 262 {
 263     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 264     if (!this_)
 265         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 266     return this_->getAnnealItePerStep();
 267 }
 268
 269 void ANN_MLP::setAnnealItePerStep(int val)
 270 {
 271     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 272     if (!this_)
 273         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 274     this_->setAnnealItePerStep(val);
 275 }
 276
 277
 278 class ANN_MLPImpl : public ANN_MLP_ANNEAL
 279 {
 280 public:
 281     ANN_MLPImpl()
 282     {
 283         clear();
 284         setActivationFunction( SIGMOID_SYM, 0, 0);
 285         setLayerSizes(Mat());
 286         setTrainMethod(ANN_MLP::RPROP, 0.1, FLT_EPSILON);
 287     }
 288
 289     virtual ~ANN_MLPImpl() {}
 290
 291     CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.termCrit)
 292     CV_IMPL_PROPERTY(double, BackpropWeightScale, params.bpDWScale)
 293     CV_IMPL_PROPERTY(double, BackpropMomentumScale, params.bpMomentScale)
 294     CV_IMPL_PROPERTY(double, RpropDW0, params.rpDW0)
 295     CV_IMPL_PROPERTY(double, RpropDWPlus, params.rpDWPlus)
 296     CV_IMPL_PROPERTY(double, RpropDWMinus, params.rpDWMinus)
 297     CV_IMPL_PROPERTY(double, RpropDWMin, params.rpDWMin)
 298     CV_IMPL_PROPERTY(double, RpropDWMax, params.rpDWMax)
 299     CV_IMPL_PROPERTY(double, AnnealInitialT, params.initialT)
 300     CV_IMPL_PROPERTY(double, AnnealFinalT, params.finalT)
 301     CV_IMPL_PROPERTY(double, AnnealCoolingRatio, params.coolingRatio)
 302     CV_IMPL_PROPERTY(int, AnnealItePerStep, params.itePerStep)
 303
 304     void clear()
 305     {
 306         min_val = max_val = min_val1 = max_val1 = 0.;
 307         rng = RNG((uint64)-1);
 308         weights.clear();
 309         trained = false;
 310         max_buf_sz = 1 << 12;
 311     }
 312
 313     int layer_count() const { return (int)layer_sizes.size(); }
 314
 315     void setTrainMethod(int method, double param1, double param2)
 316     {
 317         if (method != ANN_MLP::RPROP && method != ANN_MLP::BACKPROP && method != ANN_MLP::ANNEAL)
 318             method = ANN_MLP::RPROP;
 319         params.trainMethod = method;
 320         if(method == ANN_MLP::RPROP )
 321         {
 322             if( param1 < FLT_EPSILON )
 323                 param1 = 1.;
 324             params.rpDW0 = param1;
 325             params.rpDWMin = std::max( param2, 0. );
 326         }
 327         else if (method == ANN_MLP::BACKPROP)
 328         {
 329             if (param1 <= 0)
 330                 param1 = 0.1;
 331             params.bpDWScale = inBounds<double>(param1, 1e-3, 1.);
 332             if (param2 < 0)
 333                 param2 = 0.1;
 334             params.bpMomentScale = std::min(param2, 1.);
 335         }
 336 /*        else if (method == ANN_MLP::ANNEAL)
 337         {
 338             if (param1 <= 0)
 339                 param1 = 10;
 340             if (param2 <= 0 || param2>param1)
 341                 param2 = 0.1;
 342             if (param3 <= 0 || param3 >=1)
 343                 param3 = 0.95;
 344             if (param4 <= 0)
 345                 param4 = 10;
 346             params.initialT = param1;
 347             params.finalT = param2;
 348             params.coolingRatio = param3;
 349             params.itePerStep = param4;
 350         }*/
 351     }
 352
 353     int getTrainMethod() const
 354     {
 355         return params.trainMethod;
 356     }
 357
 358     void setActivationFunction(int _activ_func, double _f_param1, double _f_param2)
 359     {
 360         if( _activ_func < 0 || _activ_func > LEAKYRELU)
 361             CV_Error( CV_StsOutOfRange, "Unknown activation function" );
 362
 363         activ_func = _activ_func;
 364
 365         switch( activ_func )
 366         {
 367         case SIGMOID_SYM:
 368             max_val = 0.95; min_val = -max_val;
 369             max_val1 = 0.98; min_val1 = -max_val1;
 370             if( fabs(_f_param1) < FLT_EPSILON )
 371                 _f_param1 = 2./3;
 372             if( fabs(_f_param2) < FLT_EPSILON )
 373                 _f_param2 = 1.7159;
 374             break;
 375         case GAUSSIAN:
 376             max_val = 1.; min_val = 0.05;
 377             max_val1 = 1.; min_val1 = 0.02;
 378             if (fabs(_f_param1) < FLT_EPSILON)
 379                 _f_param1 = 1.;
 380             if (fabs(_f_param2) < FLT_EPSILON)
 381                 _f_param2 = 1.;
 382             break;
 383         case RELU:
 384             if (fabs(_f_param1) < FLT_EPSILON)
 385                 _f_param1 = 1;
 386             min_val = max_val = min_val1 = max_val1 = 0.;
 387             _f_param2 = 0.;
 388             break;
 389         case LEAKYRELU:
 390             if (fabs(_f_param1) < FLT_EPSILON)
 391                 _f_param1 = 0.01;
 392             min_val = max_val = min_val1 = max_val1 = 0.;
 393             _f_param2 = 0.;
 394             break;
 395         default:
 396             min_val = max_val = min_val1 = max_val1 = 0.;
 397             _f_param1 = 1.;
 398             _f_param2 = 0.;
 399         }
 400
 401         f_param1 = _f_param1;
 402         f_param2 = _f_param2;
 403     }
 404
 405
 406     void init_weights()
 407     {
 408         int i, j, k, l_count = layer_count();
 409
 410         for( i = 1; i < l_count; i++ )
 411         {
 412             int n1 = layer_sizes[i-1];
 413             int n2 = layer_sizes[i];
 414             double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
 415             double* w = weights[i].ptr<double>();
 416
 417             // initialize weights using Nguyen-Widrow algorithm
 418             for( j = 0; j < n2; j++ )
 419             {
 420                 double s = 0;
 421                 for( k = 0; k <= n1; k++ )
 422                 {
 423                     val = rng.uniform(0., 1.)*2-1.;
 424                     w[k*n2 + j] = val;
 425                     s += fabs(val);
 426                 }
 427
 428                 if( i < l_count - 1 )
 429                 {
 430                     s = 1./(s - fabs(val));
 431                     for( k = 0; k <= n1; k++ )
 432                         w[k*n2 + j] *= s;
 433                     w[n1*n2 + j] *= G*(-1+j*2./n2);
 434                 }
 435             }
 436         }
 437     }
 438
 439     Mat getLayerSizes() const
 440     {
 441         return Mat_<int>(layer_sizes, true);
 442     }
 443
 444     void setLayerSizes( InputArray _layer_sizes )
 445     {
 446         clear();
 447
 448         _layer_sizes.copyTo(layer_sizes);
 449         int l_count = layer_count();
 450
 451         weights.resize(l_count + 2);
 452         max_lsize = 0;
 453
 454         if( l_count > 0 )
 455         {
 456             for( int i = 0; i < l_count; i++ )
 457             {
 458                 int n = layer_sizes[i];
 459                 if( n < 1 + (0 < i && i < l_count-1))
 460                     CV_Error( CV_StsOutOfRange,
 461                              "there should be at least one input and one output "
 462                              "and every hidden layer must have more than 1 neuron" );
 463                 max_lsize = std::max( max_lsize, n );
 464                 if( i > 0 )
 465                     weights[i].create(layer_sizes[i-1]+1, n, CV_64F);
 466             }
 467
 468             int ninputs = layer_sizes.front();
 469             int noutputs = layer_sizes.back();
 470             weights[0].create(1, ninputs*2, CV_64F);
 471             weights[l_count].create(1, noutputs*2, CV_64F);
 472             weights[l_count+1].create(1, noutputs*2, CV_64F);
 473         }
 474     }
 475
 476     float predict( InputArray _inputs, OutputArray _outputs, int ) const
 477     {
 478         if( !trained )
 479             CV_Error( CV_StsError, "The network has not been trained or loaded" );
 480
 481         Mat inputs = _inputs.getMat();
 482         int type = inputs.type(), l_count = layer_count();
 483         int n = inputs.rows, dn0 = n;
 484
 485         CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
 486         int noutputs = layer_sizes[l_count-1];
 487         Mat outputs;
 488
 489         int min_buf_sz = 2*max_lsize;
 490         int buf_sz = n*min_buf_sz;
 491
 492         if( buf_sz > max_buf_sz )
 493         {
 494             dn0 = max_buf_sz/min_buf_sz;
 495             dn0 = std::max( dn0, 1 );
 496             buf_sz = dn0*min_buf_sz;
 497         }
 498
 499         cv::AutoBuffer<double> _buf(buf_sz+noutputs);
 500         double* buf = _buf;
 501
 502         if( !_outputs.needed() )
 503         {
 504             CV_Assert( n == 1 );
 505             outputs = Mat(n, noutputs, type, buf + buf_sz);
 506         }
 507         else
 508         {
 509             _outputs.create(n, noutputs, type);
 510             outputs = _outputs.getMat();
 511         }
 512
 513         int dn = 0;
 514         for( int i = 0; i < n; i += dn )
 515         {
 516             dn = std::min( dn0, n - i );
 517
 518             Mat layer_in = inputs.rowRange(i, i + dn);
 519             Mat layer_out( dn, layer_in.cols, CV_64F, buf);
 520
 521             scale_input( layer_in, layer_out );
 522             layer_in = layer_out;
 523
 524             for( int j = 1; j < l_count; j++ )
 525             {
 526                 double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
 527                 int cols = layer_sizes[j];
 528
 529                 layer_out = Mat(dn, cols, CV_64F, data);
 530                 Mat w = weights[j].rowRange(0, layer_in.cols);
 531                 gemm(layer_in, w, 1, noArray(), 0, layer_out);
 532                 calc_activ_func( layer_out, weights[j] );
 533
 534                 layer_in = layer_out;
 535             }
 536
 537             layer_out = outputs.rowRange(i, i + dn);
 538             scale_output( layer_in, layer_out );
 539         }
 540
 541         if( n == 1 )
 542         {
 543             int maxIdx[] = {0, 0};
 544             minMaxIdx(outputs, 0, 0, 0, maxIdx);
 545             return (float)(maxIdx[0] + maxIdx[1]);
 546         }
 547
 548         return 0.f;
 549     }
 550
 551     void scale_input( const Mat& _src, Mat& _dst ) const
 552     {
 553         int cols = _src.cols;
 554         const double* w = weights[0].ptr<double>();
 555
 556         if( _src.type() == CV_32F )
 557         {
 558             for( int i = 0; i < _src.rows; i++ )
 559             {
 560                 const float* src = _src.ptr<float>(i);
 561                 double* dst = _dst.ptr<double>(i);
 562                 for( int j = 0; j < cols; j++ )
 563                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 564             }
 565         }
 566         else
 567         {
 568             for( int i = 0; i < _src.rows; i++ )
 569             {
 570                 const double* src = _src.ptr<double>(i);
 571                 double* dst = _dst.ptr<double>(i);
 572                 for( int j = 0; j < cols; j++ )
 573                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 574             }
 575         }
 576     }
 577
 578     void scale_output( const Mat& _src, Mat& _dst ) const
 579     {
 580         int cols = _src.cols;
 581         const double* w = weights[layer_count()].ptr<double>();
 582
 583         if( _dst.type() == CV_32F )
 584         {
 585             for( int i = 0; i < _src.rows; i++ )
 586             {
 587                 const double* src = _src.ptr<double>(i);
 588                 float* dst = _dst.ptr<float>(i);
 589                 for( int j = 0; j < cols; j++ )
 590                     dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
 591             }
 592         }
 593         else
 594         {
 595             for( int i = 0; i < _src.rows; i++ )
 596             {
 597                 const double* src = _src.ptr<double>(i);
 598                 double* dst = _dst.ptr<double>(i);
 599                 for( int j = 0; j < cols; j++ )
 600                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 601             }
 602         }
 603     }
 604
 605     void calc_activ_func(Mat& sums, const Mat& w) const
 606     {
 607         const double* bias = w.ptr<double>(w.rows - 1);
 608         int i, j, n = sums.rows, cols = sums.cols;
 609         double scale = 0, scale2 = f_param2;
 610
 611         switch (activ_func)
 612         {
 613         case IDENTITY:
 614             scale = 1.;
 615             break;
 616         case SIGMOID_SYM:
 617             scale = -f_param1;
 618             break;
 619         case GAUSSIAN:
 620             scale = -f_param1*f_param1;
 621             break;
 622         case RELU:
 623             scale = 1;
 624             break;
 625         case LEAKYRELU:
 626             scale = 1;
 627             break;
 628         default:
 629             ;
 630         }
 631
 632         CV_Assert(sums.isContinuous());
 633
 634         if (activ_func != GAUSSIAN)
 635         {
 636             for (i = 0; i < n; i++)
 637             {
 638                 double* data = sums.ptr<double>(i);
 639                 for (j = 0; j < cols; j++)
 640                 {
 641                     data[j] = (data[j] + bias[j])*scale;
 642                     if (activ_func == RELU)
 643                         if (data[j] < 0)
 644                             data[j] = 0;
 645                     if (activ_func == LEAKYRELU)
 646                         if (data[j] < 0)
 647                             data[j] *= f_param1;
 648                 }
 649             }
 650
 651             if (activ_func == IDENTITY || activ_func == RELU || activ_func == LEAKYRELU)
 652                 return;
 653         }
 654         else
 655         {
 656             for (i = 0; i < n; i++)
 657             {
 658                 double* data = sums.ptr<double>(i);
 659                 for (j = 0; j < cols; j++)
 660                 {
 661                     double t = data[j] + bias[j];
 662                     data[j] = t*t*scale;
 663                 }
 664             }
 665         }
 666
 667         exp(sums, sums);
 668
 669         if (sums.isContinuous())
 670         {
 671             cols *= n;
 672             n = 1;
 673         }
 674
 675         switch (activ_func)
 676         {
 677         case SIGMOID_SYM:
 678             for (i = 0; i < n; i++)
 679             {
 680                 double* data = sums.ptr<double>(i);
 681                 for (j = 0; j < cols; j++)
 682                 {
 683                     if (!cvIsInf(data[j]))
 684                     {
 685                         double t = scale2*(1. - data[j]) / (1. + data[j]);
 686                         data[j] = t;
 687                     }
 688                     else
 689                     {
 690                         data[j] = -scale2;
 691                     }
 692                 }
 693             }
 694             break;
 695
 696         case GAUSSIAN:
 697             for (i = 0; i < n; i++)
 698             {
 699                 double* data = sums.ptr<double>(i);
 700                 for (j = 0; j < cols; j++)
 701                     data[j] = scale2*data[j];
 702             }
 703             break;
 704
 705         default:
 706             ;
 707         }
 708     }
 709
 710     void calc_activ_func_deriv(Mat& _xf, Mat& _df, const Mat& w) const
 711     {
 712         const double* bias = w.ptr<double>(w.rows - 1);
 713         int i, j, n = _xf.rows, cols = _xf.cols;
 714
 715         if (activ_func == IDENTITY)
 716         {
 717             for (i = 0; i < n; i++)
 718             {
 719                 double* xf = _xf.ptr<double>(i);
 720                 double* df = _df.ptr<double>(i);
 721
 722                 for (j = 0; j < cols; j++)
 723                 {
 724                     xf[j] += bias[j];
 725                     df[j] = 1;
 726                 }
 727             }
 728         }
 729         else if (activ_func == RELU)
 730         {
 731             for (i = 0; i < n; i++)
 732             {
 733                 double* xf = _xf.ptr<double>(i);
 734                 double* df = _df.ptr<double>(i);
 735
 736                 for (j = 0; j < cols; j++)
 737                 {
 738                     xf[j] += bias[j];
 739                     if (xf[j] < 0)
 740                     {
 741                         xf[j] = 0;
 742                         df[j] = 0;
 743                     }
 744                     else
 745                         df[j] = 1;
 746                 }
 747             }
 748         }
 749         else if (activ_func == LEAKYRELU)
 750         {
 751             for (i = 0; i < n; i++)
 752             {
 753                 double* xf = _xf.ptr<double>(i);
 754                 double* df = _df.ptr<double>(i);
 755
 756                 for (j = 0; j < cols; j++)
 757                 {
 758                     xf[j] += bias[j];
 759                     if (xf[j] < 0)
 760                     {
 761                         xf[j] = f_param1*xf[j];
 762                         df[j] = f_param1;
 763                     }
 764                     else
 765                         df[j] = 1;
 766                 }
 767             }
 768         }
 769         else if (activ_func == GAUSSIAN)
 770         {
 771             double scale = -f_param1*f_param1;
 772             double scale2 = scale*f_param2;
 773             for (i = 0; i < n; i++)
 774             {
 775                 double* xf = _xf.ptr<double>(i);
 776                 double* df = _df.ptr<double>(i);
 777
 778                 for (j = 0; j < cols; j++)
 779                 {
 780                     double t = xf[j] + bias[j];
 781                     df[j] = t * 2 * scale2;
 782                     xf[j] = t*t*scale;
 783                 }
 784             }
 785             exp(_xf, _xf);
 786
 787             for (i = 0; i < n; i++)
 788             {
 789                 double* xf = _xf.ptr<double>(i);
 790                 double* df = _df.ptr<double>(i);
 791
 792                 for (j = 0; j < cols; j++)
 793                     df[j] *= xf[j];
 794             }
 795         }
 796         else
 797         {
 798             double scale = f_param1;
 799             double scale2 = f_param2;
 800
 801             for (i = 0; i < n; i++)
 802             {
 803                 double* xf = _xf.ptr<double>(i);
 804                 double* df = _df.ptr<double>(i);
 805
 806                 for (j = 0; j < cols; j++)
 807                 {
 808                     xf[j] = (xf[j] + bias[j])*scale;
 809                     df[j] = -fabs(xf[j]);
 810                 }
 811             }
 812
 813             exp(_df, _df);
 814
 815             // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
 816             // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
 817             // 2*a*exp(-ax)/(1+exp(-ax))^2
 818             scale *= 2 * f_param2;
 819             for (i = 0; i < n; i++)
 820             {
 821                 double* xf = _xf.ptr<double>(i);
 822                 double* df = _df.ptr<double>(i);
 823
 824                 for (j = 0; j < cols; j++)
 825                 {
 826                     int s0 = xf[j] > 0 ? 1 : -1;
 827                     double t0 = 1. / (1. + df[j]);
 828                     double t1 = scale*df[j] * t0*t0;
 829                     t0 *= scale2*(1. - df[j])*s0;
 830                     df[j] = t1;
 831                     xf[j] = t0;
 832                 }
 833             }
 834         }
 835     }
 836
 837     void calc_input_scale( const Mat& inputs, int flags )
 838     {
 839         bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
 840         bool no_scale = (flags & NO_INPUT_SCALE) != 0;
 841         double* scale = weights[0].ptr<double>();
 842         int count = inputs.rows;
 843
 844         if( reset_weights )
 845         {
 846             int i, j, vcount = layer_sizes[0];
 847             int type = inputs.type();
 848             double a = no_scale ? 1. : 0.;
 849
 850             for( j = 0; j < vcount; j++ )
 851                 scale[2*j] = a, scale[j*2+1] = 0.;
 852
 853             if( no_scale )
 854                 return;
 855
 856             for( i = 0; i < count; i++ )
 857             {
 858                 const uchar* p = inputs.ptr(i);
 859                 const float* f = (const float*)p;
 860                 const double* d = (const double*)p;
 861                 for( j = 0; j < vcount; j++ )
 862                 {
 863                     double t = type == CV_32F ? (double)f[j] : d[j];
 864                     scale[j*2] += t;
 865                     scale[j*2+1] += t*t;
 866                 }
 867             }
 868
 869             for( j = 0; j < vcount; j++ )
 870             {
 871                 double s = scale[j*2], s2 = scale[j*2+1];
 872                 double m = s/count, sigma2 = s2/count - m*m;
 873                 scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
 874                 scale[j*2+1] = -m*scale[j*2];
 875             }
 876         }
 877     }
 878
 879     void calc_output_scale( const Mat& outputs, int flags )
 880     {
 881         int i, j, vcount = layer_sizes.back();
 882         int type = outputs.type();
 883         double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
 884         bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
 885         bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
 886         int l_count = layer_count();
 887         double* scale = weights[l_count].ptr<double>();
 888         double* inv_scale = weights[l_count+1].ptr<double>();
 889         int count = outputs.rows;
 890
 891         if( reset_weights )
 892         {
 893             double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;
 894
 895             for( j = 0; j < vcount; j++ )
 896             {
 897                 scale[2*j] = inv_scale[2*j] = a0;
 898                 scale[j*2+1] = inv_scale[2*j+1] = b0;
 899             }
 900
 901             if( no_scale )
 902                 return;
 903         }
 904
 905         for( i = 0; i < count; i++ )
 906         {
 907             const uchar* p = outputs.ptr(i);
 908             const float* f = (const float*)p;
 909             const double* d = (const double*)p;
 910
 911             for( j = 0; j < vcount; j++ )
 912             {
 913                 double t = type == CV_32F ? (double)f[j] : d[j];
 914
 915                 if( reset_weights )
 916                 {
 917                     double mj = scale[j*2], Mj = scale[j*2+1];
 918                     if( mj > t ) mj = t;
 919                     if( Mj < t ) Mj = t;
 920
 921                     scale[j*2] = mj;
 922                     scale[j*2+1] = Mj;
 923                 }
 924                 else if( !no_scale )
 925                 {
 926                     t = t*inv_scale[j*2] + inv_scale[2*j+1];
 927                     if( t < m1 || t > M1 )
 928                         CV_Error( CV_StsOutOfRange,
 929                                  "Some of new output training vector components run exceed the original range too much" );
 930                 }
 931             }
 932         }
 933
 934         if( reset_weights )
 935             for( j = 0; j < vcount; j++ )
 936             {
 937                 // map mj..Mj to m..M
 938                 double mj = scale[j*2], Mj = scale[j*2+1];
 939                 double a, b;
 940                 double delta = Mj - mj;
 941                 if( delta < DBL_EPSILON )
 942                     a = 1, b = (M + m - Mj - mj)*0.5;
 943                 else
 944                     a = (M - m)/delta, b = m - mj*a;
 945                 inv_scale[j*2] = a; inv_scale[j*2+1] = b;
 946                 a = 1./a; b = -b*a;
 947                 scale[j*2] = a; scale[j*2+1] = b;
 948             }
 949     }
 950
 951     void prepare_to_train( const Mat& inputs, const Mat& outputs,
 952                            Mat& sample_weights, int flags )
 953     {
 954         if( layer_sizes.empty() )
 955             CV_Error( CV_StsError,
 956                      "The network has not been created. Use method create or the appropriate constructor" );
 957
 958         if( (inputs.type() != CV_32F && inputs.type() != CV_64F) ||
 959             inputs.cols != layer_sizes[0] )
 960             CV_Error( CV_StsBadArg,
 961                      "input training data should be a floating-point matrix with "
 962                      "the number of rows equal to the number of training samples and "
 963                      "the number of columns equal to the size of 0-th (input) layer" );
 964
 965         if( (outputs.type() != CV_32F && outputs.type() != CV_64F) ||
 966             outputs.cols != layer_sizes.back() )
 967             CV_Error( CV_StsBadArg,
 968                      "output training data should be a floating-point matrix with "
 969                      "the number of rows equal to the number of training samples and "
 970                      "the number of columns equal to the size of last (output) layer" );
 971
 972         if( inputs.rows != outputs.rows )
 973             CV_Error( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );
 974
 975         Mat temp;
 976         double s = sum(sample_weights)[0];
 977         sample_weights.convertTo(temp, CV_64F, 1./s);
 978         sample_weights = temp;
 979
 980         calc_input_scale( inputs, flags );
 981         calc_output_scale( outputs, flags );
 982     }
 983
 984     bool train( const Ptr<TrainData>& trainData, int flags )
 985     {
 986         const int MAX_ITER = 1000;
 987         const double DEFAULT_EPSILON = FLT_EPSILON;
 988
 989         // initialize training data
 990         Mat inputs = trainData->getTrainSamples();
 991         Mat outputs = trainData->getTrainResponses();
 992         Mat sw = trainData->getTrainSampleWeights();
 993         prepare_to_train( inputs, outputs, sw, flags );
 994
 995         // ... and link weights
 996         if( !(flags & UPDATE_WEIGHTS) )
 997             init_weights();
 998
 999         TermCriteria termcrit;
1000         termcrit.type = TermCriteria::COUNT + TermCriteria::EPS;
1001         termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
1002         termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);
1003
1004         int iter = 0;
1005         switch(params.trainMethod){
1006         case ANN_MLP::BACKPROP:
1007             iter = train_backprop(inputs, outputs, sw, termcrit);
1008             break;
1009         case ANN_MLP::RPROP:
1010             iter = train_rprop(inputs, outputs, sw, termcrit);
1011             break;
1012         case ANN_MLP::ANNEAL:
1013             iter = train_anneal(trainData);
1014             break;
1015         }
1016         trained = iter > 0;
1017         return trained;
1018     }
1019     int train_anneal(const Ptr<TrainData>& trainData)
1020     {
1021         SimulatedAnnealingANN_MLP t(this, trainData);
1022         t.setFinalTemperature(params.finalT);
1023         t.setInitialTemperature(params.initialT);
1024         t.setCoolingRatio(params.coolingRatio);
1025         t.setIterPerStep(params.itePerStep);
1026         trained = true; // Enable call to CalcError
1027         int iter =  t.run();
1028         trained =false;
1029         return iter;
1030     }
1031
1032     int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
1033     {
1034         int i, j, k;
1035         double prev_E = DBL_MAX*0.5, E = 0;
1036         int itype = inputs.type(), otype = outputs.type();
1037
1038         int count = inputs.rows;
1039
1040         int iter = -1, max_iter = termCrit.maxCount*count;
1041         double epsilon = termCrit.epsilon*count;
1042
1043         int l_count = layer_count();
1044         int ivcount = layer_sizes[0];
1045         int ovcount = layer_sizes.back();
1046
1047         // allocate buffers
1048         vector<vector<double> > x(l_count);
1049         vector<vector<double> > df(l_count);
1050         vector<Mat> dw(l_count);
1051
1052         for( i = 0; i < l_count; i++ )
1053         {
1054             int n = layer_sizes[i];
1055             x[i].resize(n+1);
1056             df[i].resize(n);
1057             dw[i] = Mat::zeros(weights[i].size(), CV_64F);
1058         }
1059
1060         Mat _idx_m(1, count, CV_32S);
1061         int* _idx = _idx_m.ptr<int>();
1062         for( i = 0; i < count; i++ )
1063             _idx[i] = i;
1064
1065         AutoBuffer<double> _buf(max_lsize*2);
1066         double* buf[] = { _buf, (double*)_buf + max_lsize };
1067
1068         const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();
1069
1070         // run back-propagation loop
1071         /*
1072          y_i = w_i*x_{i-1}
1073          x_i = f(y_i)
1074          E = 1/2*||u - x_N||^2
1075          grad_N = (x_N - u)*f'(y_i)
1076          dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
1077          w_i(t+1) = w_i(t) + dw_i(t)
1078          grad_{i-1} = w_i^t*grad_i
1079         */
1080         for( iter = 0; iter < max_iter; iter++ )
1081         {
1082             int idx = iter % count;
1083             double sweight = sw ? count*sw[idx] : 1.;
1084
1085             if( idx == 0 )
1086             {
1087                 //printf("%d. E = %g\n", iter/count, E);
1088                 if( fabs(prev_E - E) < epsilon )
1089                     break;
1090                 prev_E = E;
1091                 E = 0;
1092
1093                 // shuffle indices
1094                 for( i = 0; i <count; i++ )
1095                 {
1096                     j = rng.uniform(0, count);
1097                     k = rng.uniform(0, count);
1098                     std::swap(_idx[j], _idx[k]);
1099                 }
1100             }
1101
1102             idx = _idx[idx];
1103
1104             const uchar* x0data_p = inputs.ptr(idx);
1105             const float* x0data_f = (const float*)x0data_p;
1106             const double* x0data_d = (const double*)x0data_p;
1107
1108             double* w = weights[0].ptr<double>();
1109             for( j = 0; j < ivcount; j++ )
1110                 x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];
1111
1112             Mat x1( 1, ivcount, CV_64F, &x[0][0] );
1113
1114             // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
1115             for( i = 1; i < l_count; i++ )
1116             {
1117                 int n = layer_sizes[i];
1118                 Mat x2(1, n, CV_64F, &x[i][0] );
1119                 Mat _w = weights[i].rowRange(0, x1.cols);
1120                 gemm(x1, _w, 1, noArray(), 0, x2);
1121                 Mat _df(1, n, CV_64F, &df[i][0] );
1122                 calc_activ_func_deriv( x2, _df, weights[i] );
1123                 x1 = x2;
1124             }
1125
1126             Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
1127             w = weights[l_count+1].ptr<double>();
1128
1129             // calculate error
1130             const uchar* udata_p = outputs.ptr(idx);
1131             const float* udata_f = (const float*)udata_p;
1132             const double* udata_d = (const double*)udata_p;
1133
1134             double* gdata = grad1.ptr<double>();
1135             for( k = 0; k < ovcount; k++ )
1136             {
1137                 double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
1138                 gdata[k] = t*sweight;
1139                 E += t*t;
1140             }
1141             E *= sweight;
1142
1143             // backward pass, update weights
1144             for( i = l_count-1; i > 0; i-- )
1145             {
1146                 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
1147                 Mat _df(1, n2, CV_64F, &df[i][0]);
1148                 multiply( grad1, _df, grad1 );
1149                 Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
1150                 x[i-1][n1] = 1.;
1151                 gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
1152                 add( weights[i], dw[i], weights[i] );
1153                 if( i > 1 )
1154                 {
1155                     Mat grad2(1, n1, CV_64F, buf[i&1]);
1156                     Mat _w = weights[i].rowRange(0, n1);
1157                     gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
1158                     grad1 = grad2;
1159                 }
1160             }
1161         }
1162
1163         iter /= count;
1164         return iter;
1165     }
1166
1167     struct RPropLoop : public ParallelLoopBody
1168     {
1169         RPropLoop(ANN_MLPImpl* _ann,
1170                   const Mat& _inputs, const Mat& _outputs, const Mat& _sw,
1171                   int _dcount0, vector<Mat>& _dEdw, double* _E)
1172         {
1173             ann = _ann;
1174             inputs = _inputs;
1175             outputs = _outputs;
1176             sw = _sw.ptr<double>();
1177             dcount0 = _dcount0;
1178             dEdw = &_dEdw;
1179             pE = _E;
1180         }
1181
1182         ANN_MLPImpl* ann;
1183         vector<Mat>* dEdw;
1184         Mat inputs, outputs;
1185         const double* sw;
1186         int dcount0;
1187         double* pE;
1188
1189         void operator()( const Range& range ) const
1190         {
1191             double inv_count = 1./inputs.rows;
1192             int ivcount = ann->layer_sizes.front();
1193             int ovcount = ann->layer_sizes.back();
1194             int itype = inputs.type(), otype = outputs.type();
1195             int count = inputs.rows;
1196             int i, j, k, l_count = ann->layer_count();
1197             vector<vector<double> > x(l_count);
1198             vector<vector<double> > df(l_count);
1199             vector<double> _buf(ann->max_lsize*dcount0*2);
1200             double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
1201             double E = 0;
1202
1203             for( i = 0; i < l_count; i++ )
1204             {
1205                 x[i].resize(ann->layer_sizes[i]*dcount0);
1206                 df[i].resize(ann->layer_sizes[i]*dcount0);
1207             }
1208
1209             for( int si = range.start; si < range.end; si++ )
1210             {
1211                 int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
1212                 int dcount = i1 - i0;
1213                 const double* w = ann->weights[0].ptr<double>();
1214
1215                 // grab and preprocess input data
1216                 for( i = 0; i < dcount; i++ )
1217                 {
1218                     const uchar* x0data_p = inputs.ptr(i0 + i);
1219                     const float* x0data_f = (const float*)x0data_p;
1220                     const double* x0data_d = (const double*)x0data_p;
1221
1222                     double* xdata = &x[0][i*ivcount];
1223                     for( j = 0; j < ivcount; j++ )
1224                         xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
1225                 }
1226                 Mat x1(dcount, ivcount, CV_64F, &x[0][0]);
1227
1228                 // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
1229                 for( i = 1; i < l_count; i++ )
1230                 {
1231                     Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
1232                     Mat _w = ann->weights[i].rowRange(0, x1.cols);
1233                     gemm( x1, _w, 1, noArray(), 0, x2 );
1234                     Mat _df( x2.size(), CV_64F, &df[i][0] );
1235                     ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
1236                     x1 = x2;
1237                 }
1238
1239                 Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);
1240
1241                 w = ann->weights[l_count+1].ptr<double>();
1242
1243                 // calculate error
1244                 for( i = 0; i < dcount; i++ )
1245                 {
1246                     const uchar* udata_p = outputs.ptr(i0+i);
1247                     const float* udata_f = (const float*)udata_p;
1248                     const double* udata_d = (const double*)udata_p;
1249
1250                     const double* xdata = &x[l_count-1][i*ovcount];
1251                     double* gdata = grad1.ptr<double>(i);
1252                     double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
1253
1254                     for( j = 0; j < ovcount; j++ )
1255                     {
1256                         double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
1257                         gdata[j] = t*sweight;
1258                         E1 += t*t;
1259                     }
1260                     E += sweight*E1;
1261                 }
1262
1263                 for( i = l_count-1; i > 0; i-- )
1264                 {
1265                     int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
1266                     Mat _df(dcount, n2, CV_64F, &df[i][0]);
1267                     multiply(grad1, _df, grad1);
1268
1269                     {
1270                         AutoLock lock(ann->mtx);
1271                         Mat _dEdw = dEdw->at(i).rowRange(0, n1);
1272                         x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
1273                         gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);
1274
1275                         // update bias part of dEdw
1276                         double* dst = dEdw->at(i).ptr<double>(n1);
1277                         for( k = 0; k < dcount; k++ )
1278                         {
1279                             const double* src = grad1.ptr<double>(k);
1280                             for( j = 0; j < n2; j++ )
1281                                 dst[j] += src[j];
1282                         }
1283                     }
1284
1285                     Mat grad2( dcount, n1, CV_64F, buf[i&1] );
1286                     if( i > 1 )
1287                     {
1288                         Mat _w = ann->weights[i].rowRange(0, n1);
1289                         gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
1290                     }
1291                     grad1 = grad2;
1292                 }
1293             }
1294             {
1295                 AutoLock lock(ann->mtx);
1296                 *pE += E;
1297             }
1298         }
1299     };
1300
1301     int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
1302     {
1303         const int max_buf_size = 1 << 16;
1304         int i, iter = -1, count = inputs.rows;
1305
1306         double prev_E = DBL_MAX*0.5;
1307
1308         int max_iter = termCrit.maxCount;
1309         double epsilon = termCrit.epsilon;
1310         double dw_plus = params.rpDWPlus;
1311         double dw_minus = params.rpDWMinus;
1312         double dw_min = params.rpDWMin;
1313         double dw_max = params.rpDWMax;
1314
1315         int l_count = layer_count();
1316
1317         // allocate buffers
1318         vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);
1319
1320         int total = 0;
1321         for( i = 0; i < l_count; i++ )
1322         {
1323             total += layer_sizes[i];
1324             dw[i].create(weights[i].size(), CV_64F);
1325             dw[i].setTo(Scalar::all(params.rpDW0));
1326             prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
1327             dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
1328         }
1329
1330         int dcount0 = max_buf_size/(2*total);
1331         dcount0 = std::max( dcount0, 1 );
1332         dcount0 = std::min( dcount0, count );
1333         int chunk_count = (count + dcount0 - 1)/dcount0;
1334
1335         // run rprop loop
1336         /*
1337          y_i(t) = w_i(t)*x_{i-1}(t)
1338          x_i(t) = f(y_i(t))
1339          E = sum_over_all_samples(1/2*||u - x_N||^2)
1340          grad_N = (x_N - u)*f'(y_i)
1341
1342          std::min(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
1343          dw_i{jk}(t) = std::max(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
1344          dw_i{jk}(t-1) else
1345
1346          if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
1347          dE/dw_i{jk}(t)<-0
1348          else
1349          w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
1350          grad_{i-1}(t) = w_i^t(t)*grad_i(t)
1351          */
1352         for( iter = 0; iter < max_iter; iter++ )
1353         {
1354             double E = 0;
1355
1356             for( i = 0; i < l_count; i++ )
1357                 dEdw[i].setTo(Scalar::all(0));
1358
1359             // first, iterate through all the samples and compute dEdw
1360             RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
1361             parallel_for_(Range(0, chunk_count), invoker);
1362             //invoker(Range(0, chunk_count));
1363
1364             // now update weights
1365             for( i = 1; i < l_count; i++ )
1366             {
1367                 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
1368                 for( int k = 0; k <= n1; k++ )
1369                 {
1370                     CV_Assert(weights[i].size() == Size(n2, n1+1));
1371                     double* wk = weights[i].ptr<double>(k);
1372                     double* dwk = dw[i].ptr<double>(k);
1373                     double* dEdwk = dEdw[i].ptr<double>(k);
1374                     schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);
1375
1376                     for( int j = 0; j < n2; j++ )
1377                     {
1378                         double Eval = dEdwk[j];
1379                         double dval = dwk[j];
1380                         double wval = wk[j];
1381                         int s = CV_SIGN(Eval);
1382                         int ss = prevEk[j]*s;
1383                         if( ss > 0 )
1384                         {
1385                             dval *= dw_plus;
1386                             dval = std::min( dval, dw_max );
1387                             dwk[j] = dval;
1388                             wk[j] = wval + dval*s;
1389                         }
1390                         else if( ss < 0 )
1391                         {
1392                             dval *= dw_minus;
1393                             dval = std::max( dval, dw_min );
1394                             prevEk[j] = 0;
1395                             dwk[j] = dval;
1396                             wk[j] = wval + dval*s;
1397                         }
1398                         else
1399                         {
1400                             prevEk[j] = (schar)s;
1401                             wk[j] = wval + dval*s;
1402                         }
1403                         dEdwk[j] = 0.;
1404                     }
1405                 }
1406             }
1407
1408             //printf("%d. E = %g\n", iter, E);
1409             if( fabs(prev_E - E) < epsilon )
1410                 break;
1411             prev_E = E;
1412         }
1413
1414         return iter;
1415     }
1416
1417     void write_params( FileStorage& fs ) const
1418     {
1419         const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
1420                                       activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
1421                                       activ_func == GAUSSIAN ? "GAUSSIAN" :
1422                                       activ_func == RELU ? "RELU" :
1423                                       activ_func == LEAKYRELU ? "LEAKYRELU" : 0;
1424
1425         if( activ_func_name )
1426             fs << "activation_function" << activ_func_name;
1427         else
1428             fs << "activation_function_id" << activ_func;
1429
1430         if( activ_func != IDENTITY )
1431         {
1432             fs << "f_param1" << f_param1;
1433             fs << "f_param2" << f_param2;
1434         }
1435
1436         fs << "min_val" << min_val << "max_val" << max_val << "min_val1" << min_val1 << "max_val1" << max_val1;
1437
1438         fs << "training_params" << "{";
1439         if( params.trainMethod == ANN_MLP::BACKPROP )
1440         {
1441             fs << "train_method" << "BACKPROP";
1442             fs << "dw_scale" << params.bpDWScale;
1443             fs << "moment_scale" << params.bpMomentScale;
1444         }
1445         else if (params.trainMethod == ANN_MLP::RPROP)
1446         {
1447             fs << "train_method" << "RPROP";
1448             fs << "dw0" << params.rpDW0;
1449             fs << "dw_plus" << params.rpDWPlus;
1450             fs << "dw_minus" << params.rpDWMinus;
1451             fs << "dw_min" << params.rpDWMin;
1452             fs << "dw_max" << params.rpDWMax;
1453         }
1454         else if (params.trainMethod == ANN_MLP::ANNEAL)
1455         {
1456             fs << "train_method" << "ANNEAL";
1457             fs << "initialT" << params.initialT;
1458             fs << "finalT" << params.finalT;
1459             fs << "coolingRatio" << params.coolingRatio;
1460             fs << "itePerStep" << params.itePerStep;
1461         }
1462         else
1463             CV_Error(CV_StsError, "Unknown training method");
1464
1465         fs << "term_criteria" << "{";
1466         if( params.termCrit.type & TermCriteria::EPS )
1467             fs << "epsilon" << params.termCrit.epsilon;
1468         if( params.termCrit.type & TermCriteria::COUNT )
1469             fs << "iterations" << params.termCrit.maxCount;
1470         fs << "}" << "}";
1471     }
1472
1473     void write( FileStorage& fs ) const
1474     {
1475         if( layer_sizes.empty() )
1476             return;
1477         int i, l_count = layer_count();
1478
1479         writeFormat(fs);
1480         fs << "layer_sizes" << layer_sizes;
1481
1482         write_params( fs );
1483
1484         size_t esz = weights[0].elemSize();
1485
1486         fs << "input_scale" << "[";
1487         fs.writeRaw("d", weights[0].ptr(), weights[0].total()*esz);
1488
1489         fs << "]" << "output_scale" << "[";
1490         fs.writeRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1491
1492         fs << "]" << "inv_output_scale" << "[";
1493         fs.writeRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1494
1495         fs << "]" << "weights" << "[";
1496         for( i = 1; i < l_count; i++ )
1497         {
1498             fs << "[";
1499             fs.writeRaw("d", weights[i].ptr(), weights[i].total()*esz);
1500             fs << "]";
1501         }
1502         fs << "]";
1503     }
1504
1505     void read_params( const FileNode& fn )
1506     {
1507         String activ_func_name = (String)fn["activation_function"];
1508         if( !activ_func_name.empty() )
1509         {
1510             activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
1511                          activ_func_name == "IDENTITY" ? IDENTITY :
1512                          activ_func_name == "RELU" ? RELU :
1513                          activ_func_name == "LEAKYRELU" ? LEAKYRELU :
1514                          activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
1515             CV_Assert( activ_func >= 0 );
1516         }
1517         else
1518             activ_func = (int)fn["activation_function_id"];
1519
1520         f_param1 = (double)fn["f_param1"];
1521         f_param2 = (double)fn["f_param2"];
1522
1523         setActivationFunction( activ_func, f_param1, f_param2);
1524
1525         min_val = (double)fn["min_val"];
1526         max_val = (double)fn["max_val"];
1527         min_val1 = (double)fn["min_val1"];
1528         max_val1 = (double)fn["max_val1"];
1529
1530         FileNode tpn = fn["training_params"];
1531         params = AnnParams();
1532
1533         if( !tpn.empty() )
1534         {
1535             String tmethod_name = (String)tpn["train_method"];
1536
1537             if( tmethod_name == "BACKPROP" )
1538             {
1539                 params.trainMethod = ANN_MLP::BACKPROP;
1540                 params.bpDWScale = (double)tpn["dw_scale"];
1541                 params.bpMomentScale = (double)tpn["moment_scale"];
1542             }
1543             else if (tmethod_name == "RPROP")
1544             {
1545                 params.trainMethod = ANN_MLP::RPROP;
1546                 params.rpDW0 = (double)tpn["dw0"];
1547                 params.rpDWPlus = (double)tpn["dw_plus"];
1548                 params.rpDWMinus = (double)tpn["dw_minus"];
1549                 params.rpDWMin = (double)tpn["dw_min"];
1550                 params.rpDWMax = (double)tpn["dw_max"];
1551             }
1552             else if (tmethod_name == "ANNEAL")
1553             {
1554                 params.trainMethod = ANN_MLP::ANNEAL;
1555                 params.initialT = (double)tpn["initialT"];
1556                 params.finalT = (double)tpn["finalT"];
1557                 params.coolingRatio = (double)tpn["coolingRatio"];
1558                 params.itePerStep = tpn["itePerStep"];
1559             }
1560             else
1561                 CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");
1562
1563             FileNode tcn = tpn["term_criteria"];
1564             if( !tcn.empty() )
1565             {
1566                 FileNode tcn_e = tcn["epsilon"];
1567                 FileNode tcn_i = tcn["iterations"];
1568                 params.termCrit.type = 0;
1569                 if( !tcn_e.empty() )
1570                 {
1571                     params.termCrit.type |= TermCriteria::EPS;
1572                     params.termCrit.epsilon = (double)tcn_e;
1573                 }
1574                 if( !tcn_i.empty() )
1575                 {
1576                     params.termCrit.type |= TermCriteria::COUNT;
1577                     params.termCrit.maxCount = (int)tcn_i;
1578                 }
1579             }
1580         }
1581     }
1582
1583     void read( const FileNode& fn )
1584     {
1585         clear();
1586
1587         vector<int> _layer_sizes;
1588         readVectorOrMat(fn["layer_sizes"], _layer_sizes);
1589         setLayerSizes( _layer_sizes );
1590
1591         int i, l_count = layer_count();
1592         read_params(fn);
1593
1594         size_t esz = weights[0].elemSize();
1595
1596         FileNode w = fn["input_scale"];
1597         w.readRaw("d", weights[0].ptr(), weights[0].total()*esz);
1598
1599         w = fn["output_scale"];
1600         w.readRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1601
1602         w = fn["inv_output_scale"];
1603         w.readRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1604
1605         FileNodeIterator w_it = fn["weights"].begin();
1606
1607         for( i = 1; i < l_count; i++, ++w_it )
1608             (*w_it).readRaw("d", weights[i].ptr(), weights[i].total()*esz);
1609         trained = true;
1610     }
1611
1612     Mat getWeights(int layerIdx) const
1613     {
1614         CV_Assert( 0 <= layerIdx && layerIdx < (int)weights.size() );
1615         return weights[layerIdx];
1616     }
1617
1618     bool isTrained() const
1619     {
1620         return trained;
1621     }
1622
1623     bool isClassifier() const
1624     {
1625         return false;
1626     }
1627
1628     int getVarCount() const
1629     {
1630         return layer_sizes.empty() ? 0 : layer_sizes[0];
1631     }
1632
1633     String getDefaultName() const
1634     {
1635         return "opencv_ml_ann_mlp";
1636     }
1637
1638     vector<int> layer_sizes;
1639     vector<Mat> weights;
1640     double f_param1, f_param2;
1641     double min_val, max_val, min_val1, max_val1;
1642     int activ_func;
1643     int max_lsize, max_buf_sz;
1644     AnnParams params;
1645     RNG rng;
1646     Mutex mtx;
1647     bool trained;
1648 };
1649
1650
1651
1652
1653 Ptr<ANN_MLP> ANN_MLP::create()
1654 {
1655     return makePtr<ANN_MLPImpl>();
1656 }
1657
1658 Ptr<ANN_MLP> ANN_MLP::load(const String& filepath)
1659 {
1660     FileStorage fs;
1661     fs.open(filepath, FileStorage::READ);
1662     CV_Assert(fs.isOpened());
1663     Ptr<ANN_MLP> ann = makePtr<ANN_MLPImpl>();
1664     ((ANN_MLPImpl*)ann.get())->read(fs.getFirstTopLevelNode());
1665     return ann;
1666 }
1667
1668 double ANN_MLP_ANNEAL::getAnnealInitialT() const
1669 {
1670     const ANN_MLPImpl* this_ = dynamic_cast<const ANN_MLPImpl*>(this);
1671     if (!this_)
1672         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1673     return this_->getAnnealInitialT();
1674 }
1675
1676 void ANN_MLP_ANNEAL::setAnnealInitialT(double val)
1677 {
1678     ANN_MLPImpl* this_ = dynamic_cast< ANN_MLPImpl*>(this);
1679     if (!this_)
1680         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1681     this_->setAnnealInitialT(val);
1682 }
1683
1684 double ANN_MLP_ANNEAL::getAnnealFinalT() const
1685 {
1686     const ANN_MLPImpl* this_ = dynamic_cast<const ANN_MLPImpl*>(this);
1687     if (!this_)
1688         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1689     return this_->getAnnealFinalT();
1690 }
1691
1692 void ANN_MLP_ANNEAL::setAnnealFinalT(double val)
1693 {
1694     ANN_MLPImpl* this_ = dynamic_cast<ANN_MLPImpl*>(this);
1695     if (!this_)
1696         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1697     this_->setAnnealFinalT(val);
1698 }
1699
1700 double ANN_MLP_ANNEAL::getAnnealCoolingRatio() const
1701 {
1702     const ANN_MLPImpl* this_ = dynamic_cast<const ANN_MLPImpl*>(this);
1703     if (!this_)
1704         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1705     return this_->getAnnealCoolingRatio();
1706 }
1707
1708 void ANN_MLP_ANNEAL::setAnnealCoolingRatio(double val)
1709 {
1710     ANN_MLPImpl* this_ = dynamic_cast< ANN_MLPImpl*>(this);
1711     if (!this_)
1712         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1713     this_->setAnnealInitialT(val);
1714 }
1715
1716 int ANN_MLP_ANNEAL::getAnnealItePerStep() const
1717 {
1718     const ANN_MLPImpl* this_ = dynamic_cast<const ANN_MLPImpl*>(this);
1719     if (!this_)
1720         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1721     return this_->getAnnealItePerStep();
1722 }
1723
1724 void ANN_MLP_ANNEAL::setAnnealItePerStep(int val)
1725 {
1726     ANN_MLPImpl* this_ = dynamic_cast<ANN_MLPImpl*>(this);
1727     if (!this_)
1728         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
1729     this_->setAnnealInitialT(val);
1730 }
1731
1732 }}
1733
1734 /* End of file. */