modules/ml/src/ann_mlp.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //
  12 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  13 // Third party copyrights are property of their respective owners.
  14 //
  15 // Redistribution and use in source and binary forms, with or without modification,
  16 // are permitted provided that the following conditions are met:
  17 //
  18 //   * Redistribution's of source code must retain the above copyright notice,
  19 //     this list of conditions and the following disclaimer.
  20 //
  21 //   * Redistribution's in binary form must reproduce the above copyright notice,
  22 //     this list of conditions and the following disclaimer in the documentation
  23 //     and/or other materials provided with the distribution.
  24 //
  25 //   * The name of Intel Corporation may not be used to endorse or promote products
  26 //     derived from this software without specific prior written permission.
  27 //
  28 // This software is provided by the copyright holders and contributors "as is" and
  29 // any express or implied warranties, including, but not limited to, the implied
  30 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  31 // In no event shall the Intel Corporation or contributors be liable for any direct,
  32 // indirect, incidental, special, exemplary, or consequential damages
  33 // (including, but not limited to, procurement of substitute goods or services;
  34 // loss of use, data, or profits; or business interruption) however caused
  35 // and on any theory of liability, whether in contract, strict liability,
  36 // or tort (including negligence or otherwise) arising in any way out of
  37 // the use of this software, even if advised of the possibility of such damage.
  38 //
  39 //M*/
  40
  41 #include "precomp.hpp"
  42
  43 namespace cv { namespace ml {
  44
  45 struct SimulatedAnnealingSolver::Impl
  46 {
  47     int refcount;
  48
  49     const Ptr<SimulatedAnnealingSolverSystem> systemPtr;
  50     SimulatedAnnealingSolverSystem& system;
  51     RNG rEnergy;
  52     double coolingRatio;
  53     double initialT;
  54     double finalT;
  55     int iterPerStep;
  56
  57     Impl(const Ptr<SimulatedAnnealingSolverSystem>& s) :
  58         refcount(1),
  59         systemPtr(s),
  60         system(*(s.get())),
  61         rEnergy(12345)
  62     {
  63         CV_Assert(!systemPtr.empty());
  64         initialT = 2;
  65         finalT = 0.1;
  66         coolingRatio = 0.95;
  67         iterPerStep = 100;
  68     }
  69
  70     inline double energy() { return system.energy(); }
  71     inline void changeState() { system.changeState(); }
  72     inline void reverseState() { system.reverseState(); }
  73
  74     void addref() { CV_XADD(&refcount, 1); }
  75     void release() { if (CV_XADD(&refcount, -1) == 1) delete this; }
  76 protected:
  77     virtual ~Impl() { CV_Assert(refcount==0); }
  78 };
  79
  80 struct AnnParams
  81 {
  82     AnnParams()
  83     {
  84         termCrit = TermCriteria( TermCriteria::COUNT + TermCriteria::EPS, 1000, 0.01 );
  85         trainMethod = ANN_MLP::RPROP;
  86         bpDWScale = bpMomentScale = 0.1;
  87         rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
  88         rpDWMin = FLT_EPSILON; rpDWMax = 50.;
  89         initialT=10;finalT=0.1,coolingRatio=0.95;itePerStep=10;
  90         rEnergy = cv::RNG(12345);
  91     }
  92
  93     TermCriteria termCrit;
  94     int trainMethod;
  95
  96     double bpDWScale;
  97     double bpMomentScale;
  98
  99     double rpDW0;
 100     double rpDWPlus;
 101     double rpDWMinus;
 102     double rpDWMin;
 103     double rpDWMax;
 104
 105     double initialT;
 106     double finalT;
 107     double coolingRatio;
 108     int itePerStep;
 109     RNG rEnergy;
 110 };
 111
 112 template <typename T>
 113 inline T inBounds(T val, T min_val, T max_val)
 114 {
 115     return std::min(std::max(val, min_val), max_val);
 116 }
 117
 118 SimulatedAnnealingSolver::SimulatedAnnealingSolver(const Ptr<SimulatedAnnealingSolverSystem>& system)
 119 {
 120     impl = new Impl(system);
 121 }
 122
 123 SimulatedAnnealingSolver::SimulatedAnnealingSolver(const SimulatedAnnealingSolver& b)
 124 {
 125     if (b.impl) b.impl->addref();
 126     release();
 127     impl = b.impl;
 128 }
 129
 130 void SimulatedAnnealingSolver::release()
 131 {
 132     if (impl) { impl->release(); impl = NULL; }
 133 }
 134
 135 void SimulatedAnnealingSolver::setIterPerStep(int ite)
 136 {
 137     CV_Assert(impl);
 138     CV_Assert(ite>0);
 139     impl->iterPerStep = ite;
 140 }
 141
 142 int SimulatedAnnealingSolver::run()
 143 {
 144     CV_Assert(impl);
 145     CV_Assert(impl->initialT>impl->finalT);
 146     double Ti = impl->initialT;
 147     double previousEnergy = impl->energy();
 148     int exchange = 0;
 149     while (Ti > impl->finalT)
 150     {
 151         for (int i = 0; i < impl->iterPerStep; i++)
 152         {
 153             impl->changeState();
 154             double newEnergy = impl->energy();
 155             if (newEnergy < previousEnergy)
 156             {
 157                 previousEnergy = newEnergy;
 158                 //??? exchange++;
 159             }
 160             else
 161             {
 162                 double r = impl->rEnergy.uniform(0.0, 1.0);
 163                 if (r < std::exp(-(newEnergy - previousEnergy) / Ti))
 164                 {
 165                     previousEnergy = newEnergy;
 166                     exchange++;
 167                 }
 168                 else
 169                 {
 170                     impl->reverseState();
 171                 }
 172             }
 173
 174         }
 175         Ti *= impl->coolingRatio;
 176     }
 177     impl->finalT = Ti;
 178     return exchange;
 179 }
 180
 181 void SimulatedAnnealingSolver::setEnergyRNG(const RNG& rng)
 182 {
 183     CV_Assert(impl);
 184     impl->rEnergy = rng;
 185 }
 186
 187 void SimulatedAnnealingSolver::setInitialTemperature(double x)
 188 {
 189     CV_Assert(impl);
 190     CV_Assert(x>0);
 191     impl->initialT = x;
 192 }
 193
 194 void SimulatedAnnealingSolver::setFinalTemperature(double x)
 195 {
 196     CV_Assert(impl);
 197     CV_Assert(x>0);
 198     impl->finalT = x;
 199 }
 200
 201 double SimulatedAnnealingSolver::getFinalTemperature()
 202 {
 203     CV_Assert(impl);
 204     return impl->finalT;
 205 }
 206
 207 void SimulatedAnnealingSolver::setCoolingRatio(double x)
 208 {
 209     CV_Assert(impl);
 210     CV_Assert(x>0 && x<1);
 211     impl->coolingRatio = x;
 212 }
 213
 214 class SimulatedAnnealingANN_MLP : public SimulatedAnnealingSolverSystem
 215 {
 216 protected:
 217     ml::ANN_MLP& nn;
 218     Ptr<ml::TrainData> data;
 219     int nbVariables;
 220     vector<double*> adrVariables;
 221     RNG rVar;
 222     RNG rIndex;
 223     double varTmp;
 224     int index;
 225 public:
 226     SimulatedAnnealingANN_MLP(ml::ANN_MLP& x, const Ptr<ml::TrainData>& d) : nn(x), data(d)
 227     {
 228         initVarMap();
 229     }
 230     ~SimulatedAnnealingANN_MLP() {}
 231 protected:
 232     void changeState()
 233     {
 234         index = rIndex.uniform(0, nbVariables);
 235         double dv = rVar.uniform(-1.0, 1.0);
 236         varTmp = *adrVariables[index];
 237         *adrVariables[index] = dv;
 238     }
 239
 240     void reverseState()
 241     {
 242         *adrVariables[index] = varTmp;
 243     }
 244
 245     double energy() const { return nn.calcError(data, false, noArray()); }
 246
 247 protected:
 248     void initVarMap()
 249     {
 250         Mat l = nn.getLayerSizes();
 251         nbVariables = 0;
 252         adrVariables.clear();
 253         for (int i = 1; i < l.rows-1; i++)
 254         {
 255             Mat w = nn.getWeights(i);
 256             for (int j = 0; j < w.rows; j++)
 257             {
 258                 for (int k = 0; k < w.cols; k++, nbVariables++)
 259                 {
 260                     if (j == w.rows - 1)
 261                     {
 262                         adrVariables.push_back(&w.at<double>(w.rows - 1, k));
 263                     }
 264                     else
 265                     {
 266                         adrVariables.push_back(&w.at<double>(j, k));
 267                     }
 268                 }
 269             }
 270         }
 271     }
 272
 273 };
 274
 275 double ANN_MLP::getAnnealInitialT() const
 276 {
 277     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 278     if (!this_)
 279         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 280     return this_->getAnnealInitialT();
 281 }
 282
 283 void ANN_MLP::setAnnealInitialT(double val)
 284 {
 285     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 286     if (!this_)
 287         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 288     this_->setAnnealInitialT(val);
 289 }
 290
 291 double ANN_MLP::getAnnealFinalT() const
 292 {
 293     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 294     if (!this_)
 295         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 296     return this_->getAnnealFinalT();
 297 }
 298
 299 void ANN_MLP::setAnnealFinalT(double val)
 300 {
 301     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 302     if (!this_)
 303         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 304     this_->setAnnealFinalT(val);
 305 }
 306
 307 double ANN_MLP::getAnnealCoolingRatio() const
 308 {
 309     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 310     if (!this_)
 311         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 312     return this_->getAnnealCoolingRatio();
 313 }
 314
 315 void ANN_MLP::setAnnealCoolingRatio(double val)
 316 {
 317     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 318     if (!this_)
 319         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 320     this_->setAnnealCoolingRatio(val);
 321 }
 322
 323 int ANN_MLP::getAnnealItePerStep() const
 324 {
 325     const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
 326     if (!this_)
 327         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 328     return this_->getAnnealItePerStep();
 329 }
 330
 331 void ANN_MLP::setAnnealItePerStep(int val)
 332 {
 333     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 334     if (!this_)
 335         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 336     this_->setAnnealItePerStep(val);
 337 }
 338
 339 void ANN_MLP::setAnnealEnergyRNG(const RNG& rng)
 340 {
 341     ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
 342     if (!this_)
 343         CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
 344     this_->setAnnealEnergyRNG(rng);
 345 }
 346
 347 class ANN_MLPImpl : public ANN_MLP_ANNEAL
 348 {
 349 public:
 350     ANN_MLPImpl()
 351     {
 352         clear();
 353         setActivationFunction( SIGMOID_SYM, 0, 0);
 354         setLayerSizes(Mat());
 355         setTrainMethod(ANN_MLP::RPROP, 0.1, FLT_EPSILON);
 356     }
 357
 358     virtual ~ANN_MLPImpl() {}
 359
 360     CV_IMPL_PROPERTY(TermCriteria, TermCriteria, params.termCrit)
 361     CV_IMPL_PROPERTY(double, BackpropWeightScale, params.bpDWScale)
 362     CV_IMPL_PROPERTY(double, BackpropMomentumScale, params.bpMomentScale)
 363     CV_IMPL_PROPERTY(double, RpropDW0, params.rpDW0)
 364     CV_IMPL_PROPERTY(double, RpropDWPlus, params.rpDWPlus)
 365     CV_IMPL_PROPERTY(double, RpropDWMinus, params.rpDWMinus)
 366     CV_IMPL_PROPERTY(double, RpropDWMin, params.rpDWMin)
 367     CV_IMPL_PROPERTY(double, RpropDWMax, params.rpDWMax)
 368     CV_IMPL_PROPERTY(double, AnnealInitialT, params.initialT)
 369     CV_IMPL_PROPERTY(double, AnnealFinalT, params.finalT)
 370     CV_IMPL_PROPERTY(double, AnnealCoolingRatio, params.coolingRatio)
 371     CV_IMPL_PROPERTY(int, AnnealItePerStep, params.itePerStep)
 372
 373     //CV_IMPL_PROPERTY(RNG, AnnealEnergyRNG, params.rEnergy)
 374     inline void setAnnealEnergyRNG(const RNG& val) { params.rEnergy = val; }
 375
 376     void clear()
 377     {
 378         min_val = max_val = min_val1 = max_val1 = 0.;
 379         rng = RNG((uint64)-1);
 380         weights.clear();
 381         trained = false;
 382         max_buf_sz = 1 << 12;
 383     }
 384
 385     int layer_count() const { return (int)layer_sizes.size(); }
 386
 387     void setTrainMethod(int method, double param1, double param2)
 388     {
 389         if (method != ANN_MLP::RPROP && method != ANN_MLP::BACKPROP && method != ANN_MLP::ANNEAL)
 390             method = ANN_MLP::RPROP;
 391         params.trainMethod = method;
 392         if(method == ANN_MLP::RPROP )
 393         {
 394             if( param1 < FLT_EPSILON )
 395                 param1 = 1.;
 396             params.rpDW0 = param1;
 397             params.rpDWMin = std::max( param2, 0. );
 398         }
 399         else if (method == ANN_MLP::BACKPROP)
 400         {
 401             if (param1 <= 0)
 402                 param1 = 0.1;
 403             params.bpDWScale = inBounds<double>(param1, 1e-3, 1.);
 404             if (param2 < 0)
 405                 param2 = 0.1;
 406             params.bpMomentScale = std::min(param2, 1.);
 407         }
 408 /*        else if (method == ANN_MLP::ANNEAL)
 409         {
 410             if (param1 <= 0)
 411                 param1 = 10;
 412             if (param2 <= 0 || param2>param1)
 413                 param2 = 0.1;
 414             if (param3 <= 0 || param3 >=1)
 415                 param3 = 0.95;
 416             if (param4 <= 0)
 417                 param4 = 10;
 418             params.initialT = param1;
 419             params.finalT = param2;
 420             params.coolingRatio = param3;
 421             params.itePerStep = param4;
 422         }*/
 423     }
 424
 425     int getTrainMethod() const
 426     {
 427         return params.trainMethod;
 428     }
 429
 430     void setActivationFunction(int _activ_func, double _f_param1, double _f_param2)
 431     {
 432         if( _activ_func < 0 || _activ_func > LEAKYRELU)
 433             CV_Error( CV_StsOutOfRange, "Unknown activation function" );
 434
 435         activ_func = _activ_func;
 436
 437         switch( activ_func )
 438         {
 439         case SIGMOID_SYM:
 440             max_val = 0.95; min_val = -max_val;
 441             max_val1 = 0.98; min_val1 = -max_val1;
 442             if( fabs(_f_param1) < FLT_EPSILON )
 443                 _f_param1 = 2./3;
 444             if( fabs(_f_param2) < FLT_EPSILON )
 445                 _f_param2 = 1.7159;
 446             break;
 447         case GAUSSIAN:
 448             max_val = 1.; min_val = 0.05;
 449             max_val1 = 1.; min_val1 = 0.02;
 450             if (fabs(_f_param1) < FLT_EPSILON)
 451                 _f_param1 = 1.;
 452             if (fabs(_f_param2) < FLT_EPSILON)
 453                 _f_param2 = 1.;
 454             break;
 455         case RELU:
 456             if (fabs(_f_param1) < FLT_EPSILON)
 457                 _f_param1 = 1;
 458             min_val = max_val = min_val1 = max_val1 = 0.;
 459             _f_param2 = 0.;
 460             break;
 461         case LEAKYRELU:
 462             if (fabs(_f_param1) < FLT_EPSILON)
 463                 _f_param1 = 0.01;
 464             min_val = max_val = min_val1 = max_val1 = 0.;
 465             _f_param2 = 0.;
 466             break;
 467         default:
 468             min_val = max_val = min_val1 = max_val1 = 0.;
 469             _f_param1 = 1.;
 470             _f_param2 = 0.;
 471         }
 472
 473         f_param1 = _f_param1;
 474         f_param2 = _f_param2;
 475     }
 476
 477
 478     void init_weights()
 479     {
 480         int i, j, k, l_count = layer_count();
 481
 482         for( i = 1; i < l_count; i++ )
 483         {
 484             int n1 = layer_sizes[i-1];
 485             int n2 = layer_sizes[i];
 486             double val = 0, G = n2 > 2 ? 0.7*pow((double)n1,1./(n2-1)) : 1.;
 487             double* w = weights[i].ptr<double>();
 488
 489             // initialize weights using Nguyen-Widrow algorithm
 490             for( j = 0; j < n2; j++ )
 491             {
 492                 double s = 0;
 493                 for( k = 0; k <= n1; k++ )
 494                 {
 495                     val = rng.uniform(0., 1.)*2-1.;
 496                     w[k*n2 + j] = val;
 497                     s += fabs(val);
 498                 }
 499
 500                 if( i < l_count - 1 )
 501                 {
 502                     s = 1./(s - fabs(val));
 503                     for( k = 0; k <= n1; k++ )
 504                         w[k*n2 + j] *= s;
 505                     w[n1*n2 + j] *= G*(-1+j*2./n2);
 506                 }
 507             }
 508         }
 509     }
 510
 511     Mat getLayerSizes() const
 512     {
 513         return Mat_<int>(layer_sizes, true);
 514     }
 515
 516     void setLayerSizes( InputArray _layer_sizes )
 517     {
 518         clear();
 519
 520         _layer_sizes.copyTo(layer_sizes);
 521         int l_count = layer_count();
 522
 523         weights.resize(l_count + 2);
 524         max_lsize = 0;
 525
 526         if( l_count > 0 )
 527         {
 528             for( int i = 0; i < l_count; i++ )
 529             {
 530                 int n = layer_sizes[i];
 531                 if( n < 1 + (0 < i && i < l_count-1))
 532                     CV_Error( CV_StsOutOfRange,
 533                              "there should be at least one input and one output "
 534                              "and every hidden layer must have more than 1 neuron" );
 535                 max_lsize = std::max( max_lsize, n );
 536                 if( i > 0 )
 537                     weights[i].create(layer_sizes[i-1]+1, n, CV_64F);
 538             }
 539
 540             int ninputs = layer_sizes.front();
 541             int noutputs = layer_sizes.back();
 542             weights[0].create(1, ninputs*2, CV_64F);
 543             weights[l_count].create(1, noutputs*2, CV_64F);
 544             weights[l_count+1].create(1, noutputs*2, CV_64F);
 545         }
 546     }
 547
 548     float predict( InputArray _inputs, OutputArray _outputs, int ) const
 549     {
 550         if( !trained )
 551             CV_Error( CV_StsError, "The network has not been trained or loaded" );
 552
 553         Mat inputs = _inputs.getMat();
 554         int type = inputs.type(), l_count = layer_count();
 555         int n = inputs.rows, dn0 = n;
 556
 557         CV_Assert( (type == CV_32F || type == CV_64F) && inputs.cols == layer_sizes[0] );
 558         int noutputs = layer_sizes[l_count-1];
 559         Mat outputs;
 560
 561         int min_buf_sz = 2*max_lsize;
 562         int buf_sz = n*min_buf_sz;
 563
 564         if( buf_sz > max_buf_sz )
 565         {
 566             dn0 = max_buf_sz/min_buf_sz;
 567             dn0 = std::max( dn0, 1 );
 568             buf_sz = dn0*min_buf_sz;
 569         }
 570
 571         cv::AutoBuffer<double> _buf(buf_sz+noutputs);
 572         double* buf = _buf;
 573
 574         if( !_outputs.needed() )
 575         {
 576             CV_Assert( n == 1 );
 577             outputs = Mat(n, noutputs, type, buf + buf_sz);
 578         }
 579         else
 580         {
 581             _outputs.create(n, noutputs, type);
 582             outputs = _outputs.getMat();
 583         }
 584
 585         int dn = 0;
 586         for( int i = 0; i < n; i += dn )
 587         {
 588             dn = std::min( dn0, n - i );
 589
 590             Mat layer_in = inputs.rowRange(i, i + dn);
 591             Mat layer_out( dn, layer_in.cols, CV_64F, buf);
 592
 593             scale_input( layer_in, layer_out );
 594             layer_in = layer_out;
 595
 596             for( int j = 1; j < l_count; j++ )
 597             {
 598                 double* data = buf + ((j&1) ? max_lsize*dn0 : 0);
 599                 int cols = layer_sizes[j];
 600
 601                 layer_out = Mat(dn, cols, CV_64F, data);
 602                 Mat w = weights[j].rowRange(0, layer_in.cols);
 603                 gemm(layer_in, w, 1, noArray(), 0, layer_out);
 604                 calc_activ_func( layer_out, weights[j] );
 605
 606                 layer_in = layer_out;
 607             }
 608
 609             layer_out = outputs.rowRange(i, i + dn);
 610             scale_output( layer_in, layer_out );
 611         }
 612
 613         if( n == 1 )
 614         {
 615             int maxIdx[] = {0, 0};
 616             minMaxIdx(outputs, 0, 0, 0, maxIdx);
 617             return (float)(maxIdx[0] + maxIdx[1]);
 618         }
 619
 620         return 0.f;
 621     }
 622
 623     void scale_input( const Mat& _src, Mat& _dst ) const
 624     {
 625         int cols = _src.cols;
 626         const double* w = weights[0].ptr<double>();
 627
 628         if( _src.type() == CV_32F )
 629         {
 630             for( int i = 0; i < _src.rows; i++ )
 631             {
 632                 const float* src = _src.ptr<float>(i);
 633                 double* dst = _dst.ptr<double>(i);
 634                 for( int j = 0; j < cols; j++ )
 635                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 636             }
 637         }
 638         else
 639         {
 640             for( int i = 0; i < _src.rows; i++ )
 641             {
 642                 const double* src = _src.ptr<double>(i);
 643                 double* dst = _dst.ptr<double>(i);
 644                 for( int j = 0; j < cols; j++ )
 645                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 646             }
 647         }
 648     }
 649
 650     void scale_output( const Mat& _src, Mat& _dst ) const
 651     {
 652         int cols = _src.cols;
 653         const double* w = weights[layer_count()].ptr<double>();
 654
 655         if( _dst.type() == CV_32F )
 656         {
 657             for( int i = 0; i < _src.rows; i++ )
 658             {
 659                 const double* src = _src.ptr<double>(i);
 660                 float* dst = _dst.ptr<float>(i);
 661                 for( int j = 0; j < cols; j++ )
 662                     dst[j] = (float)(src[j]*w[j*2] + w[j*2+1]);
 663             }
 664         }
 665         else
 666         {
 667             for( int i = 0; i < _src.rows; i++ )
 668             {
 669                 const double* src = _src.ptr<double>(i);
 670                 double* dst = _dst.ptr<double>(i);
 671                 for( int j = 0; j < cols; j++ )
 672                     dst[j] = src[j]*w[j*2] + w[j*2+1];
 673             }
 674         }
 675     }
 676
 677     void calc_activ_func(Mat& sums, const Mat& w) const
 678     {
 679         const double* bias = w.ptr<double>(w.rows - 1);
 680         int i, j, n = sums.rows, cols = sums.cols;
 681         double scale = 0, scale2 = f_param2;
 682
 683         switch (activ_func)
 684         {
 685         case IDENTITY:
 686             scale = 1.;
 687             break;
 688         case SIGMOID_SYM:
 689             scale = -f_param1;
 690             break;
 691         case GAUSSIAN:
 692             scale = -f_param1*f_param1;
 693             break;
 694         case RELU:
 695             scale = 1;
 696             break;
 697         case LEAKYRELU:
 698             scale = 1;
 699             break;
 700         default:
 701             ;
 702         }
 703
 704         CV_Assert(sums.isContinuous());
 705
 706         if (activ_func != GAUSSIAN)
 707         {
 708             for (i = 0; i < n; i++)
 709             {
 710                 double* data = sums.ptr<double>(i);
 711                 for (j = 0; j < cols; j++)
 712                 {
 713                     data[j] = (data[j] + bias[j])*scale;
 714                     if (activ_func == RELU)
 715                         if (data[j] < 0)
 716                             data[j] = 0;
 717                     if (activ_func == LEAKYRELU)
 718                         if (data[j] < 0)
 719                             data[j] *= f_param1;
 720                 }
 721             }
 722
 723             if (activ_func == IDENTITY || activ_func == RELU || activ_func == LEAKYRELU)
 724                 return;
 725         }
 726         else
 727         {
 728             for (i = 0; i < n; i++)
 729             {
 730                 double* data = sums.ptr<double>(i);
 731                 for (j = 0; j < cols; j++)
 732                 {
 733                     double t = data[j] + bias[j];
 734                     data[j] = t*t*scale;
 735                 }
 736             }
 737         }
 738
 739         exp(sums, sums);
 740
 741         if (sums.isContinuous())
 742         {
 743             cols *= n;
 744             n = 1;
 745         }
 746
 747         switch (activ_func)
 748         {
 749         case SIGMOID_SYM:
 750             for (i = 0; i < n; i++)
 751             {
 752                 double* data = sums.ptr<double>(i);
 753                 for (j = 0; j < cols; j++)
 754                 {
 755                     if (!cvIsInf(data[j]))
 756                     {
 757                         double t = scale2*(1. - data[j]) / (1. + data[j]);
 758                         data[j] = t;
 759                     }
 760                     else
 761                     {
 762                         data[j] = -scale2;
 763                     }
 764                 }
 765             }
 766             break;
 767
 768         case GAUSSIAN:
 769             for (i = 0; i < n; i++)
 770             {
 771                 double* data = sums.ptr<double>(i);
 772                 for (j = 0; j < cols; j++)
 773                     data[j] = scale2*data[j];
 774             }
 775             break;
 776
 777         default:
 778             ;
 779         }
 780     }
 781
 782     void calc_activ_func_deriv(Mat& _xf, Mat& _df, const Mat& w) const
 783     {
 784         const double* bias = w.ptr<double>(w.rows - 1);
 785         int i, j, n = _xf.rows, cols = _xf.cols;
 786
 787         if (activ_func == IDENTITY)
 788         {
 789             for (i = 0; i < n; i++)
 790             {
 791                 double* xf = _xf.ptr<double>(i);
 792                 double* df = _df.ptr<double>(i);
 793
 794                 for (j = 0; j < cols; j++)
 795                 {
 796                     xf[j] += bias[j];
 797                     df[j] = 1;
 798                 }
 799             }
 800         }
 801         else if (activ_func == RELU)
 802         {
 803             for (i = 0; i < n; i++)
 804             {
 805                 double* xf = _xf.ptr<double>(i);
 806                 double* df = _df.ptr<double>(i);
 807
 808                 for (j = 0; j < cols; j++)
 809                 {
 810                     xf[j] += bias[j];
 811                     if (xf[j] < 0)
 812                     {
 813                         xf[j] = 0;
 814                         df[j] = 0;
 815                     }
 816                     else
 817                         df[j] = 1;
 818                 }
 819             }
 820         }
 821         else if (activ_func == LEAKYRELU)
 822         {
 823             for (i = 0; i < n; i++)
 824             {
 825                 double* xf = _xf.ptr<double>(i);
 826                 double* df = _df.ptr<double>(i);
 827
 828                 for (j = 0; j < cols; j++)
 829                 {
 830                     xf[j] += bias[j];
 831                     if (xf[j] < 0)
 832                     {
 833                         xf[j] = f_param1*xf[j];
 834                         df[j] = f_param1;
 835                     }
 836                     else
 837                         df[j] = 1;
 838                 }
 839             }
 840         }
 841         else if (activ_func == GAUSSIAN)
 842         {
 843             double scale = -f_param1*f_param1;
 844             double scale2 = scale*f_param2;
 845             for (i = 0; i < n; i++)
 846             {
 847                 double* xf = _xf.ptr<double>(i);
 848                 double* df = _df.ptr<double>(i);
 849
 850                 for (j = 0; j < cols; j++)
 851                 {
 852                     double t = xf[j] + bias[j];
 853                     df[j] = t * 2 * scale2;
 854                     xf[j] = t*t*scale;
 855                 }
 856             }
 857             exp(_xf, _xf);
 858
 859             for (i = 0; i < n; i++)
 860             {
 861                 double* xf = _xf.ptr<double>(i);
 862                 double* df = _df.ptr<double>(i);
 863
 864                 for (j = 0; j < cols; j++)
 865                     df[j] *= xf[j];
 866             }
 867         }
 868         else
 869         {
 870             double scale = f_param1;
 871             double scale2 = f_param2;
 872
 873             for (i = 0; i < n; i++)
 874             {
 875                 double* xf = _xf.ptr<double>(i);
 876                 double* df = _df.ptr<double>(i);
 877
 878                 for (j = 0; j < cols; j++)
 879                 {
 880                     xf[j] = (xf[j] + bias[j])*scale;
 881                     df[j] = -fabs(xf[j]);
 882                 }
 883             }
 884
 885             exp(_df, _df);
 886
 887             // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
 888             // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
 889             // 2*a*exp(-ax)/(1+exp(-ax))^2
 890             scale *= 2 * f_param2;
 891             for (i = 0; i < n; i++)
 892             {
 893                 double* xf = _xf.ptr<double>(i);
 894                 double* df = _df.ptr<double>(i);
 895
 896                 for (j = 0; j < cols; j++)
 897                 {
 898                     int s0 = xf[j] > 0 ? 1 : -1;
 899                     double t0 = 1. / (1. + df[j]);
 900                     double t1 = scale*df[j] * t0*t0;
 901                     t0 *= scale2*(1. - df[j])*s0;
 902                     df[j] = t1;
 903                     xf[j] = t0;
 904                 }
 905             }
 906         }
 907     }
 908
 909     void calc_input_scale( const Mat& inputs, int flags )
 910     {
 911         bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
 912         bool no_scale = (flags & NO_INPUT_SCALE) != 0;
 913         double* scale = weights[0].ptr<double>();
 914         int count = inputs.rows;
 915
 916         if( reset_weights )
 917         {
 918             int i, j, vcount = layer_sizes[0];
 919             int type = inputs.type();
 920             double a = no_scale ? 1. : 0.;
 921
 922             for( j = 0; j < vcount; j++ )
 923                 scale[2*j] = a, scale[j*2+1] = 0.;
 924
 925             if( no_scale )
 926                 return;
 927
 928             for( i = 0; i < count; i++ )
 929             {
 930                 const uchar* p = inputs.ptr(i);
 931                 const float* f = (const float*)p;
 932                 const double* d = (const double*)p;
 933                 for( j = 0; j < vcount; j++ )
 934                 {
 935                     double t = type == CV_32F ? (double)f[j] : d[j];
 936                     scale[j*2] += t;
 937                     scale[j*2+1] += t*t;
 938                 }
 939             }
 940
 941             for( j = 0; j < vcount; j++ )
 942             {
 943                 double s = scale[j*2], s2 = scale[j*2+1];
 944                 double m = s/count, sigma2 = s2/count - m*m;
 945                 scale[j*2] = sigma2 < DBL_EPSILON ? 1 : 1./sqrt(sigma2);
 946                 scale[j*2+1] = -m*scale[j*2];
 947             }
 948         }
 949     }
 950
 951     void calc_output_scale( const Mat& outputs, int flags )
 952     {
 953         int i, j, vcount = layer_sizes.back();
 954         int type = outputs.type();
 955         double m = min_val, M = max_val, m1 = min_val1, M1 = max_val1;
 956         bool reset_weights = (flags & UPDATE_WEIGHTS) == 0;
 957         bool no_scale = (flags & NO_OUTPUT_SCALE) != 0;
 958         int l_count = layer_count();
 959         double* scale = weights[l_count].ptr<double>();
 960         double* inv_scale = weights[l_count+1].ptr<double>();
 961         int count = outputs.rows;
 962
 963         if( reset_weights )
 964         {
 965             double a0 = no_scale ? 1 : DBL_MAX, b0 = no_scale ? 0 : -DBL_MAX;
 966
 967             for( j = 0; j < vcount; j++ )
 968             {
 969                 scale[2*j] = inv_scale[2*j] = a0;
 970                 scale[j*2+1] = inv_scale[2*j+1] = b0;
 971             }
 972
 973             if( no_scale )
 974                 return;
 975         }
 976
 977         for( i = 0; i < count; i++ )
 978         {
 979             const uchar* p = outputs.ptr(i);
 980             const float* f = (const float*)p;
 981             const double* d = (const double*)p;
 982
 983             for( j = 0; j < vcount; j++ )
 984             {
 985                 double t = type == CV_32F ? (double)f[j] : d[j];
 986
 987                 if( reset_weights )
 988                 {
 989                     double mj = scale[j*2], Mj = scale[j*2+1];
 990                     if( mj > t ) mj = t;
 991                     if( Mj < t ) Mj = t;
 992
 993                     scale[j*2] = mj;
 994                     scale[j*2+1] = Mj;
 995                 }
 996                 else if( !no_scale )
 997                 {
 998                     t = t*inv_scale[j*2] + inv_scale[2*j+1];
 999                     if( t < m1 || t > M1 )
1000                         CV_Error( CV_StsOutOfRange,
1001                                  "Some of new output training vector components run exceed the original range too much" );
1002                 }
1003             }
1004         }
1005
1006         if( reset_weights )
1007             for( j = 0; j < vcount; j++ )
1008             {
1009                 // map mj..Mj to m..M
1010                 double mj = scale[j*2], Mj = scale[j*2+1];
1011                 double a, b;
1012                 double delta = Mj - mj;
1013                 if( delta < DBL_EPSILON )
1014                     a = 1, b = (M + m - Mj - mj)*0.5;
1015                 else
1016                     a = (M - m)/delta, b = m - mj*a;
1017                 inv_scale[j*2] = a; inv_scale[j*2+1] = b;
1018                 a = 1./a; b = -b*a;
1019                 scale[j*2] = a; scale[j*2+1] = b;
1020             }
1021     }
1022
1023     void prepare_to_train( const Mat& inputs, const Mat& outputs,
1024                            Mat& sample_weights, int flags )
1025     {
1026         if( layer_sizes.empty() )
1027             CV_Error( CV_StsError,
1028                      "The network has not been created. Use method create or the appropriate constructor" );
1029
1030         if( (inputs.type() != CV_32F && inputs.type() != CV_64F) ||
1031             inputs.cols != layer_sizes[0] )
1032             CV_Error( CV_StsBadArg,
1033                      "input training data should be a floating-point matrix with "
1034                      "the number of rows equal to the number of training samples and "
1035                      "the number of columns equal to the size of 0-th (input) layer" );
1036
1037         if( (outputs.type() != CV_32F && outputs.type() != CV_64F) ||
1038             outputs.cols != layer_sizes.back() )
1039             CV_Error( CV_StsBadArg,
1040                      "output training data should be a floating-point matrix with "
1041                      "the number of rows equal to the number of training samples and "
1042                      "the number of columns equal to the size of last (output) layer" );
1043
1044         if( inputs.rows != outputs.rows )
1045             CV_Error( CV_StsUnmatchedSizes, "The numbers of input and output samples do not match" );
1046
1047         Mat temp;
1048         double s = sum(sample_weights)[0];
1049         sample_weights.convertTo(temp, CV_64F, 1./s);
1050         sample_weights = temp;
1051
1052         calc_input_scale( inputs, flags );
1053         calc_output_scale( outputs, flags );
1054     }
1055
1056     bool train( const Ptr<TrainData>& trainData, int flags )
1057     {
1058         const int MAX_ITER = 1000;
1059         const double DEFAULT_EPSILON = FLT_EPSILON;
1060
1061         // initialize training data
1062         Mat inputs = trainData->getTrainSamples();
1063         Mat outputs = trainData->getTrainResponses();
1064         Mat sw = trainData->getTrainSampleWeights();
1065         prepare_to_train( inputs, outputs, sw, flags );
1066
1067         // ... and link weights
1068         if( !(flags & UPDATE_WEIGHTS) )
1069             init_weights();
1070
1071         TermCriteria termcrit;
1072         termcrit.type = TermCriteria::COUNT + TermCriteria::EPS;
1073         termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
1074         termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);
1075
1076         int iter = 0;
1077         switch(params.trainMethod){
1078         case ANN_MLP::BACKPROP:
1079             iter = train_backprop(inputs, outputs, sw, termcrit);
1080             break;
1081         case ANN_MLP::RPROP:
1082             iter = train_rprop(inputs, outputs, sw, termcrit);
1083             break;
1084         case ANN_MLP::ANNEAL:
1085             iter = train_anneal(trainData);
1086             break;
1087         }
1088         trained = iter > 0;
1089         return trained;
1090     }
1091     int train_anneal(const Ptr<TrainData>& trainData)
1092     {
1093         SimulatedAnnealingSolver t(Ptr<SimulatedAnnealingANN_MLP>(new SimulatedAnnealingANN_MLP(*this, trainData)));
1094         t.setEnergyRNG(params.rEnergy);
1095         t.setFinalTemperature(params.finalT);
1096         t.setInitialTemperature(params.initialT);
1097         t.setCoolingRatio(params.coolingRatio);
1098         t.setIterPerStep(params.itePerStep);
1099         trained = true; // Enable call to CalcError
1100         int iter =  t.run();
1101         trained =false;
1102         return iter;
1103     }
1104
1105     int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
1106     {
1107         int i, j, k;
1108         double prev_E = DBL_MAX*0.5, E = 0;
1109         int itype = inputs.type(), otype = outputs.type();
1110
1111         int count = inputs.rows;
1112
1113         int iter = -1, max_iter = termCrit.maxCount*count;
1114         double epsilon = termCrit.epsilon*count;
1115
1116         int l_count = layer_count();
1117         int ivcount = layer_sizes[0];
1118         int ovcount = layer_sizes.back();
1119
1120         // allocate buffers
1121         vector<vector<double> > x(l_count);
1122         vector<vector<double> > df(l_count);
1123         vector<Mat> dw(l_count);
1124
1125         for( i = 0; i < l_count; i++ )
1126         {
1127             int n = layer_sizes[i];
1128             x[i].resize(n+1);
1129             df[i].resize(n);
1130             dw[i] = Mat::zeros(weights[i].size(), CV_64F);
1131         }
1132
1133         Mat _idx_m(1, count, CV_32S);
1134         int* _idx = _idx_m.ptr<int>();
1135         for( i = 0; i < count; i++ )
1136             _idx[i] = i;
1137
1138         AutoBuffer<double> _buf(max_lsize*2);
1139         double* buf[] = { _buf, (double*)_buf + max_lsize };
1140
1141         const double* sw = _sw.empty() ? 0 : _sw.ptr<double>();
1142
1143         // run back-propagation loop
1144         /*
1145          y_i = w_i*x_{i-1}
1146          x_i = f(y_i)
1147          E = 1/2*||u - x_N||^2
1148          grad_N = (x_N - u)*f'(y_i)
1149          dw_i(t) = momentum*dw_i(t-1) + dw_scale*x_{i-1}*grad_i
1150          w_i(t+1) = w_i(t) + dw_i(t)
1151          grad_{i-1} = w_i^t*grad_i
1152         */
1153         for( iter = 0; iter < max_iter; iter++ )
1154         {
1155             int idx = iter % count;
1156             double sweight = sw ? count*sw[idx] : 1.;
1157
1158             if( idx == 0 )
1159             {
1160                 //printf("%d. E = %g\n", iter/count, E);
1161                 if( fabs(prev_E - E) < epsilon )
1162                     break;
1163                 prev_E = E;
1164                 E = 0;
1165
1166                 // shuffle indices
1167                 for( i = 0; i <count; i++ )
1168                 {
1169                     j = rng.uniform(0, count);
1170                     k = rng.uniform(0, count);
1171                     std::swap(_idx[j], _idx[k]);
1172                 }
1173             }
1174
1175             idx = _idx[idx];
1176
1177             const uchar* x0data_p = inputs.ptr(idx);
1178             const float* x0data_f = (const float*)x0data_p;
1179             const double* x0data_d = (const double*)x0data_p;
1180
1181             double* w = weights[0].ptr<double>();
1182             for( j = 0; j < ivcount; j++ )
1183                 x[0][j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2 + 1];
1184
1185             Mat x1( 1, ivcount, CV_64F, &x[0][0] );
1186
1187             // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
1188             for( i = 1; i < l_count; i++ )
1189             {
1190                 int n = layer_sizes[i];
1191                 Mat x2(1, n, CV_64F, &x[i][0] );
1192                 Mat _w = weights[i].rowRange(0, x1.cols);
1193                 gemm(x1, _w, 1, noArray(), 0, x2);
1194                 Mat _df(1, n, CV_64F, &df[i][0] );
1195                 calc_activ_func_deriv( x2, _df, weights[i] );
1196                 x1 = x2;
1197             }
1198
1199             Mat grad1( 1, ovcount, CV_64F, buf[l_count&1] );
1200             w = weights[l_count+1].ptr<double>();
1201
1202             // calculate error
1203             const uchar* udata_p = outputs.ptr(idx);
1204             const float* udata_f = (const float*)udata_p;
1205             const double* udata_d = (const double*)udata_p;
1206
1207             double* gdata = grad1.ptr<double>();
1208             for( k = 0; k < ovcount; k++ )
1209             {
1210                 double t = (otype == CV_32F ? (double)udata_f[k] : udata_d[k])*w[k*2] + w[k*2+1] - x[l_count-1][k];
1211                 gdata[k] = t*sweight;
1212                 E += t*t;
1213             }
1214             E *= sweight;
1215
1216             // backward pass, update weights
1217             for( i = l_count-1; i > 0; i-- )
1218             {
1219                 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
1220                 Mat _df(1, n2, CV_64F, &df[i][0]);
1221                 multiply( grad1, _df, grad1 );
1222                 Mat _x(n1+1, 1, CV_64F, &x[i-1][0]);
1223                 x[i-1][n1] = 1.;
1224                 gemm( _x, grad1, params.bpDWScale, dw[i], params.bpMomentScale, dw[i] );
1225                 add( weights[i], dw[i], weights[i] );
1226                 if( i > 1 )
1227                 {
1228                     Mat grad2(1, n1, CV_64F, buf[i&1]);
1229                     Mat _w = weights[i].rowRange(0, n1);
1230                     gemm( grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T );
1231                     grad1 = grad2;
1232                 }
1233             }
1234         }
1235
1236         iter /= count;
1237         return iter;
1238     }
1239
1240     struct RPropLoop : public ParallelLoopBody
1241     {
1242         RPropLoop(ANN_MLPImpl* _ann,
1243                   const Mat& _inputs, const Mat& _outputs, const Mat& _sw,
1244                   int _dcount0, vector<Mat>& _dEdw, double* _E)
1245         {
1246             ann = _ann;
1247             inputs = _inputs;
1248             outputs = _outputs;
1249             sw = _sw.ptr<double>();
1250             dcount0 = _dcount0;
1251             dEdw = &_dEdw;
1252             pE = _E;
1253         }
1254
1255         ANN_MLPImpl* ann;
1256         vector<Mat>* dEdw;
1257         Mat inputs, outputs;
1258         const double* sw;
1259         int dcount0;
1260         double* pE;
1261
1262         void operator()( const Range& range ) const
1263         {
1264             double inv_count = 1./inputs.rows;
1265             int ivcount = ann->layer_sizes.front();
1266             int ovcount = ann->layer_sizes.back();
1267             int itype = inputs.type(), otype = outputs.type();
1268             int count = inputs.rows;
1269             int i, j, k, l_count = ann->layer_count();
1270             vector<vector<double> > x(l_count);
1271             vector<vector<double> > df(l_count);
1272             vector<double> _buf(ann->max_lsize*dcount0*2);
1273             double* buf[] = { &_buf[0], &_buf[ann->max_lsize*dcount0] };
1274             double E = 0;
1275
1276             for( i = 0; i < l_count; i++ )
1277             {
1278                 x[i].resize(ann->layer_sizes[i]*dcount0);
1279                 df[i].resize(ann->layer_sizes[i]*dcount0);
1280             }
1281
1282             for( int si = range.start; si < range.end; si++ )
1283             {
1284                 int i0 = si*dcount0, i1 = std::min((si + 1)*dcount0, count);
1285                 int dcount = i1 - i0;
1286                 const double* w = ann->weights[0].ptr<double>();
1287
1288                 // grab and preprocess input data
1289                 for( i = 0; i < dcount; i++ )
1290                 {
1291                     const uchar* x0data_p = inputs.ptr(i0 + i);
1292                     const float* x0data_f = (const float*)x0data_p;
1293                     const double* x0data_d = (const double*)x0data_p;
1294
1295                     double* xdata = &x[0][i*ivcount];
1296                     for( j = 0; j < ivcount; j++ )
1297                         xdata[j] = (itype == CV_32F ? (double)x0data_f[j] : x0data_d[j])*w[j*2] + w[j*2+1];
1298                 }
1299                 Mat x1(dcount, ivcount, CV_64F, &x[0][0]);
1300
1301                 // forward pass, compute y[i]=w*x[i-1], x[i]=f(y[i]), df[i]=f'(y[i])
1302                 for( i = 1; i < l_count; i++ )
1303                 {
1304                     Mat x2( dcount, ann->layer_sizes[i], CV_64F, &x[i][0] );
1305                     Mat _w = ann->weights[i].rowRange(0, x1.cols);
1306                     gemm( x1, _w, 1, noArray(), 0, x2 );
1307                     Mat _df( x2.size(), CV_64F, &df[i][0] );
1308                     ann->calc_activ_func_deriv( x2, _df, ann->weights[i] );
1309                     x1 = x2;
1310                 }
1311
1312                 Mat grad1(dcount, ovcount, CV_64F, buf[l_count & 1]);
1313
1314                 w = ann->weights[l_count+1].ptr<double>();
1315
1316                 // calculate error
1317                 for( i = 0; i < dcount; i++ )
1318                 {
1319                     const uchar* udata_p = outputs.ptr(i0+i);
1320                     const float* udata_f = (const float*)udata_p;
1321                     const double* udata_d = (const double*)udata_p;
1322
1323                     const double* xdata = &x[l_count-1][i*ovcount];
1324                     double* gdata = grad1.ptr<double>(i);
1325                     double sweight = sw ? sw[si+i] : inv_count, E1 = 0;
1326
1327                     for( j = 0; j < ovcount; j++ )
1328                     {
1329                         double t = (otype == CV_32F ? (double)udata_f[j] : udata_d[j])*w[j*2] + w[j*2+1] - xdata[j];
1330                         gdata[j] = t*sweight;
1331                         E1 += t*t;
1332                     }
1333                     E += sweight*E1;
1334                 }
1335
1336                 for( i = l_count-1; i > 0; i-- )
1337                 {
1338                     int n1 = ann->layer_sizes[i-1], n2 = ann->layer_sizes[i];
1339                     Mat _df(dcount, n2, CV_64F, &df[i][0]);
1340                     multiply(grad1, _df, grad1);
1341
1342                     {
1343                         AutoLock lock(ann->mtx);
1344                         Mat _dEdw = dEdw->at(i).rowRange(0, n1);
1345                         x1 = Mat(dcount, n1, CV_64F, &x[i-1][0]);
1346                         gemm(x1, grad1, 1, _dEdw, 1, _dEdw, GEMM_1_T);
1347
1348                         // update bias part of dEdw
1349                         double* dst = dEdw->at(i).ptr<double>(n1);
1350                         for( k = 0; k < dcount; k++ )
1351                         {
1352                             const double* src = grad1.ptr<double>(k);
1353                             for( j = 0; j < n2; j++ )
1354                                 dst[j] += src[j];
1355                         }
1356                     }
1357
1358                     Mat grad2( dcount, n1, CV_64F, buf[i&1] );
1359                     if( i > 1 )
1360                     {
1361                         Mat _w = ann->weights[i].rowRange(0, n1);
1362                         gemm(grad1, _w, 1, noArray(), 0, grad2, GEMM_2_T);
1363                     }
1364                     grad1 = grad2;
1365                 }
1366             }
1367             {
1368                 AutoLock lock(ann->mtx);
1369                 *pE += E;
1370             }
1371         }
1372     };
1373
1374     int train_rprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
1375     {
1376         const int max_buf_size = 1 << 16;
1377         int i, iter = -1, count = inputs.rows;
1378
1379         double prev_E = DBL_MAX*0.5;
1380
1381         int max_iter = termCrit.maxCount;
1382         double epsilon = termCrit.epsilon;
1383         double dw_plus = params.rpDWPlus;
1384         double dw_minus = params.rpDWMinus;
1385         double dw_min = params.rpDWMin;
1386         double dw_max = params.rpDWMax;
1387
1388         int l_count = layer_count();
1389
1390         // allocate buffers
1391         vector<Mat> dw(l_count), dEdw(l_count), prev_dEdw_sign(l_count);
1392
1393         int total = 0;
1394         for( i = 0; i < l_count; i++ )
1395         {
1396             total += layer_sizes[i];
1397             dw[i].create(weights[i].size(), CV_64F);
1398             dw[i].setTo(Scalar::all(params.rpDW0));
1399             prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
1400             dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
1401         }
1402
1403         int dcount0 = max_buf_size/(2*total);
1404         dcount0 = std::max( dcount0, 1 );
1405         dcount0 = std::min( dcount0, count );
1406         int chunk_count = (count + dcount0 - 1)/dcount0;
1407
1408         // run rprop loop
1409         /*
1410          y_i(t) = w_i(t)*x_{i-1}(t)
1411          x_i(t) = f(y_i(t))
1412          E = sum_over_all_samples(1/2*||u - x_N||^2)
1413          grad_N = (x_N - u)*f'(y_i)
1414
1415          std::min(dw_i{jk}(t)*dw_plus, dw_max), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) > 0
1416          dw_i{jk}(t) = std::max(dw_i{jk}(t)*dw_minus, dw_min), if dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0
1417          dw_i{jk}(t-1) else
1418
1419          if (dE/dw_i{jk}(t)*dE/dw_i{jk}(t-1) < 0)
1420          dE/dw_i{jk}(t)<-0
1421          else
1422          w_i{jk}(t+1) = w_i{jk}(t) + dw_i{jk}(t)
1423          grad_{i-1}(t) = w_i^t(t)*grad_i(t)
1424          */
1425         for( iter = 0; iter < max_iter; iter++ )
1426         {
1427             double E = 0;
1428
1429             for( i = 0; i < l_count; i++ )
1430                 dEdw[i].setTo(Scalar::all(0));
1431
1432             // first, iterate through all the samples and compute dEdw
1433             RPropLoop invoker(this, inputs, outputs, _sw, dcount0, dEdw, &E);
1434             parallel_for_(Range(0, chunk_count), invoker);
1435             //invoker(Range(0, chunk_count));
1436
1437             // now update weights
1438             for( i = 1; i < l_count; i++ )
1439             {
1440                 int n1 = layer_sizes[i-1], n2 = layer_sizes[i];
1441                 for( int k = 0; k <= n1; k++ )
1442                 {
1443                     CV_Assert(weights[i].size() == Size(n2, n1+1));
1444                     double* wk = weights[i].ptr<double>(k);
1445                     double* dwk = dw[i].ptr<double>(k);
1446                     double* dEdwk = dEdw[i].ptr<double>(k);
1447                     schar* prevEk = prev_dEdw_sign[i].ptr<schar>(k);
1448
1449                     for( int j = 0; j < n2; j++ )
1450                     {
1451                         double Eval = dEdwk[j];
1452                         double dval = dwk[j];
1453                         double wval = wk[j];
1454                         int s = CV_SIGN(Eval);
1455                         int ss = prevEk[j]*s;
1456                         if( ss > 0 )
1457                         {
1458                             dval *= dw_plus;
1459                             dval = std::min( dval, dw_max );
1460                             dwk[j] = dval;
1461                             wk[j] = wval + dval*s;
1462                         }
1463                         else if( ss < 0 )
1464                         {
1465                             dval *= dw_minus;
1466                             dval = std::max( dval, dw_min );
1467                             prevEk[j] = 0;
1468                             dwk[j] = dval;
1469                             wk[j] = wval + dval*s;
1470                         }
1471                         else
1472                         {
1473                             prevEk[j] = (schar)s;
1474                             wk[j] = wval + dval*s;
1475                         }
1476                         dEdwk[j] = 0.;
1477                     }
1478                 }
1479             }
1480
1481             //printf("%d. E = %g\n", iter, E);
1482             if( fabs(prev_E - E) < epsilon )
1483                 break;
1484             prev_E = E;
1485         }
1486
1487         return iter;
1488     }
1489
1490     void write_params( FileStorage& fs ) const
1491     {
1492         const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
1493                                       activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
1494                                       activ_func == GAUSSIAN ? "GAUSSIAN" :
1495                                       activ_func == RELU ? "RELU" :
1496                                       activ_func == LEAKYRELU ? "LEAKYRELU" : 0;
1497
1498         if( activ_func_name )
1499             fs << "activation_function" << activ_func_name;
1500         else
1501             fs << "activation_function_id" << activ_func;
1502
1503         if( activ_func != IDENTITY )
1504         {
1505             fs << "f_param1" << f_param1;
1506             fs << "f_param2" << f_param2;
1507         }
1508
1509         fs << "min_val" << min_val << "max_val" << max_val << "min_val1" << min_val1 << "max_val1" << max_val1;
1510
1511         fs << "training_params" << "{";
1512         if( params.trainMethod == ANN_MLP::BACKPROP )
1513         {
1514             fs << "train_method" << "BACKPROP";
1515             fs << "dw_scale" << params.bpDWScale;
1516             fs << "moment_scale" << params.bpMomentScale;
1517         }
1518         else if (params.trainMethod == ANN_MLP::RPROP)
1519         {
1520             fs << "train_method" << "RPROP";
1521             fs << "dw0" << params.rpDW0;
1522             fs << "dw_plus" << params.rpDWPlus;
1523             fs << "dw_minus" << params.rpDWMinus;
1524             fs << "dw_min" << params.rpDWMin;
1525             fs << "dw_max" << params.rpDWMax;
1526         }
1527         else if (params.trainMethod == ANN_MLP::ANNEAL)
1528         {
1529             fs << "train_method" << "ANNEAL";
1530             fs << "initialT" << params.initialT;
1531             fs << "finalT" << params.finalT;
1532             fs << "coolingRatio" << params.coolingRatio;
1533             fs << "itePerStep" << params.itePerStep;
1534         }
1535         else
1536             CV_Error(CV_StsError, "Unknown training method");
1537
1538         fs << "term_criteria" << "{";
1539         if( params.termCrit.type & TermCriteria::EPS )
1540             fs << "epsilon" << params.termCrit.epsilon;
1541         if( params.termCrit.type & TermCriteria::COUNT )
1542             fs << "iterations" << params.termCrit.maxCount;
1543         fs << "}" << "}";
1544     }
1545
1546     void write( FileStorage& fs ) const
1547     {
1548         if( layer_sizes.empty() )
1549             return;
1550         int i, l_count = layer_count();
1551
1552         writeFormat(fs);
1553         fs << "layer_sizes" << layer_sizes;
1554
1555         write_params( fs );
1556
1557         size_t esz = weights[0].elemSize();
1558
1559         fs << "input_scale" << "[";
1560         fs.writeRaw("d", weights[0].ptr(), weights[0].total()*esz);
1561
1562         fs << "]" << "output_scale" << "[";
1563         fs.writeRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1564
1565         fs << "]" << "inv_output_scale" << "[";
1566         fs.writeRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1567
1568         fs << "]" << "weights" << "[";
1569         for( i = 1; i < l_count; i++ )
1570         {
1571             fs << "[";
1572             fs.writeRaw("d", weights[i].ptr(), weights[i].total()*esz);
1573             fs << "]";
1574         }
1575         fs << "]";
1576     }
1577
1578     void read_params( const FileNode& fn )
1579     {
1580         String activ_func_name = (String)fn["activation_function"];
1581         if( !activ_func_name.empty() )
1582         {
1583             activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
1584                          activ_func_name == "IDENTITY" ? IDENTITY :
1585                          activ_func_name == "RELU" ? RELU :
1586                          activ_func_name == "LEAKYRELU" ? LEAKYRELU :
1587                          activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
1588             CV_Assert( activ_func >= 0 );
1589         }
1590         else
1591             activ_func = (int)fn["activation_function_id"];
1592
1593         f_param1 = (double)fn["f_param1"];
1594         f_param2 = (double)fn["f_param2"];
1595
1596         setActivationFunction( activ_func, f_param1, f_param2);
1597
1598         min_val = (double)fn["min_val"];
1599         max_val = (double)fn["max_val"];
1600         min_val1 = (double)fn["min_val1"];
1601         max_val1 = (double)fn["max_val1"];
1602
1603         FileNode tpn = fn["training_params"];
1604         params = AnnParams();
1605
1606         if( !tpn.empty() )
1607         {
1608             String tmethod_name = (String)tpn["train_method"];
1609
1610             if( tmethod_name == "BACKPROP" )
1611             {
1612                 params.trainMethod = ANN_MLP::BACKPROP;
1613                 params.bpDWScale = (double)tpn["dw_scale"];
1614                 params.bpMomentScale = (double)tpn["moment_scale"];
1615             }
1616             else if (tmethod_name == "RPROP")
1617             {
1618                 params.trainMethod = ANN_MLP::RPROP;
1619                 params.rpDW0 = (double)tpn["dw0"];
1620                 params.rpDWPlus = (double)tpn["dw_plus"];
1621                 params.rpDWMinus = (double)tpn["dw_minus"];
1622                 params.rpDWMin = (double)tpn["dw_min"];
1623                 params.rpDWMax = (double)tpn["dw_max"];
1624             }
1625             else if (tmethod_name == "ANNEAL")
1626             {
1627                 params.trainMethod = ANN_MLP::ANNEAL;
1628                 params.initialT = (double)tpn["initialT"];
1629                 params.finalT = (double)tpn["finalT"];
1630                 params.coolingRatio = (double)tpn["coolingRatio"];
1631                 params.itePerStep = tpn["itePerStep"];
1632             }
1633             else
1634                 CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");
1635
1636             FileNode tcn = tpn["term_criteria"];
1637             if( !tcn.empty() )
1638             {
1639                 FileNode tcn_e = tcn["epsilon"];
1640                 FileNode tcn_i = tcn["iterations"];
1641                 params.termCrit.type = 0;
1642                 if( !tcn_e.empty() )
1643                 {
1644                     params.termCrit.type |= TermCriteria::EPS;
1645                     params.termCrit.epsilon = (double)tcn_e;
1646                 }
1647                 if( !tcn_i.empty() )
1648                 {
1649                     params.termCrit.type |= TermCriteria::COUNT;
1650                     params.termCrit.maxCount = (int)tcn_i;
1651                 }
1652             }
1653         }
1654     }
1655
1656     void read( const FileNode& fn )
1657     {
1658         clear();
1659
1660         vector<int> _layer_sizes;
1661         readVectorOrMat(fn["layer_sizes"], _layer_sizes);
1662         setLayerSizes( _layer_sizes );
1663
1664         int i, l_count = layer_count();
1665         read_params(fn);
1666
1667         size_t esz = weights[0].elemSize();
1668
1669         FileNode w = fn["input_scale"];
1670         w.readRaw("d", weights[0].ptr(), weights[0].total()*esz);
1671
1672         w = fn["output_scale"];
1673         w.readRaw("d", weights[l_count].ptr(), weights[l_count].total()*esz);
1674
1675         w = fn["inv_output_scale"];
1676         w.readRaw("d", weights[l_count+1].ptr(), weights[l_count+1].total()*esz);
1677
1678         FileNodeIterator w_it = fn["weights"].begin();
1679
1680         for( i = 1; i < l_count; i++, ++w_it )
1681             (*w_it).readRaw("d", weights[i].ptr(), weights[i].total()*esz);
1682         trained = true;
1683     }
1684
1685     Mat getWeights(int layerIdx) const
1686     {
1687         CV_Assert( 0 <= layerIdx && layerIdx < (int)weights.size() );
1688         return weights[layerIdx];
1689     }
1690
1691     bool isTrained() const
1692     {
1693         return trained;
1694     }
1695
1696     bool isClassifier() const
1697     {
1698         return false;
1699     }
1700
1701     int getVarCount() const
1702     {
1703         return layer_sizes.empty() ? 0 : layer_sizes[0];
1704     }
1705
1706     String getDefaultName() const
1707     {
1708         return "opencv_ml_ann_mlp";
1709     }
1710
1711     vector<int> layer_sizes;
1712     vector<Mat> weights;
1713     double f_param1, f_param2;
1714     double min_val, max_val, min_val1, max_val1;
1715     int activ_func;
1716     int max_lsize, max_buf_sz;
1717     AnnParams params;
1718     RNG rng;
1719     Mutex mtx;
1720     bool trained;
1721 };
1722
1723
1724
1725
1726 Ptr<ANN_MLP> ANN_MLP::create()
1727 {
1728     return makePtr<ANN_MLPImpl>();
1729 }
1730
1731 Ptr<ANN_MLP> ANN_MLP::load(const String& filepath)
1732 {
1733     FileStorage fs;
1734     fs.open(filepath, FileStorage::READ);
1735     CV_Assert(fs.isOpened());
1736     Ptr<ANN_MLP> ann = makePtr<ANN_MLPImpl>();
1737     ((ANN_MLPImpl*)ann.get())->read(fs.getFirstTopLevelNode());
1738     return ann;
1739 }
1740
1741 }}
1742
1743 /* End of file. */