Calcerror uses now weighted samples (#10346)
[platform/upstream/opencv.git] / modules / ml / src / ann_mlp.cpp
index fdc73d9..cd10c0d 100644 (file)
 
 namespace cv { namespace ml {
 
+struct SimulatedAnnealingSolver::Impl
+{
+    int refcount;
+
+    const Ptr<SimulatedAnnealingSolverSystem> systemPtr;
+    SimulatedAnnealingSolverSystem& system;
+    RNG rEnergy;
+    double coolingRatio;
+    double initialT;
+    double finalT;
+    int iterPerStep;
+
+    Impl(const Ptr<SimulatedAnnealingSolverSystem>& s) :
+        refcount(1),
+        systemPtr(s),
+        system(*(s.get())),
+        rEnergy(12345)
+    {
+        CV_Assert(!systemPtr.empty());
+        initialT = 2;
+        finalT = 0.1;
+        coolingRatio = 0.95;
+        iterPerStep = 100;
+    }
+
+    inline double energy() { return system.energy(); }
+    inline void changeState() { system.changeState(); }
+    inline void reverseState() { system.reverseState(); }
+
+    void addref() { CV_XADD(&refcount, 1); }
+    void release() { if (CV_XADD(&refcount, -1) == 1) delete this; }
+protected:
+    virtual ~Impl() { CV_Assert(refcount==0); }
+};
+
 struct AnnParams
 {
     AnnParams()
@@ -51,6 +86,8 @@ struct AnnParams
         bpDWScale = bpMomentScale = 0.1;
         rpDW0 = 0.1; rpDWPlus = 1.2; rpDWMinus = 0.5;
         rpDWMin = FLT_EPSILON; rpDWMax = 50.;
+        initialT=10;finalT=0.1,coolingRatio=0.95;itePerStep=10;
+        rEnergy = cv::RNG(12345);
     }
 
     TermCriteria termCrit;
@@ -64,6 +101,12 @@ struct AnnParams
     double rpDWMinus;
     double rpDWMin;
     double rpDWMax;
+
+    double initialT;
+    double finalT;
+    double coolingRatio;
+    int itePerStep;
+    RNG rEnergy;
 };
 
 template <typename T>
@@ -72,13 +115,242 @@ inline T inBounds(T val, T min_val, T max_val)
     return std::min(std::max(val, min_val), max_val);
 }
 
-class ANN_MLPImpl : public ANN_MLP
+SimulatedAnnealingSolver::SimulatedAnnealingSolver(const Ptr<SimulatedAnnealingSolverSystem>& system)
+{
+    impl = new Impl(system);
+}
+
+SimulatedAnnealingSolver::SimulatedAnnealingSolver(const SimulatedAnnealingSolver& b)
+{
+    if (b.impl) b.impl->addref();
+    release();
+    impl = b.impl;
+}
+
+void SimulatedAnnealingSolver::release()
+{
+    if (impl) { impl->release(); impl = NULL; }
+}
+
+void SimulatedAnnealingSolver::setIterPerStep(int ite)
+{
+    CV_Assert(impl);
+    CV_Assert(ite>0);
+    impl->iterPerStep = ite;
+}
+
+int SimulatedAnnealingSolver::run()
+{
+    CV_Assert(impl);
+    CV_Assert(impl->initialT>impl->finalT);
+    double Ti = impl->initialT;
+    double previousEnergy = impl->energy();
+    int exchange = 0;
+    while (Ti > impl->finalT)
+    {
+        for (int i = 0; i < impl->iterPerStep; i++)
+        {
+            impl->changeState();
+            double newEnergy = impl->energy();
+            if (newEnergy < previousEnergy)
+            {
+                previousEnergy = newEnergy;
+                exchange++;
+            }
+            else
+            {
+                double r = impl->rEnergy.uniform(0.0, 1.0);
+                if (r < std::exp(-(newEnergy - previousEnergy) / Ti))
+                {
+                    previousEnergy = newEnergy;
+                    exchange++;
+                }
+                else
+                {
+                    impl->reverseState();
+                }
+            }
+
+        }
+        Ti *= impl->coolingRatio;
+    }
+    impl->finalT = Ti;
+    return exchange;
+}
+
+void SimulatedAnnealingSolver::setEnergyRNG(const RNG& rng)
+{
+    CV_Assert(impl);
+    impl->rEnergy = rng;
+}
+
+void SimulatedAnnealingSolver::setInitialTemperature(double x)
+{
+    CV_Assert(impl);
+    CV_Assert(x>0);
+    impl->initialT = x;
+}
+
+void SimulatedAnnealingSolver::setFinalTemperature(double x)
+{
+    CV_Assert(impl);
+    CV_Assert(x>0);
+    impl->finalT = x;
+}
+
+double SimulatedAnnealingSolver::getFinalTemperature()
+{
+    CV_Assert(impl);
+    return impl->finalT;
+}
+
+void SimulatedAnnealingSolver::setCoolingRatio(double x)
+{
+    CV_Assert(impl);
+    CV_Assert(x>0 && x<1);
+    impl->coolingRatio = x;
+}
+
+class SimulatedAnnealingANN_MLP : public SimulatedAnnealingSolverSystem
+{
+protected:
+    ml::ANN_MLP& nn;
+    Ptr<ml::TrainData> data;
+    int nbVariables;
+    vector<double*> adrVariables;
+    RNG rVar;
+    RNG rIndex;
+    double varTmp;
+    int index;
+public:
+    SimulatedAnnealingANN_MLP(ml::ANN_MLP& x, const Ptr<ml::TrainData>& d) : nn(x), data(d)
+    {
+        initVarMap();
+    }
+    ~SimulatedAnnealingANN_MLP() {}
+protected:
+    void changeState()
+    {
+        index = rIndex.uniform(0, nbVariables);
+        double dv = rVar.uniform(-1.0, 1.0);
+        varTmp = *adrVariables[index];
+        *adrVariables[index] = dv;
+    }
+
+    void reverseState()
+    {
+        *adrVariables[index] = varTmp;
+    }
+
+    double energy() const { return nn.calcError(data, false, noArray()); }
+
+protected:
+    void initVarMap()
+    {
+        Mat l = nn.getLayerSizes();
+        nbVariables = 0;
+        adrVariables.clear();
+        for (int i = 1; i < l.rows-1; i++)
+        {
+            Mat w = nn.getWeights(i);
+            for (int j = 0; j < w.rows; j++)
+            {
+                for (int k = 0; k < w.cols; k++, nbVariables++)
+                {
+                    if (j == w.rows - 1)
+                    {
+                        adrVariables.push_back(&w.at<double>(w.rows - 1, k));
+                    }
+                    else
+                    {
+                        adrVariables.push_back(&w.at<double>(j, k));
+                    }
+                }
+            }
+        }
+    }
+
+};
+
+double ANN_MLP::getAnnealInitialT() const
+{
+    const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    return this_->getAnnealInitialT();
+}
+
+void ANN_MLP::setAnnealInitialT(double val)
+{
+    ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    this_->setAnnealInitialT(val);
+}
+
+double ANN_MLP::getAnnealFinalT() const
+{
+    const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    return this_->getAnnealFinalT();
+}
+
+void ANN_MLP::setAnnealFinalT(double val)
+{
+    ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    this_->setAnnealFinalT(val);
+}
+
+double ANN_MLP::getAnnealCoolingRatio() const
+{
+    const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    return this_->getAnnealCoolingRatio();
+}
+
+void ANN_MLP::setAnnealCoolingRatio(double val)
+{
+    ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    this_->setAnnealCoolingRatio(val);
+}
+
+int ANN_MLP::getAnnealItePerStep() const
+{
+    const ANN_MLP_ANNEAL* this_ = dynamic_cast<const ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    return this_->getAnnealItePerStep();
+}
+
+void ANN_MLP::setAnnealItePerStep(int val)
+{
+    ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    this_->setAnnealItePerStep(val);
+}
+
+void ANN_MLP::setAnnealEnergyRNG(const RNG& rng)
+{
+    ANN_MLP_ANNEAL* this_ = dynamic_cast<ANN_MLP_ANNEAL*>(this);
+    if (!this_)
+        CV_Error(Error::StsNotImplemented, "the class is not ANN_MLP_ANNEAL");
+    this_->setAnnealEnergyRNG(rng);
+}
+
+class ANN_MLPImpl : public ANN_MLP_ANNEAL
 {
 public:
     ANN_MLPImpl()
     {
         clear();
-        setActivationFunction( SIGMOID_SYM, 0, 0 );
+        setActivationFunction( SIGMOID_SYM, 0, 0);
         setLayerSizes(Mat());
         setTrainMethod(ANN_MLP::RPROP, 0.1, FLT_EPSILON);
     }
@@ -93,6 +365,13 @@ public:
     CV_IMPL_PROPERTY(double, RpropDWMinus, params.rpDWMinus)
     CV_IMPL_PROPERTY(double, RpropDWMin, params.rpDWMin)
     CV_IMPL_PROPERTY(double, RpropDWMax, params.rpDWMax)
+    CV_IMPL_PROPERTY(double, AnnealInitialT, params.initialT)
+    CV_IMPL_PROPERTY(double, AnnealFinalT, params.finalT)
+    CV_IMPL_PROPERTY(double, AnnealCoolingRatio, params.coolingRatio)
+    CV_IMPL_PROPERTY(int, AnnealItePerStep, params.itePerStep)
+
+    //CV_IMPL_PROPERTY(RNG, AnnealEnergyRNG, params.rEnergy)
+    inline void setAnnealEnergyRNG(const RNG& val) { params.rEnergy = val; }
 
     void clear()
     {
@@ -107,7 +386,7 @@ public:
 
     void setTrainMethod(int method, double param1, double param2)
     {
-        if (method != ANN_MLP::RPROP && method != ANN_MLP::BACKPROP)
+        if (method != ANN_MLP::RPROP && method != ANN_MLP::BACKPROP && method != ANN_MLP::ANNEAL)
             method = ANN_MLP::RPROP;
         params.trainMethod = method;
         if(method == ANN_MLP::RPROP )
@@ -117,14 +396,14 @@ public:
             params.rpDW0 = param1;
             params.rpDWMin = std::max( param2, 0. );
         }
-        else if(method == ANN_MLP::BACKPROP )
+        else if (method == ANN_MLP::BACKPROP)
         {
-            if( param1 <= 0 )
+            if (param1 <= 0)
                 param1 = 0.1;
             params.bpDWScale = inBounds<double>(param1, 1e-3, 1.);
-            if( param2 < 0 )
+            if (param2 < 0)
                 param2 = 0.1;
-            params.bpMomentScale = std::min( param2, 1. );
+            params.bpMomentScale = std::min(param2, 1.);
         }
     }
 
@@ -133,9 +412,9 @@ public:
         return params.trainMethod;
     }
 
-    void setActivationFunction(int _activ_func, double _f_param1, double _f_param2 )
+    void setActivationFunction(int _activ_func, double _f_param1, double _f_param2)
     {
-        if( _activ_func < 0 || _activ_func > GAUSSIAN )
+        if( _activ_func < 0 || _activ_func > LEAKYRELU)
             CV_Error( CV_StsOutOfRange, "Unknown activation function" );
 
         activ_func = _activ_func;
@@ -153,11 +432,23 @@ public:
         case GAUSSIAN:
             max_val = 1.; min_val = 0.05;
             max_val1 = 1.; min_val1 = 0.02;
-            if( fabs(_f_param1) < FLT_EPSILON )
+            if (fabs(_f_param1) < FLT_EPSILON)
                 _f_param1 = 1.;
-            if( fabs(_f_param2) < FLT_EPSILON )
+            if (fabs(_f_param2) < FLT_EPSILON)
                 _f_param2 = 1.;
             break;
+        case RELU:
+            if (fabs(_f_param1) < FLT_EPSILON)
+                _f_param1 = 1;
+            min_val = max_val = min_val1 = max_val1 = 0.;
+            _f_param2 = 0.;
+            break;
+        case LEAKYRELU:
+            if (fabs(_f_param1) < FLT_EPSILON)
+                _f_param1 = 0.01;
+            min_val = max_val = min_val1 = max_val1 = 0.;
+            _f_param2 = 0.;
+            break;
         default:
             min_val = max_val = min_val1 = max_val1 = 0.;
             _f_param1 = 1.;
@@ -368,47 +659,61 @@ public:
         }
     }
 
-    void calc_activ_func( Mat& sums, const Mat& w ) const
+    void calc_activ_func(Mat& sums, const Mat& w) const
     {
-        const double* bias = w.ptr<double>(w.rows-1);
+        const double* bias = w.ptr<double>(w.rows - 1);
         int i, j, n = sums.rows, cols = sums.cols;
         double scale = 0, scale2 = f_param2;
 
-        switch( activ_func )
+        switch (activ_func)
         {
-            case IDENTITY:
-                scale = 1.;
-                break;
-            case SIGMOID_SYM:
-                scale = -f_param1;
-                break;
-            case GAUSSIAN:
-                scale = -f_param1*f_param1;
-                break;
-            default:
-                ;
+        case IDENTITY:
+            scale = 1.;
+            break;
+        case SIGMOID_SYM:
+            scale = -f_param1;
+            break;
+        case GAUSSIAN:
+            scale = -f_param1*f_param1;
+            break;
+        case RELU:
+            scale = 1;
+            break;
+        case LEAKYRELU:
+            scale = 1;
+            break;
+        default:
+            ;
         }
 
-        CV_Assert( sums.isContinuous() );
+        CV_Assert(sums.isContinuous());
 
-        if( activ_func != GAUSSIAN )
+        if (activ_func != GAUSSIAN)
         {
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* data = sums.ptr<double>(i);
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
+                {
                     data[j] = (data[j] + bias[j])*scale;
+                    if (activ_func == RELU)
+                        if (data[j] < 0)
+                            data[j] = 0;
+                    if (activ_func == LEAKYRELU)
+                        if (data[j] < 0)
+                            data[j] *= f_param1;
+                }
             }
 
-            if( activ_func == IDENTITY )
+            if (activ_func == IDENTITY || activ_func == RELU || activ_func == LEAKYRELU)
                 return;
         }
         else
         {
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* data = sums.ptr<double>(i);
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     double t = data[j] + bias[j];
                     data[j] = t*t*scale;
@@ -416,92 +721,132 @@ public:
             }
         }
 
-        exp( sums, sums );
+        exp(sums, sums);
 
-        if( sums.isContinuous() )
+        if (sums.isContinuous())
         {
             cols *= n;
             n = 1;
         }
 
-        switch( activ_func )
+        switch (activ_func)
         {
-            case SIGMOID_SYM:
-                for( i = 0; i < n; i++ )
+        case SIGMOID_SYM:
+            for (i = 0; i < n; i++)
+            {
+                double* data = sums.ptr<double>(i);
+                for (j = 0; j < cols; j++)
                 {
-                    double* data = sums.ptr<double>(i);
-                    for( j = 0; j < cols; j++ )
+                    if (!cvIsInf(data[j]))
                     {
-                        if(!cvIsInf(data[j]))
-                        {
-                            double t = scale2*(1. - data[j])/(1. + data[j]);
-                            data[j] = t;
-                        }
-                        else
-                        {
-                            data[j] = -scale2;
-                        }
+                        double t = scale2*(1. - data[j]) / (1. + data[j]);
+                        data[j] = t;
+                    }
+                    else
+                    {
+                        data[j] = -scale2;
                     }
                 }
-                break;
+            }
+            break;
 
-            case GAUSSIAN:
-                for( i = 0; i < n; i++ )
-                {
-                    double* data = sums.ptr<double>(i);
-                    for( j = 0; j < cols; j++ )
-                        data[j] = scale2*data[j];
-                }
-                break;
+        case GAUSSIAN:
+            for (i = 0; i < n; i++)
+            {
+                double* data = sums.ptr<double>(i);
+                for (j = 0; j < cols; j++)
+                    data[j] = scale2*data[j];
+            }
+            break;
 
-            default:
-                ;
+        default:
+            ;
         }
     }
 
-    void calc_activ_func_deriv( Mat& _xf, Mat& _df, const Mat& w ) const
+    void calc_activ_func_deriv(Mat& _xf, Mat& _df, const Mat& w) const
     {
-        const double* bias = w.ptr<double>(w.rows-1);
+        const double* bias = w.ptr<double>(w.rows - 1);
         int i, j, n = _xf.rows, cols = _xf.cols;
 
-        if( activ_func == IDENTITY )
+        if (activ_func == IDENTITY)
         {
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);
 
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     xf[j] += bias[j];
                     df[j] = 1;
                 }
             }
         }
-        else if( activ_func == GAUSSIAN )
+        else if (activ_func == RELU)
+        {
+            for (i = 0; i < n; i++)
+            {
+                double* xf = _xf.ptr<double>(i);
+                double* df = _df.ptr<double>(i);
+
+                for (j = 0; j < cols; j++)
+                {
+                    xf[j] += bias[j];
+                    if (xf[j] < 0)
+                    {
+                        xf[j] = 0;
+                        df[j] = 0;
+                    }
+                    else
+                        df[j] = 1;
+                }
+            }
+        }
+        else if (activ_func == LEAKYRELU)
+        {
+            for (i = 0; i < n; i++)
+            {
+                double* xf = _xf.ptr<double>(i);
+                double* df = _df.ptr<double>(i);
+
+                for (j = 0; j < cols; j++)
+                {
+                    xf[j] += bias[j];
+                    if (xf[j] < 0)
+                    {
+                        xf[j] = f_param1*xf[j];
+                        df[j] = f_param1;
+                    }
+                    else
+                        df[j] = 1;
+                }
+            }
+        }
+        else if (activ_func == GAUSSIAN)
         {
             double scale = -f_param1*f_param1;
             double scale2 = scale*f_param2;
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);
 
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     double t = xf[j] + bias[j];
-                    df[j] = t*2*scale2;
+                    df[j] = t * 2 * scale2;
                     xf[j] = t*t*scale;
                 }
             }
-            exp( _xf, _xf );
+            exp(_xf, _xf);
 
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);
 
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                     df[j] *= xf[j];
             }
         }
@@ -510,34 +855,34 @@ public:
             double scale = f_param1;
             double scale2 = f_param2;
 
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);
 
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     xf[j] = (xf[j] + bias[j])*scale;
                     df[j] = -fabs(xf[j]);
                 }
             }
 
-            exp( _df, _df );
+            exp(_df, _df);
 
             // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
             // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
             // 2*a*exp(-ax)/(1+exp(-ax))^2
-            scale *= 2*f_param2;
-            for( i = 0; i < n; i++ )
+            scale *= 2 * f_param2;
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);
 
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     int s0 = xf[j] > 0 ? 1 : -1;
-                    double t0 = 1./(1. + df[j]);
-                    double t1 = scale*df[j]*t0*t0;
+                    double t0 = 1. / (1. + df[j]);
+                    double t1 = scale*df[j] * t0*t0;
                     t0 *= scale2*(1. - df[j])*s0;
                     df[j] = t1;
                     xf[j] = t0;
@@ -713,13 +1058,34 @@ public:
         termcrit.maxCount = std::max((params.termCrit.type & CV_TERMCRIT_ITER ? params.termCrit.maxCount : MAX_ITER), 1);
         termcrit.epsilon = std::max((params.termCrit.type & CV_TERMCRIT_EPS ? params.termCrit.epsilon : DEFAULT_EPSILON), DBL_EPSILON);
 
-        int iter = params.trainMethod == ANN_MLP::BACKPROP ?
-            train_backprop( inputs, outputs, sw, termcrit ) :
-            train_rprop( inputs, outputs, sw, termcrit );
-
+        int iter = 0;
+        switch(params.trainMethod){
+        case ANN_MLP::BACKPROP:
+            iter = train_backprop(inputs, outputs, sw, termcrit);
+            break;
+        case ANN_MLP::RPROP:
+            iter = train_rprop(inputs, outputs, sw, termcrit);
+            break;
+        case ANN_MLP::ANNEAL:
+            iter = train_anneal(trainData);
+            break;
+        }
         trained = iter > 0;
         return trained;
     }
+    int train_anneal(const Ptr<TrainData>& trainData)
+    {
+        SimulatedAnnealingSolver t(Ptr<SimulatedAnnealingANN_MLP>(new SimulatedAnnealingANN_MLP(*this, trainData)));
+        t.setEnergyRNG(params.rEnergy);
+        t.setFinalTemperature(params.finalT);
+        t.setInitialTemperature(params.initialT);
+        t.setCoolingRatio(params.coolingRatio);
+        t.setIterPerStep(params.itePerStep);
+        trained = true; // Enable call to CalcError
+        int iter =  t.run();
+        trained =false;
+        return iter;
+    }
 
     int train_backprop( const Mat& inputs, const Mat& outputs, const Mat& _sw, TermCriteria termCrit )
     {
@@ -783,7 +1149,7 @@ public:
                 E = 0;
 
                 // shuffle indices
-                for( i = 0; i < count; i++ )
+                for( i = 0; i <count; i++ )
                 {
                     j = rng.uniform(0, count);
                     k = rng.uniform(0, count);
@@ -1110,7 +1476,9 @@ public:
     {
         const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
                                       activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
-                                      activ_func == GAUSSIAN ? "GAUSSIAN" : 0;
+                                      activ_func == GAUSSIAN ? "GAUSSIAN" :
+                                      activ_func == RELU ? "RELU" :
+                                      activ_func == LEAKYRELU ? "LEAKYRELU" : 0;
 
         if( activ_func_name )
             fs << "activation_function" << activ_func_name;
@@ -1132,7 +1500,7 @@ public:
             fs << "dw_scale" << params.bpDWScale;
             fs << "moment_scale" << params.bpMomentScale;
         }
-        else if( params.trainMethod == ANN_MLP::RPROP )
+        else if (params.trainMethod == ANN_MLP::RPROP)
         {
             fs << "train_method" << "RPROP";
             fs << "dw0" << params.rpDW0;
@@ -1141,6 +1509,14 @@ public:
             fs << "dw_min" << params.rpDWMin;
             fs << "dw_max" << params.rpDWMax;
         }
+        else if (params.trainMethod == ANN_MLP::ANNEAL)
+        {
+            fs << "train_method" << "ANNEAL";
+            fs << "initialT" << params.initialT;
+            fs << "finalT" << params.finalT;
+            fs << "coolingRatio" << params.coolingRatio;
+            fs << "itePerStep" << params.itePerStep;
+        }
         else
             CV_Error(CV_StsError, "Unknown training method");
 
@@ -1191,6 +1567,8 @@ public:
         {
             activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
                          activ_func_name == "IDENTITY" ? IDENTITY :
+                         activ_func_name == "RELU" ? RELU :
+                         activ_func_name == "LEAKYRELU" ? LEAKYRELU :
                          activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
             CV_Assert( activ_func >= 0 );
         }
@@ -1200,7 +1578,7 @@ public:
         f_param1 = (double)fn["f_param1"];
         f_param2 = (double)fn["f_param2"];
 
-        setActivationFunction( activ_func, f_param1, f_param2 );
+        setActivationFunction( activ_func, f_param1, f_param2);
 
         min_val = (double)fn["min_val"];
         max_val = (double)fn["max_val"];
@@ -1220,7 +1598,7 @@ public:
                 params.bpDWScale = (double)tpn["dw_scale"];
                 params.bpMomentScale = (double)tpn["moment_scale"];
             }
-            else if( tmethod_name == "RPROP" )
+            else if (tmethod_name == "RPROP")
             {
                 params.trainMethod = ANN_MLP::RPROP;
                 params.rpDW0 = (double)tpn["dw0"];
@@ -1229,6 +1607,14 @@ public:
                 params.rpDWMin = (double)tpn["dw_min"];
                 params.rpDWMax = (double)tpn["dw_max"];
             }
+            else if (tmethod_name == "ANNEAL")
+            {
+                params.trainMethod = ANN_MLP::ANNEAL;
+                params.initialT = (double)tpn["initialT"];
+                params.finalT = (double)tpn["finalT"];
+                params.coolingRatio = (double)tpn["coolingRatio"];
+                params.itePerStep = tpn["itePerStep"];
+            }
             else
                 CV_Error(CV_StsParseError, "Unknown training method (should be BACKPROP or RPROP)");
 
@@ -1320,6 +1706,8 @@ public:
 };
 
 
+
+
 Ptr<ANN_MLP> ANN_MLP::create()
 {
     return makePtr<ANN_MLPImpl>();
@@ -1329,14 +1717,12 @@ Ptr<ANN_MLP> ANN_MLP::load(const String& filepath)
 {
     FileStorage fs;
     fs.open(filepath, FileStorage::READ);
-
+    CV_Assert(fs.isOpened());
     Ptr<ANN_MLP> ann = makePtr<ANN_MLPImpl>();
-
     ((ANN_MLPImpl*)ann.get())->read(fs.getFirstTopLevelNode());
     return ann;
 }
 
-
-    }}
+}}
 
 /* End of file. */