Fix android build warnings
author Andrey Kamaev <andrey.kamaev@itseez.com>
Tue, 4 Sep 2012 13:44:23 +0000 (17:44 +0400)
committer Andrey Kamaev <andrey.kamaev@itseez.com>
Tue, 4 Sep 2012 13:44:23 +0000 (17:44 +0400)
modules/contrib/src/bowmsctrainer.cpp
modules/contrib/src/openfabmap.cpp
modules/objdetect/src/haar.cpp
samples/cpp/pca.cpp

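diff --git a/modules/contrib/src/bowmsctrainer.cpp b/modules/contrib/src/bowmsctrainer.cpp
index c952282..448505c 100644
--- a/modules/contrib/src/bowmsctrainer.cpp
+++ b/modules/contrib/src/bowmsctrainer.cpp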
@@ -81,46 +81,46 @@ Mat BOWMSCTrainer::cluster() const {
     return cluster(mergedDescriptors);
 }
 
-Mat BOWMSCTrainer::cluster(const Mat& descriptors) const {
+Mat BOWMSCTrainer::cluster(const Mat& _descriptors) const {
 
-    CV_Assert(!descriptors.empty());
+    CV_Assert(!_descriptors.empty());
 
     // TODO: sort the descriptors before clustering.
 
 
-    Mat icovar = Mat::eye(descriptors.cols,descriptors.cols,descriptors.type());
+    Mat icovar = Mat::eye(_descriptors.cols,_descriptors.cols,_descriptors.type());
 
     vector<Mat> initialCentres;
-    initialCentres.push_back(descriptors.row(0));
-    for (int i = 1; i < descriptors.rows; i++) {
+    initialCentres.push_back(_descriptors.row(0));
+    for (int i = 1; i < _descriptors.rows; i++) {
         double minDist = DBL_MAX;
         for (size_t j = 0; j < initialCentres.size(); j++) {
             minDist = std::min(minDist,
-                cv::Mahalanobis(descriptors.row(i),initialCentres[j],
+                cv::Mahalanobis(_descriptors.row(i),initialCentres[j],
                 icovar));
         }
         if (minDist > clusterSize)
-            initialCentres.push_back(descriptors.row(i));
+            initialCentres.push_back(_descriptors.row(i));
     }
 
     std::vector<std::list<cv::Mat> > clusters;
     clusters.resize(initialCentres.size());
-    for (int i = 0; i < descriptors.rows; i++) {
+    for (int i = 0; i < _descriptors.rows; i++) {
         int index = 0; double dist = 0, minDist = DBL_MAX;
         for (size_t j = 0; j < initialCentres.size(); j++) {
-            dist = cv::Mahalanobis(descriptors.row(i),initialCentres[j],icovar);
+            dist = cv::Mahalanobis(_descriptors.row(i),initialCentres[j],icovar);
             if (dist < minDist) {
                 minDist = dist;
                 index = (int)j;
             }
         }
-        clusters[index].push_back(descriptors.row(i));
+        clusters[index].push_back(_descriptors.row(i));
     }
 
     // TODO: throw away small clusters.
 
     Mat vocabulary;
-    Mat centre = Mat::zeros(1,descriptors.cols,descriptors.type());
+    Mat centre = Mat::zeros(1,_descriptors.cols,_descriptors.type());
     for (size_t i = 0; i < clusters.size(); i++) {
         centre.setTo(0);
         for (std::list<cv::Mat>::iterator Ci = clusters[i].begin(); Ci != clusters[i].end(); Ci++) {
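diff --git a/modules/contrib/src/openfabmap.cpp b/modules/contrib/src/openfabmap.cpp
index 99795c9..a6b4ac1 100644
--- a/modules/contrib/src/openfabmap.cpp
+++ b/modules/contrib/src/openfabmap.cpp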
@@ -63,7 +63,7 @@ namespace of2 {
 static double logsumexp(double a, double b) {
     return a > b ? log(1 + exp(b - a)) + a : log(1 + exp(a - b)) + b;
 }
-    
+
 FabMap::FabMap(const Mat& _clTree, double _PzGe,
         double _PzGNe, int _flags, int _numSamples) :
     clTree(_clTree), PzGe(_PzGe), PzGNe(_PzGNe), flags(
@@ -445,16 +445,16 @@ FabMap1::~FabMap1() {
 }
 
 void FabMap1::getLikelihoods(const Mat& queryImgDescriptor,
-        const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
+        const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
 
-    for (size_t i = 0; i < testImgDescriptors.size(); i++) {
+    for (size_t i = 0; i < testImageDescriptors.size(); i++) {
         bool zq, zpq, Lzq;
         double logP = 0;
         for (int q = 0; q < clTree.cols; q++) {
 
             zq = queryImgDescriptor.at<float>(0,q) > 0;
             zpq = queryImgDescriptor.at<float>(0,pq(q)) > 0;
-            Lzq = testImgDescriptors[i].at<float>(0,q) > 0;
+            Lzq = testImageDescriptors[i].at<float>(0,q) > 0;
 
             logP += log((this->*PzGL)(q, zq, zpq, Lzq));
 
@@ -490,16 +490,16 @@ FabMapLUT::~FabMapLUT() {
 }
 
 void FabMapLUT::getLikelihoods(const Mat& queryImgDescriptor,
-        const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
+        const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
 
     double precFactor = (double)pow(10.0, -precision);
 
-    for (size_t i = 0; i < testImgDescriptors.size(); i++) {
+    for (size_t i = 0; i < testImageDescriptors.size(); i++) {
         unsigned long long int logP = 0;
         for (int q = 0; q < clTree.cols; q++) {
             logP += table[q][(queryImgDescriptor.at<float>(0,pq(q)) > 0) +
             ((queryImgDescriptor.at<float>(0, q) > 0) << 1) +
-            ((testImgDescriptors[i].at<float>(0,q) > 0) << 2)];
+            ((testImageDescriptors[i].at<float>(0,q) > 0) << 2)];
         }
         matches.push_back(IMatch(0,(int)i,-precFactor*(double)logP,0));
     }
@@ -518,7 +518,7 @@ FabMapFBO::~FabMapFBO() {
 }
 
 void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
-        const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
+        const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
 
     std::multiset<WordStats> wordData;
     setWordStatistics(queryImgDescriptor, wordData);
@@ -526,7 +526,7 @@ void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
     vector<int> matchIndices;
     vector<IMatch> queryMatches;
 
-    for (size_t i = 0; i < testImgDescriptors.size(); i++) {
+    for (size_t i = 0; i < testImageDescriptors.size(); i++) {
         queryMatches.push_back(IMatch(0,(int)i,0,0));
         matchIndices.push_back((int)i);
     }
@@ -543,7 +543,7 @@ void FabMapFBO::getLikelihoods(const Mat& queryImgDescriptor,
 
         for (size_t i = 0; i < matchIndices.size(); i++) {
             bool Lzq =
-                testImgDescriptors[matchIndices[i]].at<float>(0,wordIter->q) > 0;
+                testImageDescriptors[matchIndices[i]].at<float>(0,wordIter->q) > 0;
             queryMatches[matchIndices[i]].likelihood +=
                 log((this->*PzGL)(wordIter->q,zq,zpq,Lzq));
             currBest =
@@ -689,17 +689,17 @@ void FabMap2::add(const vector<Mat>& queryImgDescriptors) {
 }
 
 void FabMap2::getLikelihoods(const Mat& queryImgDescriptor,
-        const vector<Mat>& testImgDescriptors, vector<IMatch>& matches) {
+        const vector<Mat>& testImageDescriptors, vector<IMatch>& matches) {
 
-    if (&testImgDescriptors== &(this->testImgDescriptors)) {
+    if (&testImageDescriptors == &testImgDescriptors) {
         getIndexLikelihoods(queryImgDescriptor, testDefaults, testInvertedMap,
             matches);
     } else {
         CV_Assert(!(flags & MOTION_MODEL));
         vector<double> defaults;
         std::map<int, vector<int> > invertedMap;
-        for (size_t i = 0; i < testImgDescriptors.size(); i++) {
-            addToIndex(testImgDescriptors[i],defaults,invertedMap);
+        for (size_t i = 0; i < testImageDescriptors.size(); i++) {
+            addToIndex(testImageDescriptors[i],defaults,invertedMap);
         }
         getIndexLikelihoods(queryImgDescriptor, defaults, invertedMap, matches);
     }
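diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp
index 3dd032f..4ba4e7f 100644
--- a/modules/objdetect/src/haar.cpp
+++ b/modules/objdetect/src/haar.cpp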
 
 #if CV_SSE2 || CV_SSE3
 #   if !CV_SSE4_1 && !CV_SSE4_2
-#              define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
-#       define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m)) 
+#       define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m))
+#       define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m))
 #   endif
 #endif
 
-#      if  CV_AVX
-#              define CV_HAAR_USE_AVX 1
-#      else 
-#              if  CV_SSE2 || CV_SSE3 
-#                      define CV_HAAR_USE_SSE 1
-#              endif
-#      endif
+#   if  CV_AVX
+#       define CV_HAAR_USE_AVX 1
+#   else
+#       if  CV_SSE2 || CV_SSE3
+#           define CV_HAAR_USE_SSE 1
+#       endif
+#   endif
 
 /* these settings affect the quality of detection: change with care */
 #define CV_ADJUST_FEATURES 1
@@ -634,86 +634,86 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
 
 
 //AVX version icvEvalHidHaarClassifier.  Process 8 CvHidHaarClassifiers per call. Check AVX support before invocation!!
-#ifdef CV_HAAR_USE_AVX 
+#ifdef CV_HAAR_USE_AVX
 CV_INLINE
 double icvEvalHidHaarClassifierAVX( CvHidHaarClassifier* classifier,
-                                                                       double variance_norm_factor, size_t p_offset )
+                                    double variance_norm_factor, size_t p_offset )
 {
-       int  CV_DECL_ALIGNED(32) idxV[8] = {0,0,0,0,0,0,0,0};
-       char flags[8] = {0,0,0,0,0,0,0,0};
-       CvHidHaarTreeNode* nodes[8];
-       double res = 0; 
-       char exitConditionFlag = 0;
-       for(;;)
-       {
-               float  CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
-               nodes[0] = classifier    ->node + idxV[0];
-               nodes[1] = (classifier+1)->node + idxV[1];
-               nodes[2] = (classifier+2)->node + idxV[2];
-               nodes[3] = (classifier+3)->node + idxV[3];
-               nodes[4] = (classifier+4)->node + idxV[4];
-               nodes[5] = (classifier+5)->node + idxV[5];
-               nodes[6] = (classifier+6)->node + idxV[6];
-               nodes[7] = (classifier+7)->node + idxV[7];
-
-               __m256 t = _mm256_set1_ps(variance_norm_factor);
-               t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
-
-               __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
-                               calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
-                               p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); 
-               __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, 
-                               nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); 
-               __m256 sum = _mm256_mul_ps(offset, weight);
-                       
-               offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
-                               calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
-                               calc_sum(nodes[0]->feature.rect[1],p_offset)); 
-               weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, 
-                               nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); 
-       
-               sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
-                                       
-               if( nodes[0]->feature.rect[2].p0 )
-                       tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
-               if( nodes[1]->feature.rect[2].p0 )
+    int  CV_DECL_ALIGNED(32) idxV[8] = {0,0,0,0,0,0,0,0};
+    char flags[8] = {0,0,0,0,0,0,0,0};
+    CvHidHaarTreeNode* nodes[8];
+    double res = 0;
+    char exitConditionFlag = 0;
+    for(;;)
+    {
+        float  CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
+        nodes[0] = classifier    ->node + idxV[0];
+        nodes[1] = (classifier+1)->node + idxV[1];
+        nodes[2] = (classifier+2)->node + idxV[2];
+        nodes[3] = (classifier+3)->node + idxV[3];
+        nodes[4] = (classifier+4)->node + idxV[4];
+        nodes[5] = (classifier+5)->node + idxV[5];
+        nodes[6] = (classifier+6)->node + idxV[6];
+        nodes[7] = (classifier+7)->node + idxV[7];
+
+        __m256 t = _mm256_set1_ps(variance_norm_factor);
+        t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
+
+        __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
+                calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
+                p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
+        __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
+                nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
+        __m256 sum = _mm256_mul_ps(offset, weight);
+
+        offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
+                calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
+                calc_sum(nodes[0]->feature.rect[1],p_offset));
+        weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
+                nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
+
+        sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
+
+        if( nodes[0]->feature.rect[2].p0 )
+            tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
+        if( nodes[1]->feature.rect[2].p0 )
             tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight;
-               if( nodes[2]->feature.rect[2].p0 )
+        if( nodes[2]->feature.rect[2].p0 )
             tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight;
-               if( nodes[3]->feature.rect[2].p0 )
+        if( nodes[3]->feature.rect[2].p0 )
             tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight;
-               if( nodes[4]->feature.rect[2].p0 )
-                       tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
-               if( nodes[5]->feature.rect[2].p0 )
+        if( nodes[4]->feature.rect[2].p0 )
+            tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
+        if( nodes[5]->feature.rect[2].p0 )
             tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight;
-               if( nodes[6]->feature.rect[2].p0 )
+        if( nodes[6]->feature.rect[2].p0 )
             tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight;
-               if( nodes[7]->feature.rect[2].p0 )
+        if( nodes[7]->feature.rect[2].p0 )
             tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight;
-               
-               sum = _mm256_add_ps(sum,_mm256_load_ps(tmp));
-
-               __m256 left = _mm256_set_ps(nodes[7]->left,nodes[6]->left,nodes[5]->left,nodes[4]->left,nodes[3]->left,nodes[2]->left,nodes[1]->left,nodes[0]->left);
-               __m256 right = _mm256_set_ps(nodes[7]->right,nodes[6]->right,nodes[5]->right,nodes[4]->right,nodes[3]->right,nodes[2]->right,nodes[1]->right,nodes[0]->right);
-
-               _mm256_store_si256((__m256i*)idxV,_mm256_cvttps_epi32(_mm256_blendv_ps(right, left,_mm256_cmp_ps(sum, t, _CMP_LT_OQ ))));
-
-               for(int i = 0; i < 8; i++)
-               {
-                       if(idxV[i]<=0)
-                       {
-                               if(!flags[i])
-                               {
-                                       exitConditionFlag++;
-                                       flags[i]=1;
-                                       res+=((classifier+i)->alpha[-idxV[i]]);
-                               }
-                               idxV[i]=0;
-                       }
-               }
-               if(exitConditionFlag==8)
-                       return res;
-       }
+
+        sum = _mm256_add_ps(sum,_mm256_load_ps(tmp));
+
+        __m256 left = _mm256_set_ps(nodes[7]->left,nodes[6]->left,nodes[5]->left,nodes[4]->left,nodes[3]->left,nodes[2]->left,nodes[1]->left,nodes[0]->left);
+        __m256 right = _mm256_set_ps(nodes[7]->right,nodes[6]->right,nodes[5]->right,nodes[4]->right,nodes[3]->right,nodes[2]->right,nodes[1]->right,nodes[0]->right);
+
+        _mm256_store_si256((__m256i*)idxV,_mm256_cvttps_epi32(_mm256_blendv_ps(right, left,_mm256_cmp_ps(sum, t, _CMP_LT_OQ ))));
+
+        for(int i = 0; i < 8; i++)
+        {
+            if(idxV[i]<=0)
+            {
+                if(!flags[i])
+                {
+                    exitConditionFlag++;
+                    flags[i]=1;
+                    res+=((classifier+i)->alpha[-idxV[i]]);
+                }
+                idxV[i]=0;
+            }
+        }
+        if(exitConditionFlag==8)
+            return res;
+    }
 }
 #endif
 
@@ -723,50 +723,50 @@ double icvEvalHidHaarClassifier( CvHidHaarClassifier* classifier,
                                  size_t p_offset )
 {
     int idx = 0;
-       /*#if CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX 
-               if(cv::checkHardwareSupport(CV_CPU_SSE2))//based on old SSE variant. Works slow
-               {
-                       double CV_DECL_ALIGNED(16) temp[2];             
-                       __m128d zero = _mm_setzero_pd();
-                       do
-                       {
-                               CvHidHaarTreeNode* node = classifier->node + idx;
-                               __m128d t = _mm_set1_pd((node->threshold)*variance_norm_factor);
-                               __m128d left = _mm_set1_pd(node->left);
-                               __m128d right = _mm_set1_pd(node->right);
-
-                               double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-                               _sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-                               if( node->feature.rect[2].p0 )
-                                       _sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
-
-                               __m128d sum = _mm_set1_pd(_sum);
-                               t = _mm_cmplt_sd(sum, t);
-                               sum = _mm_blendv_pd(right, left, t);
-
-                               _mm_store_pd(temp, sum);
-                               idx = (int)temp[0];             
-                       }
-                       while(idx > 0 );
-                               
-               }
-               else
-       #endif*/
+    /*#if CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX
+        if(cv::checkHardwareSupport(CV_CPU_SSE2))//based on old SSE variant. Works slow
+        {
+            double CV_DECL_ALIGNED(16) temp[2];
+            __m128d zero = _mm_setzero_pd();
+            do
+            {
+                CvHidHaarTreeNode* node = classifier->node + idx;
+                __m128d t = _mm_set1_pd((node->threshold)*variance_norm_factor);
+                __m128d left = _mm_set1_pd(node->left);
+                __m128d right = _mm_set1_pd(node->right);
+
+                double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
+                _sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
+                if( node->feature.rect[2].p0 )
+                    _sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
+
+                __m128d sum = _mm_set1_pd(_sum);
+                t = _mm_cmplt_sd(sum, t);
+                sum = _mm_blendv_pd(right, left, t);
+
+                _mm_store_pd(temp, sum);
+                idx = (int)temp[0];
+            }
+            while(idx > 0 );
+
+        }
+        else
+    #endif*/
     {
-               do
-               {
+        do
+        {
             CvHidHaarTreeNode* node = classifier->node + idx;
             double t = node->threshold * variance_norm_factor;
 
-                       double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-                       sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
+            double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
+            sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
 
-                       if( node->feature.rect[2].p0 )
-                               sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
+            if( node->feature.rect[2].p0 )
+                sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
 
-                       idx = sum < t ? node->left : node->right;
-               }
-               while( idx > 0 );
+            idx = sum < t ? node->left : node->right;
+        }
+        while( idx > 0 );
     }
     return classifier->alpha[-idx];
 }
@@ -777,18 +777,18 @@ static int
 cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
                                CvPoint pt, double& stage_sum, int start_stage )
 {
-       #ifdef CV_HAAR_USE_AVX 
-                       bool haveAVX = false;
-                       if(cv::checkHardwareSupport(CV_CPU_AVX))
-                               if(_xgetbv(_XCR_XFEATURE_ENABLED_MASK)&0x6)// Check if the OS will save the YMM registers
-                               {
-                                       haveAVX = true;
-                               }                                       
-       #else 
-               #ifdef CV_HAAR_USE_SSE 
-                       bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
-               #endif
-       #endif
+    #ifdef CV_HAAR_USE_AVX
+            bool haveAVX = false;
+            if(cv::checkHardwareSupport(CV_CPU_AVX))
+                if(_xgetbv(_XCR_XFEATURE_ENABLED_MASK)&0x6)// Check if the OS will save the YMM registers
+                {
+                    haveAVX = true;
+                }
+    #else
+        #ifdef CV_HAAR_USE_SSE
+            bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
+        #endif
+    #endif
 
     int p_offset, pq_offset;
     int i, j;
@@ -828,17 +828,17 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
         {
             stage_sum = 0.0;
 
-                       #ifdef CV_HAAR_USE_AVX 
-                       if(haveAVX)
-                       {
-                               for( ; j < cascade->stage_classifier[i].count-8; j+=8 )
-                               {
-                                       stage_sum += icvEvalHidHaarClassifierAVX(
-                                               cascade->stage_classifier[i].classifier+j, 
-                                               variance_norm_factor, p_offset );
-                               }
-                       }
-                       #endif
+            #ifdef CV_HAAR_USE_AVX
+            if(haveAVX)
+            {
+                for( ; j < cascade->stage_classifier[i].count-8; j+=8 )
+                {
+                    stage_sum += icvEvalHidHaarClassifierAVX(
+                        cascade->stage_classifier[i].classifier+j,
+                        variance_norm_factor, p_offset );
+                }
+            }
+            #endif
             for( j = 0; j < ptr->count; j++ )
             {
                 stage_sum += icvEvalHidHaarClassifier( ptr->classifier + j, variance_norm_factor, p_offset );
@@ -859,283 +859,283 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade,
     }
     else if( cascade->isStumpBased )
     {
-       #ifdef CV_HAAR_USE_AVX 
-                       if(haveAVX)
-                       {
-                               CvHidHaarClassifier* classifiers[8];
-                               CvHidHaarTreeNode* nodes[8];
-                               for( i = start_stage; i < cascade->count; i++ )
-                               {
-                                       stage_sum = 0.0;
-                                       int j = 0;
-                                       float  CV_DECL_ALIGNED(32) buf[8];
-                                       if( cascade->stage_classifier[i].two_rects )
-                                       {
-                                               for( ; j <= cascade->stage_classifier[i].count-8; j+=8 )
-                                               {
-                                                       //__m256 stage_sumPart = _mm256_setzero_ps(); 
-                                                       classifiers[0] = cascade->stage_classifier[i].classifier + j;
-                                                       nodes[0] = classifiers[0]->node;
-                                                       classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
-                                                       nodes[1] = classifiers[1]->node;
-                                                       classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
-                                                       nodes[2]= classifiers[2]->node;
-                                                       classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
-                                                       nodes[3] = classifiers[3]->node;
-                                                       classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
-                                                       nodes[4] = classifiers[4]->node;
-                                                       classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
-                                                       nodes[5] = classifiers[5]->node;
-                                                       classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
-                                                       nodes[6] = classifiers[6]->node;
-                                                       classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
-                                                       nodes[7] = classifiers[7]->node;
-
-                                                       __m256 t = _mm256_set1_ps(variance_norm_factor);
-                                                       t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
-
-                                                       __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
-                                                               calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
-                                                               p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); 
-                                                       __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, 
-                                                               nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); 
-                                                       __m256 sum = _mm256_mul_ps(offset, weight);
-
-                                                       offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
-                                                               calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
-                                                               calc_sum(nodes[0]->feature.rect[1],p_offset)); 
-                                                       weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, 
-                                                               nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); 
-                                                       sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
-                                       
-                                                       __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
-                                                               classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
-                                                       __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
-                                                               classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
-
-                                                       _mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ )));
-                                                       stage_sum+=(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
-                       
-                                               }
-                       
-                                               for( ; j < cascade->stage_classifier[i].count; j++ )
-                                               {
-                                                       CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-                                                       CvHidHaarTreeNode* node = classifier->node;
-
-                                                       double t = node->threshold*variance_norm_factor;
-                                                       double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-                                                       sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-                                                       stage_sum += classifier->alpha[sum >= t];
-                                               }
-                                       }
-                                       else
-                                       {
-                                               for( ; j <= (cascade->stage_classifier[i].count)-8; j+=8 )
-                                               {
-                                                       float  CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
-
-                                                       classifiers[0] = cascade->stage_classifier[i].classifier + j;
-                                                       nodes[0] = classifiers[0]->node;
-                                                       classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
-                                                       nodes[1] = classifiers[1]->node;
-                                                       classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
-                                                       nodes[2]= classifiers[2]->node;
-                                                       classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
-                                                       nodes[3] = classifiers[3]->node;
-                                                       classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
-                                                       nodes[4] = classifiers[4]->node;
-                                                       classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
-                                                       nodes[5] = classifiers[5]->node;
-                                                       classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
-                                                       nodes[6] = classifiers[6]->node;
-                                                       classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
-                                                       nodes[7] = classifiers[7]->node;
-
-                                                       __m256 t = _mm256_set1_ps(variance_norm_factor);
-                                                       t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
-                                                       
-                                                       __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
-                                                               calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
-                                                               p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset)); 
-                                                       __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight, 
-                                                               nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight); 
-                                                       __m256 sum = _mm256_mul_ps(offset, weight);
-
-                                                       offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
-                                                               calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
-                                                               calc_sum(nodes[0]->feature.rect[1],p_offset)); 
-                                                       weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight, 
-                                                               nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight); 
-                               
-                                                       sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
-
-                                                       if( nodes[0]->feature.rect[2].p0 )
-                                                               tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
-                                                       if( nodes[1]->feature.rect[2].p0 )
-                                                               tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight;
-                                                       if( nodes[2]->feature.rect[2].p0 )
-                                                               tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight;
-                                                       if( nodes[3]->feature.rect[2].p0 )
-                                                               tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight;
-                                                       if( nodes[4]->feature.rect[2].p0 )
-                                                               tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
-                                                       if( nodes[5]->feature.rect[2].p0 )
-                                                               tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight;
-                                                       if( nodes[6]->feature.rect[2].p0 )
-                                                               tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight;
-                                                       if( nodes[7]->feature.rect[2].p0 )
-                                                               tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight;
-                               
-                                                       sum = _mm256_add_ps(sum, _mm256_load_ps(tmp));
-
-                                                       __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
-                                                               classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
-                                                       __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
-                                                               classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
-
-                                                       __m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ ));
-                                                       outBuf = _mm256_hadd_ps(outBuf, outBuf);
-                                                       outBuf = _mm256_hadd_ps(outBuf, outBuf);
-                                                       _mm256_store_ps(buf, outBuf);
-                                                       stage_sum+=(buf[0]+buf[4]);//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]); 
-                                               }
-                               
-                                               for( ; j < cascade->stage_classifier[i].count; j++ )
-                                               {
-                                                       CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-                                                       CvHidHaarTreeNode* node = classifier->node;
-
-                                                       double t = node->threshold*variance_norm_factor;
-                                                       double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-                                                       sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-                                                       if( node->feature.rect[2].p0 )
-                                                               sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
-                                                       stage_sum += classifier->alpha[sum >= t];
-                                               }
-                                       }
-                                       if( stage_sum < cascade->stage_classifier[i].threshold )
-                                               return -i;
-                               }
-                       }
-                       else
-       #endif
-       #ifdef  CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX //old SSE optimization
-                       if(haveSSE2) 
-                       {
-                               for( i = start_stage; i < cascade->count; i++ )
-                               {
-                                       __m128d stage_sum = _mm_setzero_pd();
-                                       if( cascade->stage_classifier[i].two_rects )
-                                       {
-                                               for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-                                               {
-                                                       CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-                                                       CvHidHaarTreeNode* node = classifier->node;
-
-                                                       // ayasin - NHM perf optim. Avoid use of costly flaky jcc
-                                                       __m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
-                                                       __m128d a = _mm_set_sd(classifier->alpha[0]);
-                                                       __m128d b = _mm_set_sd(classifier->alpha[1]);
-                                                       __m128d sum = _mm_set_sd(calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight +
-                                                                                                               calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight);
-                                                       t = _mm_cmpgt_sd(t, sum);
-                                                       stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
-                                               }
-                                       }
-                                       else
-                                       {
-                                               for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-                                               {
-                                                       CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-                                                       CvHidHaarTreeNode* node = classifier->node;
-                                                       // ayasin - NHM perf optim. Avoid use of costly flaky jcc
-                                                       __m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
-                                                       __m128d a = _mm_set_sd(classifier->alpha[0]);
-                                                       __m128d b = _mm_set_sd(classifier->alpha[1]);
-                                                       double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-                                                       _sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-                                                       if( node->feature.rect[2].p0 )
-                                                               _sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
-                                                       __m128d sum = _mm_set_sd(_sum);
-
-                                                       t = _mm_cmpgt_sd(t, sum);
-                                                       stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
-                                               }
-                                       }
-                                       __m128d i_threshold = _mm_set1_pd(cascade->stage_classifier[i].threshold);
-                                       if( _mm_comilt_sd(stage_sum, i_threshold) )
-                                               return -i;
-                               }
-                       }
-                       else 
-       #endif
-                       {
-                               for( i = start_stage; i < cascade->count; i++ )
-                               {
-                                       stage_sum = 0.0;
-                                       if( cascade->stage_classifier[i].two_rects )
-                                       {
-                                               for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-                                               {
-                                                       CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-                                                       CvHidHaarTreeNode* node = classifier->node;
-                                                       double t = node->threshold*variance_norm_factor;
-                                                       double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-                                                       sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-                                                       stage_sum += classifier->alpha[sum >= t];
-                                               }
-                                       }
-                                       else
-                                       {
-                                               for( j = 0; j < cascade->stage_classifier[i].count; j++ )
-                                               {
-                                                       CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
-                                                       CvHidHaarTreeNode* node = classifier->node;
-                                                       double t = node->threshold*variance_norm_factor;
-                                                       double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
-                                                       sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
-                                                       if( node->feature.rect[2].p0 )
-                                                               sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
-                                                       stage_sum += classifier->alpha[sum >= t];
-                                               }
-                                       }
-                                       if( stage_sum < cascade->stage_classifier[i].threshold )
-                                               return -i;
-                               }
-                       }
-       }
-    
-       else
+    #ifdef CV_HAAR_USE_AVX
+            if(haveAVX)
+            {
+                CvHidHaarClassifier* classifiers[8];
+                CvHidHaarTreeNode* nodes[8];
+                for( i = start_stage; i < cascade->count; i++ )
+                {
+                    stage_sum = 0.0;
+                    int j = 0;
+                    float  CV_DECL_ALIGNED(32) buf[8];
+                    if( cascade->stage_classifier[i].two_rects )
+                    {
+                        for( ; j <= cascade->stage_classifier[i].count-8; j+=8 )
+                        {
+                            //__m256 stage_sumPart = _mm256_setzero_ps();
+                            classifiers[0] = cascade->stage_classifier[i].classifier + j;
+                            nodes[0] = classifiers[0]->node;
+                            classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
+                            nodes[1] = classifiers[1]->node;
+                            classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
+                            nodes[2]= classifiers[2]->node;
+                            classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
+                            nodes[3] = classifiers[3]->node;
+                            classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
+                            nodes[4] = classifiers[4]->node;
+                            classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
+                            nodes[5] = classifiers[5]->node;
+                            classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
+                            nodes[6] = classifiers[6]->node;
+                            classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
+                            nodes[7] = classifiers[7]->node;
+
+                            __m256 t = _mm256_set1_ps(variance_norm_factor);
+                            t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
+
+                            __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
+                                calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
+                                p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
+                            __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
+                                nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
+                            __m256 sum = _mm256_mul_ps(offset, weight);
+
+                            offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
+                                calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
+                                calc_sum(nodes[0]->feature.rect[1],p_offset));
+                            weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
+                                nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
+                            sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
+
+                            __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
+                                classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
+                            __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
+                                classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
+
+                            _mm256_store_ps(buf, _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ )));
+                            stage_sum+=(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
+
+                        }
+
+                        for( ; j < cascade->stage_classifier[i].count; j++ )
+                        {
+                            CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
+                            CvHidHaarTreeNode* node = classifier->node;
+
+                            double t = node->threshold*variance_norm_factor;
+                            double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
+                            sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
+                            stage_sum += classifier->alpha[sum >= t];
+                        }
+                    }
+                    else
+                    {
+                        for( ; j <= (cascade->stage_classifier[i].count)-8; j+=8 )
+                        {
+                            float  CV_DECL_ALIGNED(32) tmp[8] = {0,0,0,0,0,0,0,0};
+
+                            classifiers[0] = cascade->stage_classifier[i].classifier + j;
+                            nodes[0] = classifiers[0]->node;
+                            classifiers[1] = cascade->stage_classifier[i].classifier + j + 1;
+                            nodes[1] = classifiers[1]->node;
+                            classifiers[2] = cascade->stage_classifier[i].classifier + j + 2;
+                            nodes[2]= classifiers[2]->node;
+                            classifiers[3] = cascade->stage_classifier[i].classifier + j + 3;
+                            nodes[3] = classifiers[3]->node;
+                            classifiers[4] = cascade->stage_classifier[i].classifier + j + 4;
+                            nodes[4] = classifiers[4]->node;
+                            classifiers[5] = cascade->stage_classifier[i].classifier + j + 5;
+                            nodes[5] = classifiers[5]->node;
+                            classifiers[6] = cascade->stage_classifier[i].classifier + j + 6;
+                            nodes[6] = classifiers[6]->node;
+                            classifiers[7] = cascade->stage_classifier[i].classifier + j + 7;
+                            nodes[7] = classifiers[7]->node;
+
+                            __m256 t = _mm256_set1_ps(variance_norm_factor);
+                            t = _mm256_mul_ps(t, _mm256_set_ps(nodes[7]->threshold,nodes[6]->threshold,nodes[5]->threshold,nodes[4]->threshold,nodes[3]->threshold,nodes[2]->threshold,nodes[1]->threshold,nodes[0]->threshold));
+
+                            __m256 offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[0],p_offset), calc_sum(nodes[6]->feature.rect[0],p_offset), calc_sum(nodes[5]->feature.rect[0],p_offset),
+                                calc_sum(nodes[4]->feature.rect[0],p_offset), calc_sum(nodes[3]->feature.rect[0],p_offset), calc_sum(nodes[2]->feature.rect[0],p_offset), calc_sum(nodes[1]->feature.rect[0],
+                                p_offset),calc_sum(nodes[0]->feature.rect[0],p_offset));
+                            __m256 weight = _mm256_set_ps(nodes[7]->feature.rect[0].weight, nodes[6]->feature.rect[0].weight, nodes[5]->feature.rect[0].weight,
+                                nodes[4]->feature.rect[0].weight, nodes[3]->feature.rect[0].weight, nodes[2]->feature.rect[0].weight, nodes[1]->feature.rect[0].weight, nodes[0]->feature.rect[0].weight);
+                            __m256 sum = _mm256_mul_ps(offset, weight);
+
+                            offset = _mm256_set_ps(calc_sum(nodes[7]->feature.rect[1],p_offset),calc_sum(nodes[6]->feature.rect[1],p_offset),calc_sum(nodes[5]->feature.rect[1],p_offset),
+                                calc_sum(nodes[4]->feature.rect[1],p_offset),calc_sum(nodes[3]->feature.rect[1],p_offset),calc_sum(nodes[2]->feature.rect[1],p_offset),calc_sum(nodes[1]->feature.rect[1],p_offset),
+                                calc_sum(nodes[0]->feature.rect[1],p_offset));
+                            weight = _mm256_set_ps(nodes[7]->feature.rect[1].weight, nodes[6]->feature.rect[1].weight, nodes[5]->feature.rect[1].weight, nodes[4]->feature.rect[1].weight,
+                                nodes[3]->feature.rect[1].weight, nodes[2]->feature.rect[1].weight, nodes[1]->feature.rect[1].weight, nodes[0]->feature.rect[1].weight);
+
+                            sum = _mm256_add_ps(sum, _mm256_mul_ps(offset,weight));
+
+                            if( nodes[0]->feature.rect[2].p0 )
+                                tmp[0] = calc_sum(nodes[0]->feature.rect[2],p_offset) * nodes[0]->feature.rect[2].weight;
+                            if( nodes[1]->feature.rect[2].p0 )
+                                tmp[1] = calc_sum(nodes[1]->feature.rect[2],p_offset) * nodes[1]->feature.rect[2].weight;
+                            if( nodes[2]->feature.rect[2].p0 )
+                                tmp[2] = calc_sum(nodes[2]->feature.rect[2],p_offset) * nodes[2]->feature.rect[2].weight;
+                            if( nodes[3]->feature.rect[2].p0 )
+                                tmp[3] = calc_sum(nodes[3]->feature.rect[2],p_offset) * nodes[3]->feature.rect[2].weight;
+                            if( nodes[4]->feature.rect[2].p0 )
+                                tmp[4] = calc_sum(nodes[4]->feature.rect[2],p_offset) * nodes[4]->feature.rect[2].weight;
+                            if( nodes[5]->feature.rect[2].p0 )
+                                tmp[5] = calc_sum(nodes[5]->feature.rect[2],p_offset) * nodes[5]->feature.rect[2].weight;
+                            if( nodes[6]->feature.rect[2].p0 )
+                                tmp[6] = calc_sum(nodes[6]->feature.rect[2],p_offset) * nodes[6]->feature.rect[2].weight;
+                            if( nodes[7]->feature.rect[2].p0 )
+                                tmp[7] = calc_sum(nodes[7]->feature.rect[2],p_offset) * nodes[7]->feature.rect[2].weight;
+
+                            sum = _mm256_add_ps(sum, _mm256_load_ps(tmp));
+
+                            __m256 alpha0 = _mm256_set_ps(classifiers[7]->alpha[0],classifiers[6]->alpha[0],classifiers[5]->alpha[0],classifiers[4]->alpha[0],classifiers[3]->alpha[0],
+                                classifiers[2]->alpha[0],classifiers[1]->alpha[0],classifiers[0]->alpha[0]);
+                            __m256 alpha1 = _mm256_set_ps(classifiers[7]->alpha[1],classifiers[6]->alpha[1],classifiers[5]->alpha[1],classifiers[4]->alpha[1],classifiers[3]->alpha[1],
+                                classifiers[2]->alpha[1],classifiers[1]->alpha[1],classifiers[0]->alpha[1]);
+
+                            __m256 outBuf = _mm256_blendv_ps(alpha0, alpha1, _mm256_cmp_ps(t, sum, _CMP_LE_OQ ));
+                            outBuf = _mm256_hadd_ps(outBuf, outBuf);
+                            outBuf = _mm256_hadd_ps(outBuf, outBuf);
+                            _mm256_store_ps(buf, outBuf);
+                            stage_sum+=(buf[0]+buf[4]);//(buf[0]+buf[1]+buf[2]+buf[3]+buf[4]+buf[5]+buf[6]+buf[7]);
+                        }
+
+                        for( ; j < cascade->stage_classifier[i].count; j++ )
+                        {
+                            CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
+                            CvHidHaarTreeNode* node = classifier->node;
+
+                            double t = node->threshold*variance_norm_factor;
+                            double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
+                            sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
+                            if( node->feature.rect[2].p0 )
+                                sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
+                            stage_sum += classifier->alpha[sum >= t];
+                        }
+                    }
+                    if( stage_sum < cascade->stage_classifier[i].threshold )
+                        return -i;
+                }
+            }
+            else
+    #endif
+    #if defined CV_HAAR_USE_SSE && CV_HAAR_USE_SSE && !CV_HAAR_USE_AVX //old SSE optimization
+            if(haveSSE2)
+            {
+                for( i = start_stage; i < cascade->count; i++ )
+                {
+                    __m128d stage_sum = _mm_setzero_pd();
+                    if( cascade->stage_classifier[i].two_rects )
+                    {
+                        for( j = 0; j < cascade->stage_classifier[i].count; j++ )
+                        {
+                            CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
+                            CvHidHaarTreeNode* node = classifier->node;
+
+                            // ayasin - NHM perf optim. Avoid use of costly flaky jcc
+                            __m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
+                            __m128d a = _mm_set_sd(classifier->alpha[0]);
+                            __m128d b = _mm_set_sd(classifier->alpha[1]);
+                            __m128d sum = _mm_set_sd(calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight +
+                                                        calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight);
+                            t = _mm_cmpgt_sd(t, sum);
+                            stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
+                        }
+                    }
+                    else
+                    {
+                        for( j = 0; j < cascade->stage_classifier[i].count; j++ )
+                        {
+                            CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
+                            CvHidHaarTreeNode* node = classifier->node;
+                            // ayasin - NHM perf optim. Avoid use of costly flaky jcc
+                            __m128d t = _mm_set_sd(node->threshold*variance_norm_factor);
+                            __m128d a = _mm_set_sd(classifier->alpha[0]);
+                            __m128d b = _mm_set_sd(classifier->alpha[1]);
+                            double _sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
+                            _sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
+                            if( node->feature.rect[2].p0 )
+                                _sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
+                            __m128d sum = _mm_set_sd(_sum);
+
+                            t = _mm_cmpgt_sd(t, sum);
+                            stage_sum = _mm_add_sd(stage_sum, _mm_blendv_pd(b, a, t));
+                        }
+                    }
+                    __m128d i_threshold = _mm_set1_pd(cascade->stage_classifier[i].threshold);
+                    if( _mm_comilt_sd(stage_sum, i_threshold) )
+                        return -i;
+                }
+            }
+            else
+    #endif
+            {
+                for( i = start_stage; i < cascade->count; i++ )
+                {
+                    stage_sum = 0.0;
+                    if( cascade->stage_classifier[i].two_rects )
+                    {
+                        for( j = 0; j < cascade->stage_classifier[i].count; j++ )
+                        {
+                            CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
+                            CvHidHaarTreeNode* node = classifier->node;
+                            double t = node->threshold*variance_norm_factor;
+                            double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
+                            sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
+                            stage_sum += classifier->alpha[sum >= t];
+                        }
+                    }
+                    else
+                    {
+                        for( j = 0; j < cascade->stage_classifier[i].count; j++ )
+                        {
+                            CvHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j;
+                            CvHidHaarTreeNode* node = classifier->node;
+                            double t = node->threshold*variance_norm_factor;
+                            double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight;
+                            sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight;
+                            if( node->feature.rect[2].p0 )
+                                sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight;
+                            stage_sum += classifier->alpha[sum >= t];
+                        }
+                    }
+                    if( stage_sum < cascade->stage_classifier[i].threshold )
+                        return -i;
+                }
+            }
+    }
+
+    else
     {
         for( i = start_stage; i < cascade->count; i++ )
         {
             stage_sum = 0.0;
-                       int j = 0;
-                       #ifdef CV_HAAR_USE_AVX 
-                       if(haveAVX)
-                       {
-                               for( ; j < cascade->stage_classifier[i].count-8; j+=8 )
-                               {
-                                       stage_sum += icvEvalHidHaarClassifierAVX(
-                                               cascade->stage_classifier[i].classifier+j, 
-                                               variance_norm_factor, p_offset );
-                               }
-                       }
-                       #endif
-                               for(; j < cascade->stage_classifier[i].count; j++ )
-                               {
-                               
-                                       stage_sum += icvEvalHidHaarClassifier(
-                                               cascade->stage_classifier[i].classifier + j,
-                                               variance_norm_factor, p_offset );
-                               }
-                       
+            int k = 0;
+            #ifdef CV_HAAR_USE_AVX
+            if(haveAVX)
+            {
+                for( ; k < cascade->stage_classifier[i].count-8; k+=8 )
+                {
+                    stage_sum += icvEvalHidHaarClassifierAVX(
+                        cascade->stage_classifier[i].classifier+k,
+                        variance_norm_factor, p_offset );
+                }
+            }
+            #endif
+                for(; k < cascade->stage_classifier[i].count; k++ )
+                {
+
+                    stage_sum += icvEvalHidHaarClassifier(
+                        cascade->stage_classifier[i].classifier + k,
+                        variance_norm_factor, p_offset );
+                }
+
             if( stage_sum < cascade->stage_classifier[i].threshold )
                 return -i;
         }
     }
-       //_mm256_zeroupper();
+    //_mm256_zeroupper();
     return 1;
 }
 
index b489a5e..7ab86ea 100644 (file)
@@ -1,13 +1,13 @@
 /*
 * pca.cpp
 *
-*  Author: 
+*  Author:
 *  Kevin Hughes <kevinhughes27[at]gmail[dot]com>
 *
 *  Special Thanks to:
 *  Philipp Wagner <bytefish[at]gmx[dot]de>
 *
-* This program demonstrates how to use OpenCV PCA with a 
+* This program demonstrates how to use OpenCV PCA with a
 * specified amount of variance to retain. The effect
 * is illustrated further by using a trackbar to
 * change the value for retained varaince.
@@ -17,9 +17,9 @@
 * on this list of images. The author recommends using
 * the first 15 faces of the AT&T face data set:
 * http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
-* 
+*
 * so for example your input text file would look like this:
-* 
+*
 *        <path_to_at&t_faces>/orl_faces/s1/1.pgm
 *        <path_to_at&t_faces>/orl_faces/s2/1.pgm
 *        <path_to_at&t_faces>/orl_faces/s3/1.pgm
@@ -50,7 +50,7 @@ using namespace std;
 
 ///////////////////////
 // Functions
-void read_imgList(const string& filename, vector<Mat>& images) {
+static void read_imgList(const string& filename, vector<Mat>& images) {
     std::ifstream file(filename.c_str(), ifstream::in);
     if (!file) {
         string error_message = "No valid input file was given, please check the given filename.";
@@ -62,19 +62,19 @@ void read_imgList(const string& filename, vector<Mat>& images) {
     }
 }
 
-Mat formatImagesForPCA(const vector<Mat> &data)
+static  Mat formatImagesForPCA(const vector<Mat> &data)
 {
     Mat dst(data.size(), data[0].rows*data[0].cols, CV_32F);
     for(unsigned int i = 0; i < data.size(); i++)
     {
         Mat image_row = data[i].clone().reshape(1,1);
         Mat row_i = dst.row(i);
-        image_row.convertTo(row_i,CV_32F);    
+        image_row.convertTo(row_i,CV_32F);
     }
     return dst;
 }
 
-Mat toGrayscale(InputArray _src) {
+static Mat toGrayscale(InputArray _src) {
     Mat src = _src.getMat();
     // only allow one channel
     if(src.channels() != 1) {
@@ -95,22 +95,22 @@ struct params
     string winName;
 };
 
-void onTrackbar(int pos, void* ptr) 
-{    
+static void onTrackbar(int pos, void* ptr)
+{
     cout << "Retained Variance = " << pos << "%   ";
     cout << "re-calculating PCA..." << std::flush;
-    
+
     double var = pos / 100.0;
-    
+
     struct params *p = (struct params *)ptr;
-    
+
     p->pca = PCA(p->data, cv::Mat(), CV_PCA_DATA_AS_ROW, var);
-    
+
     Mat point = p->pca.project(p->data.row(0));
     Mat reconstruction = p->pca.backProject(point);
     reconstruction = reconstruction.reshape(p->ch, p->rows);
     reconstruction = toGrayscale(reconstruction);
-    
+
     imshow(p->winName, reconstruction);
     cout << "done!   # of principal components: " << p->pca.eigenvectors.rows << endl;
 }
@@ -118,19 +118,19 @@ void onTrackbar(int pos, void* ptr)
 
 ///////////////////////
 // Main
-int main(int argc, char** argv) 
+int main(int argc, char** argv)
 {
     if (argc != 2) {
         cout << "usage: " << argv[0] << " <image_list.txt>" << endl;
         exit(1);
     }
-    
+
     // Get the path to your CSV.
     string imgList = string(argv[1]);
-    
+
     // vector to hold the images
     vector<Mat> images;
-    
+
     // Read in the data. This can fail if not valid
     try {
         read_imgList(imgList, images);
@@ -138,29 +138,29 @@ int main(int argc, char** argv)
         cerr << "Error opening file \"" << imgList << "\". Reason: " << e.msg << endl;
         exit(1);
     }
-    
+
     // Quit if there are not enough images for this demo.
     if(images.size() <= 1) {
         string error_message = "This demo needs at least 2 images to work. Please add more images to your data set!";
         CV_Error(CV_StsError, error_message);
     }
-        
+
     // Reshape and stack images into a rowMatrix
     Mat data = formatImagesForPCA(images);
-    
+
     // perform PCA
     PCA pca(data, cv::Mat(), CV_PCA_DATA_AS_ROW, 0.95); // trackbar is initially set here, also this is a common value for retainedVariance
-    
-    // Demonstration of the effect of retainedVariance on the first image 
+
+    // Demonstration of the effect of retainedVariance on the first image
     Mat point = pca.project(data.row(0)); // project into the eigenspace, thus the image becomes a "point"
     Mat reconstruction = pca.backProject(point); // re-create the image from the "point"
     reconstruction = reconstruction.reshape(images[0].channels(), images[0].rows); // reshape from a row vector into image shape
     reconstruction = toGrayscale(reconstruction); // re-scale for displaying purposes
-    
+
     // init highgui window
     string winName = "Reconstruction | press 'q' to quit";
     namedWindow(winName, CV_WINDOW_NORMAL);
-    
+
     // params struct to pass to the trackbar handler
     params p;
     p.data = data;
@@ -168,17 +168,17 @@ int main(int argc, char** argv)
     p.rows = images[0].rows;
     p.pca = pca;
     p.winName = winName;
-    
+
     // create the tracbar
     int pos = 95;
-    createTrackbar("Retained Variance (%)", winName, &pos, 100, onTrackbar, (void*)&p); 
-    
+    createTrackbar("Retained Variance (%)", winName, &pos, 100, onTrackbar, (void*)&p);
+
     // display until user presses q
     imshow(winName, reconstruction);
-    
+
     char key = 0;
     while(key != 'q')
         key = waitKey();
-   
-   return 0; 
+
+   return 0;
 }