enabled SSE3 by default; integrated SSE3-optimized bilateral filter (by Grigoriy...

author Vadim Pisarevsky <vadim.pisarevsky@itseez.com>

Tue, 21 Aug 2012 13:16:06 +0000 (17:16 +0400)

committer Vadim Pisarevsky <vadim.pisarevsky@itseez.com>

Tue, 21 Aug 2012 13:39:40 +0000 (17:39 +0400)
author Vadim Pisarevsky <vadim.pisarevsky@itseez.com>
Tue, 21 Aug 2012 13:16:06 +0000 (17:16 +0400)
committer Vadim Pisarevsky <vadim.pisarevsky@itseez.com>
Tue, 21 Aug 2012 13:39:40 +0000 (17:39 +0400)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 80cf798..c81cbd9 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -191,7 +191,7 @@ OCV_OPTION(ENABLE_POWERPC             "Enable PowerPC for GCC"
  OCV_OPTION(ENABLE_FAST_MATH           "Enable -ffast-math (not recommended for GCC 4.6.x)"       OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
  OCV_OPTION(ENABLE_SSE                 "Enable SSE instructions"                                  ON   IF (MSVC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
  OCV_OPTION(ENABLE_SSE2                "Enable SSE2 instructions"                                 ON   IF (MSVC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
-OCV_OPTION(ENABLE_SSE3                "Enable SSE3 instructions"                                 OFF  IF (CV_ICC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
+OCV_OPTION(ENABLE_SSE3                "Enable SSE3 instructions"                                 ON   IF (MSVC OR CV_ICC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
  OCV_OPTION(ENABLE_SSSE3               "Enable SSSE3 instructions"                                OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
  OCV_OPTION(ENABLE_SSE41               "Enable SSE4.1 instructions"                               OFF  IF (CV_ICC OR CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
  OCV_OPTION(ENABLE_SSE42               "Enable SSE4.2 instructions"                               OFF  IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp

index 0587685..0fe1b6b 100644 (file)
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -1294,28 +1294,64 @@ class BilateralFilter_8u_Invoker :
  public:
      BilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, int _radius, int _maxk,
          int* _space_ofs, float *_space_weight, float *_color_weight) :
-        ParallelLoopBody(), dest(&_dest), temp(&_temp), radius(_radius),
+        temp(&_temp), dest(&_dest), radius(_radius),
          maxk(_maxk), space_ofs(_space_ofs), space_weight(_space_weight), color_weight(_color_weight)
      {
      }
-
+    
      virtual void operator() (const Range& range) const
      {
          int i, j, cn = dest->channels(), k;
          Size size = dest->size();
-
+               #if CV_SSE3
+        int CV_DECL_ALIGNED(16) buf[4];
+        float CV_DECL_ALIGNED(16) bufSum[4];
+        static const int CV_DECL_ALIGNED(16) bufSignMask[] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+        bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
+               #endif
+        
          for( i = range.start; i < range.end; i++ )
          {
              const uchar* sptr = temp->ptr(i+radius) + radius*cn;
              uchar* dptr = dest->ptr(i);
-
+            
              if( cn == 1 )
              {
                  for( j = 0; j < size.width; j++ )
                  {
                      float sum = 0, wsum = 0;
                      int val0 = sptr[j];
-                    for( k = 0; k < maxk; k++ )
+                                       k = 0;
+                                       #if CV_SSE3
+                                       if( haveSSE3 )
+                                       {
+                                               __m128 _val0 = _mm_set1_ps(val0);
+                                               const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
+
+                                               for( ; k <= maxk - 4; k += 4 )
+                                               {
+                                                       __m128 _valF = _mm_set_ps(sptr[j + space_ofs[k+3]], sptr[j + space_ofs[k+2]],
+                                                      sptr[j + space_ofs[k+1]], sptr[j + space_ofs[k]]);
+                                                       
+                                                       __m128 _val = _mm_andnot_ps(_signMask, _mm_sub_ps(_valF, _val0));
+                                                       _mm_store_si128((__m128i*)buf, _mm_cvtps_epi32(_val));
+
+                                                       __m128 _cw = _mm_set_ps(color_weight[buf[3]],color_weight[buf[2]],
+                                                    color_weight[buf[1]],color_weight[buf[0]]);
+                                                       __m128 _sw = _mm_loadu_ps(space_weight+k);
+                                                       __m128 _w = _mm_mul_ps(_cw, _sw);
+                                                        _cw = _mm_mul_ps(_w, _valF);
+
+                                                        _sw = _mm_hadd_ps(_w, _cw);
+                                                        _sw = _mm_hadd_ps(_sw, _sw);
+                                                        _mm_storel_pi((__m64*)bufSum, _sw);
+
+                                                        sum += bufSum[1];
+                                                        wsum += bufSum[0];
+                                               }
+                                       }
+                                       #endif
+                    for( ; k < maxk; k++ )
                      {
                          int val = sptr[j + space_ofs[k]];
                          float w = space_weight[k]*color_weight[std::abs(val - val0)];
@@ -1333,7 +1369,57 @@ public:
                  {
                      float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
                      int b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
-                    for( k = 0; k < maxk; k++ )
+                                       k = 0;
+                                       #if CV_SSE3
+                                       if( haveSSE3 )
+                                       {
+                        const __m128 _b0 = _mm_set1_ps(b0);
+                                               const __m128 _g0 = _mm_set1_ps(g0);
+                                               const __m128 _r0 = _mm_set1_ps(r0);
+                                               const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
+                        
+                        for( ; k <= maxk - 4; k += 4 )
+                        {
+                                                       const uchar* sptr_k  = sptr + j + space_ofs[k];
+                                                       const uchar* sptr_k1 = sptr + j + space_ofs[k+1];
+                                                       const uchar* sptr_k2 = sptr + j + space_ofs[k+2];
+                                                       const uchar* sptr_k3 = sptr + j + space_ofs[k+3];
+
+                                                       __m128 _b = _mm_set_ps(sptr_k3[0],sptr_k2[0],sptr_k1[0],sptr_k[0]);
+                                                       __m128 _g = _mm_set_ps(sptr_k3[1],sptr_k2[1],sptr_k1[1],sptr_k[1]);
+                                                       __m128 _r = _mm_set_ps(sptr_k3[2],sptr_k2[2],sptr_k1[2],sptr_k[2]);
+
+                                                       __m128 bt = _mm_andnot_ps(_signMask, _mm_sub_ps(_b,_b0));
+                                                       __m128 gt = _mm_andnot_ps(_signMask, _mm_sub_ps(_g,_g0));
+                                                       __m128 rt = _mm_andnot_ps(_signMask, _mm_sub_ps(_r,_r0));
+
+                                                       bt =_mm_add_ps(rt, _mm_add_ps(bt, gt));
+                                                       _mm_store_si128((__m128i*)buf, _mm_cvtps_epi32(bt));
+
+                                                       __m128 _w  = _mm_set_ps(color_weight[buf[3]],color_weight[buf[2]],
+                                                    color_weight[buf[1]],color_weight[buf[0]]);
+                                                       __m128 _sw = _mm_loadu_ps(space_weight+k);
+
+                                                       _w = _mm_mul_ps(_w,_sw);
+                                                       _b = _mm_mul_ps(_b, _w);
+                                                       _g = _mm_mul_ps(_g, _w);
+                                                       _r = _mm_mul_ps(_r, _w);
+
+                                                        _w = _mm_hadd_ps(_w, _b);
+                                                        _g = _mm_hadd_ps(_g, _r);
+
+                                                        _w = _mm_hadd_ps(_w, _g);
+                                                        _mm_store_ps(bufSum, _w);
+
+                                                        wsum  += bufSum[0];
+                                                        sum_b += bufSum[1];
+                                                        sum_g += bufSum[2];
+                                                        sum_r += bufSum[3];
+                                                }
+                                       }
+                                       #endif
+
+                    for( ; k < maxk; k++ )
                      {
                          const uchar* sptr_k = sptr + j + space_ofs[k];
                          int b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
@@ -1351,10 +1437,10 @@ public:
              }
          }
      }
-
+    
  private:
-    Mat *dest;
      const Mat *temp;
+    Mat *dest;
      int radius, maxk, *space_ofs;
      float *space_weight, *color_weight;
  };
@@ -1364,46 +1450,51 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
      double sigma_color, double sigma_space,
      int borderType )
  {
+
      int cn = src.channels();
      int i, j, maxk, radius;
      Size size = src.size();
-
+    
      CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) &&
                src.type() == dst.type() && src.size() == dst.size() &&
                src.data != dst.data );
-
+    
      if( sigma_color <= 0 )
          sigma_color = 1;
      if( sigma_space <= 0 )
          sigma_space = 1;
-
+    
      double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
      double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
-
+    
      if( d <= 0 )
          radius = cvRound(sigma_space*1.5);
      else
          radius = d/2;
      radius = MAX(radius, 1);
      d = radius*2 + 1;
-
+    
      Mat temp;
      copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
-
+    
      vector<float> _color_weight(cn*256);
      vector<float> _space_weight(d*d);
      vector<int> _space_ofs(d*d);
      float* color_weight = &_color_weight[0];
      float* space_weight = &_space_weight[0];
      int* space_ofs = &_space_ofs[0];
-
+    
      // initialize color-related bilateral filter coefficients
+
      for( i = 0; i < 256*cn; i++ )
          color_weight[i] = (float)std::exp(i*i*gauss_color_coeff);
-
+    
      // initialize space-related bilateral filter coefficients
      for( i = -radius, maxk = 0; i <= radius; i++ )
-        for( j = -radius; j <= radius; j++ )
+       {
+               j = -radius;
+
+        for( ;j <= radius; j++ )
          {
              double r = std::sqrt((double)i*i + (double)j*j);
              if( r > radius )
@@ -1411,7 +1502,8 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
              space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff);
              space_ofs[maxk++] = (int)(i*temp.step + j*cn);
          }
-
+       }
+    
      BilateralFilter_8u_Invoker body(dst, temp, radius, maxk, space_ofs, space_weight, color_weight);
      parallel_for_(Range(0, size.height), body);
  }
@@ -1424,7 +1516,7 @@ public:
  
      BilateralFilter_32f_Invoker(int _cn, int _radius, int _maxk, int *_space_ofs,
          const Mat& _temp, Mat& _dest, float _scale_index, float *_space_weight, float *_expLUT) :
-        ParallelLoopBody(), cn(_cn), radius(_radius), maxk(_maxk), space_ofs(_space_ofs),
+        cn(_cn), radius(_radius), maxk(_maxk), space_ofs(_space_ofs),
          temp(&_temp), dest(&_dest), scale_index(_scale_index), space_weight(_space_weight), expLUT(_expLUT)
      {
      }
@@ -1433,6 +1525,12 @@ public:
      {
          int i, j, k;
          Size size = dest->size();
+               #if CV_SSE3
+        int CV_DECL_ALIGNED(16) idxBuf[4];
+        float CV_DECL_ALIGNED(16) bufSum32[4];
+        static const int CV_DECL_ALIGNED(16) bufSignMask[] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+        bool haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
+               #endif
  
          for( i = range.start; i < range.end; i++ )
          {
@@ -1445,7 +1543,44 @@ public:
                  {
                      float sum = 0, wsum = 0;
                      float val0 = sptr[j];
-                    for( k = 0; k < maxk; k++ )
+                                       k = 0;
+                                       #if CV_SSE3
+                                       if( haveSSE3 )
+                                       {
+                                               const __m128 _val0 = _mm_set1_ps(sptr[j]);
+                                               const __m128 _scale_index = _mm_set1_ps(scale_index);
+                                               const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
+                                       
+                                               for( ; k <= maxk - 4 ; k += 4 )
+                                               {
+                                                       __m128 _sw    = _mm_loadu_ps(space_weight + k);
+                                                       __m128 _val   = _mm_set_ps(sptr[j + space_ofs[k+3]], sptr[j + space_ofs[k+2]],
+                                                       sptr[j + space_ofs[k+1]], sptr[j + space_ofs[k]]);
+                                                       __m128 _alpha = _mm_mul_ps(_mm_andnot_ps( _signMask, _mm_sub_ps(_val,_val0)), _scale_index);
+
+                                                       __m128i _idx = _mm_cvtps_epi32(_alpha);
+                                                       _mm_store_si128((__m128i*)idxBuf, _idx);
+                                                       _alpha = _mm_sub_ps(_alpha, _mm_cvtepi32_ps(_idx));
+
+                                                       __m128 _explut  = _mm_set_ps(expLUT[idxBuf[3]], expLUT[idxBuf[2]],
+                                                         expLUT[idxBuf[1]], expLUT[idxBuf[0]]);
+                                                       __m128 _explut1 = _mm_set_ps(expLUT[idxBuf[3]+1], expLUT[idxBuf[2]+1],
+                                                         expLUT[idxBuf[1]+1], expLUT[idxBuf[0]+1]);
+                                               
+                                                       __m128 _w = _mm_mul_ps(_sw, _mm_add_ps(_explut, _mm_mul_ps(_alpha, _mm_sub_ps(_explut1, _explut))));
+                                                       _val = _mm_mul_ps(_w, _val);
+
+                                                        _sw = _mm_hadd_ps(_w, _val);
+                                                        _sw = _mm_hadd_ps(_sw, _sw);
+                                                        _mm_storel_pi((__m64*)bufSum32, _sw);
+
+                                                        sum += bufSum32[1];
+                                                        wsum += bufSum32[0];
+                                               }
+                                       }
+                                       #endif
+
+                    for( ; k < maxk; k++ )
                      {
                          float val = sptr[j + space_ofs[k]];
                          float alpha = (float)(std::abs(val - val0)*scale_index);
@@ -1465,7 +1600,64 @@ public:
                  {
                      float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0;
                      float b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2];
-                    for( k = 0; k < maxk; k++ )
+                                       k = 0;
+                                       #if  CV_SSE3
+                                       if( haveSSE3 )
+                                       {
+                                               const __m128 _b0 = _mm_set1_ps(b0);
+                                               const __m128 _g0 = _mm_set1_ps(g0);
+                                               const __m128 _r0 = _mm_set1_ps(r0);
+                                               const __m128 _scale_index = _mm_set1_ps(scale_index);
+                        const __m128 _signMask = _mm_load_ps((const float*)bufSignMask);
+                               
+                                               for( ; k <= maxk-4; k += 4 )
+                                               {
+                                                       __m128 _sw = _mm_loadu_ps(space_weight + k);
+
+                                                       const float* sptr_k  = sptr + j + space_ofs[k];
+                                                       const float* sptr_k1 = sptr + j + space_ofs[k+1];
+                                                       const float* sptr_k2 = sptr + j + space_ofs[k+2];
+                                                       const float* sptr_k3 = sptr + j + space_ofs[k+3];
+
+                                                       __m128 _b = _mm_set_ps(sptr_k3[0], sptr_k2[0], sptr_k1[0], sptr_k[0]);
+                                                       __m128 _g = _mm_set_ps(sptr_k3[1], sptr_k2[1], sptr_k1[1], sptr_k[1]);
+                                                       __m128 _r = _mm_set_ps(sptr_k3[2], sptr_k2[2], sptr_k1[2], sptr_k[2]);
+
+                                                       __m128 _bt = _mm_andnot_ps(_signMask,_mm_sub_ps(_b,_b0));
+                                                       __m128 _gt = _mm_andnot_ps(_signMask,_mm_sub_ps(_g,_g0));
+                                                       __m128 _rt = _mm_andnot_ps(_signMask,_mm_sub_ps(_r,_r0));
+
+                                                       __m128 _alpha = _mm_mul_ps(_scale_index, _mm_add_ps(_rt,_mm_add_ps(_bt, _gt)));
+
+                                                       __m128i _idx  = _mm_cvtps_epi32(_alpha);
+                                                       _mm_store_si128((__m128i*)idxBuf, _idx);
+                                                       _alpha = _mm_sub_ps(_alpha, _mm_cvtepi32_ps(_idx));
+
+                                                       __m128 _explut  = _mm_set_ps(expLUT[idxBuf[3]], expLUT[idxBuf[2]], expLUT[idxBuf[1]], expLUT[idxBuf[0]]);
+                                                       __m128 _explut1 = _mm_set_ps(expLUT[idxBuf[3]+1], expLUT[idxBuf[2]+1], expLUT[idxBuf[1]+1], expLUT[idxBuf[0]+1]);
+                                                       
+                                                       __m128 _w = _mm_mul_ps(_sw, _mm_add_ps(_explut, _mm_mul_ps(_alpha, _mm_sub_ps(_explut1, _explut))));
+
+                                                       _b = _mm_mul_ps(_b, _w);
+                                                       _g = _mm_mul_ps(_g, _w);
+                                                       _r = _mm_mul_ps(_r, _w);
+
+                                                        _w = _mm_hadd_ps(_w, _b);
+                                                        _g = _mm_hadd_ps(_g, _r);
+
+                                                        _w = _mm_hadd_ps(_w, _g);
+                                                        _mm_store_ps(bufSum32, _w);
+
+                                                        wsum  += bufSum32[0];
+                                                        sum_b += bufSum32[1];
+                                                        sum_g += bufSum32[2];
+                                                        sum_r += bufSum32[3];
+                                               }
+
+                                       }
+                                       #endif
+                                       
+                    for(; k < maxk; k++ )
                      {
                          const float* sptr_k = sptr + j + space_ofs[k];
                          float b = sptr_k[0], g = sptr_k[1], r = sptr_k[2];
@@ -1493,6 +1685,7 @@ private:
      Mat *dest;
      float scale_index, *space_weight, *expLUT;
  };
+    
  
  static void
  bilateralFilter_32f( const Mat& src, Mat& dst, int d,
@@ -1569,7 +1762,7 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
      }
  
      // initialize space-related bilateral filter coefficients
-    for( i = -radius, maxk = 0; i <= radius; i++ )
+    for( i = -radius, maxk = 0; i <= radius; i++ ) 
          for( j = -radius; j <= radius; j++ )
          {
              double r = std::sqrt((double)i*i + (double)j*j);
diff --git a/modules/photo/include/opencv2/photo/denoising.hpp b/modules/photo/include/opencv2/photo/denoising.hpp

index b322c31..1fc35a6 100644 (file)
--- a/modules/photo/include/opencv2/photo/denoising.hpp
+++ b/modules/photo/include/opencv2/photo/denoising.hpp
@@ -55,23 +55,23 @@
  namespace cv
  {
  
-CV_EXPORTS void fastNlMeansDenoising( const Mat& src, Mat& dst,
-                                      int templateWindowSize, int searchWindowSize, int h);
+CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
+                                        int templateWindowSize, int searchWindowSize, int h);
  
-CV_EXPORTS void fastNlMeansDenoisingColored( const Mat& src, Mat& dst,
-                                             int templateWindowSize, int searchWindowSize, 
-                                             int h, int hForColorComponents);
+CV_EXPORTS_W void fastNlMeansDenoisingColored( InputArray src, OutputArray dst,
+                                               int templateWindowSize, int searchWindowSize, 
+                                               int h, int hForColorComponents);
  
-CV_EXPORTS void fastNlMeansDenoisingMulti( const std::vector<Mat>& srcImgs, 
-                                           int imgToDenoiseIndex, int temporalWindowSize,
-                                           Mat& dst,
-                                           int templateWindowSize, int searchWindowSize, int h);
+CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, 
+                                             int imgToDenoiseIndex, int temporalWindowSize,
+                                             OutputArray dst,
+                                             int templateWindowSize, int searchWindowSize, int h);
  
-CV_EXPORTS void fastNlMeansDenoisingColoredMulti( const std::vector<Mat>& srcImgs, 
-                                                  int imgToDenoiseIndex, int temporalWindowSize,
-                                                  Mat& dst,
-                                                  int templateWindowSize, int searchWindowSize, 
-                                                  int h, int hForColorComponents);
+CV_EXPORTS_W void fastNlMeansDenoisingColoredMulti( InputArrayOfArrays srcImgs, 
+                                                    int imgToDenoiseIndex, int temporalWindowSize,
+                                                    OutputArray dst,
+                                                    int templateWindowSize, int searchWindowSize, 
+                                                    int h, int hForColorComponents);
  
  }
  #endif
diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp

index b980b40..7452d0d 100644 (file)
--- a/modules/photo/src/denoising.cpp
+++ b/modules/photo/src/denoising.cpp
@@ -45,9 +45,13 @@
  #include "fast_nlmeans_denoising_invoker.hpp"
  #include "fast_nlmeans_multi_denoising_invoker.hpp"
  
-void cv::fastNlMeansDenoising( const cv::Mat& src, cv::Mat& dst,
+void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst,
                                 int templateWindowSize, int searchWindowSize, int h)
-{  
+{
+    Mat src = _src.getMat();
+    _dst.create(src.size(), src.type());
+    Mat dst = _dst.getMat();
+    
      switch (src.type()) {
          case CV_8U:
              parallel_for(cv::BlockedRange(0, src.rows), 
@@ -70,10 +74,14 @@ void cv::fastNlMeansDenoising( const cv::Mat& src, cv::Mat& dst,
      }
  }
  
-void cv::fastNlMeansDenoisingColored( const cv::Mat& src, cv::Mat& dst,
+void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
                                        int templateWindowSize, int searchWindowSize, 
                                        int h, int hForColorComponents)
  {
+    Mat src = _src.getMat();
+    _dst.create(src.size(), src.type());
+    Mat dst = _dst.getMat();
+    
      if (src.type() != CV_8UC3) {
          CV_Error(CV_StsBadArg, "Type of input image should be CV_8UC3!");
          return;
@@ -130,15 +138,20 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
      }
  }
  
-void cv::fastNlMeansDenoisingMulti( const std::vector<Mat>& srcImgs, 
+void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, 
                                      int imgToDenoiseIndex, int temporalWindowSize,
-                                    cv::Mat& dst,
+                                    OutputArray _dst,
                                      int templateWindowSize, int searchWindowSize, int h)
-{ 
+{
+    vector<Mat> srcImgs;
+    _srcImgs.getMatVector(srcImgs);
+    
      fastNlMeansDenoisingMultiCheckPreconditions(
          srcImgs, imgToDenoiseIndex, 
          temporalWindowSize, templateWindowSize, searchWindowSize
      );
+    _dst.create(srcImgs[0].size(), srcImgs[0].type());
+    Mat dst = _dst.getMat();
  
      switch (srcImgs[0].type()) {
          case CV_8U:
@@ -165,16 +178,22 @@ void cv::fastNlMeansDenoisingMulti( const std::vector<Mat>& srcImgs,
      }
  }
  
-void cv::fastNlMeansDenoisingColoredMulti( const std::vector<Mat>& srcImgs, 
+void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, 
                                             int imgToDenoiseIndex, int temporalWindowSize,
-                                           cv::Mat& dst,
+                                           OutputArray _dst,
                                             int templateWindowSize, int searchWindowSize, 
                                             int h, int hForColorComponents)
  {
+    vector<Mat> srcImgs;
+    _srcImgs.getMatVector(srcImgs);
+    
      fastNlMeansDenoisingMultiCheckPreconditions(
          srcImgs, imgToDenoiseIndex, 
          temporalWindowSize, templateWindowSize, searchWindowSize
      );
+    
+    _dst.create(srcImgs[0].size(), srcImgs[0].type());
+    Mat dst = _dst.getMat();
  
      int src_imgs_size = (int)srcImgs.size();
  
diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp

index 02185d1..602007e 100644 (file)
--- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
+++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp
@@ -270,9 +270,9 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const BlockedRange& range)
                  estimation[channel_num] = 0;
              }
              for (int d = 0; d < temporal_window_size_; d++) {
+                const Mat& esrc_d = extended_srcs_[d];
                  for (int y = 0; y < search_window_size_; y++) {
-                    const T* cur_row_ptr = 
-                        extended_srcs_[d].ptr<T>(border_size_ + search_window_y + y);
+                    const T* cur_row_ptr = esrc_d.ptr<T>(border_size_ + search_window_y + y);
  
                      int* dist_sums_row = dist_sums.row_ptr(d, y);
  
@@ -298,7 +298,8 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const BlockedRange& range)
                  dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
  
              } else { // weights_sum == 0
-                dst_.at<T>(i,j) = extended_srcs_[temporal_window_half_size_].at<T>(i,j);
+                const Mat& esrc = extended_srcs_[temporal_window_half_size_];
+                dst_.at<T>(i,j) = esrc.at<T>(i,j);
              }
          }
      }
author	Vadim Pisarevsky <vadim.pisarevsky@itseez.com>
	Tue, 21 Aug 2012 13:16:06 +0000 (17:16 +0400)
committer	Vadim Pisarevsky <vadim.pisarevsky@itseez.com>
	Tue, 21 Aug 2012 13:39:40 +0000 (17:39 +0400)
CMakeLists.txt		patch \| blob \| history
modules/imgproc/src/smooth.cpp		patch \| blob \| history
modules/photo/include/opencv2/photo/denoising.hpp		patch \| blob \| history
modules/photo/src/denoising.cpp		patch \| blob \| history
modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp		patch \| blob \| history